aboutsummaryrefslogtreecommitdiff
path: root/libsanitizer/tsan
diff options
context:
space:
mode:
authorMartin Liska <mliska@suse.cz>2022-05-03 12:56:26 +0200
committerMartin Liska <mliska@suse.cz>2022-05-04 11:00:48 +0200
commitf732bf6a603721f61102a08ad2d023c7c2670870 (patch)
treed0d8dafedac59ab6d55b678e53afe19fdd113fba /libsanitizer/tsan
parente2285af309000b74da0f7dc756a0b55e5f0b1b56 (diff)
downloadgcc-f732bf6a603721f61102a08ad2d023c7c2670870.zip
gcc-f732bf6a603721f61102a08ad2d023c7c2670870.tar.gz
gcc-f732bf6a603721f61102a08ad2d023c7c2670870.tar.bz2
libsanitizer: merge from upstream (0a1bcab9f3bf75c4c5d3e53bafb3eeb80320af46).
Diffstat (limited to 'libsanitizer/tsan')
-rw-r--r--libsanitizer/tsan/tsan_clock.cpp625
-rw-r--r--libsanitizer/tsan/tsan_clock.h293
-rw-r--r--libsanitizer/tsan/tsan_debugging.cpp2
-rw-r--r--libsanitizer/tsan/tsan_defs.h54
-rw-r--r--libsanitizer/tsan/tsan_dense_alloc.h9
-rw-r--r--libsanitizer/tsan/tsan_fd.cpp91
-rw-r--r--libsanitizer/tsan/tsan_fd.h1
-rw-r--r--libsanitizer/tsan/tsan_flags.cpp8
-rw-r--r--libsanitizer/tsan/tsan_flags.inc16
-rw-r--r--libsanitizer/tsan/tsan_interceptors.h20
-rw-r--r--libsanitizer/tsan/tsan_interceptors_posix.cpp200
-rw-r--r--libsanitizer/tsan/tsan_interface.cpp14
-rw-r--r--libsanitizer/tsan/tsan_interface.inc8
-rw-r--r--libsanitizer/tsan/tsan_interface_atomic.cpp87
-rw-r--r--libsanitizer/tsan/tsan_interface_java.cpp4
-rw-r--r--libsanitizer/tsan/tsan_mman.cpp82
-rw-r--r--libsanitizer/tsan/tsan_mman.h4
-rw-r--r--libsanitizer/tsan/tsan_mutexset.cpp54
-rw-r--r--libsanitizer/tsan/tsan_mutexset.h11
-rw-r--r--libsanitizer/tsan/tsan_platform.h285
-rw-r--r--libsanitizer/tsan/tsan_platform_linux.cpp48
-rw-r--r--libsanitizer/tsan/tsan_platform_mac.cpp152
-rw-r--r--libsanitizer/tsan/tsan_platform_posix.cpp18
-rw-r--r--libsanitizer/tsan/tsan_platform_windows.cpp3
-rw-r--r--libsanitizer/tsan/tsan_report.cpp29
-rw-r--r--libsanitizer/tsan/tsan_report.h6
-rw-r--r--libsanitizer/tsan/tsan_rtl.cpp724
-rw-r--r--libsanitizer/tsan/tsan_rtl.h341
-rw-r--r--libsanitizer/tsan/tsan_rtl_access.cpp895
-rw-r--r--libsanitizer/tsan/tsan_rtl_amd64.S236
-rw-r--r--libsanitizer/tsan/tsan_rtl_mutex.cpp645
-rw-r--r--libsanitizer/tsan/tsan_rtl_ppc64.S1
-rw-r--r--libsanitizer/tsan/tsan_rtl_proc.cpp1
-rw-r--r--libsanitizer/tsan/tsan_rtl_report.cpp369
-rw-r--r--libsanitizer/tsan/tsan_rtl_thread.cpp228
-rw-r--r--libsanitizer/tsan/tsan_shadow.h315
-rw-r--r--libsanitizer/tsan/tsan_sync.cpp82
-rw-r--r--libsanitizer/tsan/tsan_sync.h47
-rw-r--r--libsanitizer/tsan/tsan_trace.h113
-rw-r--r--libsanitizer/tsan/tsan_update_shadow_word.inc59
40 files changed, 2619 insertions, 3561 deletions
diff --git a/libsanitizer/tsan/tsan_clock.cpp b/libsanitizer/tsan/tsan_clock.cpp
deleted file mode 100644
index d122b67..0000000
--- a/libsanitizer/tsan/tsan_clock.cpp
+++ /dev/null
@@ -1,625 +0,0 @@
-//===-- tsan_clock.cpp ----------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-#include "tsan_clock.h"
-#include "tsan_rtl.h"
-#include "sanitizer_common/sanitizer_placement_new.h"
-
-// SyncClock and ThreadClock implement vector clocks for sync variables
-// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
-// ThreadClock contains fixed-size vector clock for maximum number of threads.
-// SyncClock contains growable vector clock for currently necessary number of
-// threads.
-// Together they implement very simple model of operations, namely:
-//
-// void ThreadClock::acquire(const SyncClock *src) {
-// for (int i = 0; i < kMaxThreads; i++)
-// clock[i] = max(clock[i], src->clock[i]);
-// }
-//
-// void ThreadClock::release(SyncClock *dst) const {
-// for (int i = 0; i < kMaxThreads; i++)
-// dst->clock[i] = max(dst->clock[i], clock[i]);
-// }
-//
-// void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
-// for (int i = 0; i < kMaxThreads; i++) {
-// tmp = clock[i];
-// clock[i] = max(clock[i], sc->clock[i]);
-// sc->clock[i] = tmp;
-// }
-// }
-//
-// void ThreadClock::ReleaseStore(SyncClock *dst) const {
-// for (int i = 0; i < kMaxThreads; i++)
-// dst->clock[i] = clock[i];
-// }
-//
-// void ThreadClock::acq_rel(SyncClock *dst) {
-// acquire(dst);
-// release(dst);
-// }
-//
-// Conformance to this model is extensively verified in tsan_clock_test.cpp.
-// However, the implementation is significantly more complex. The complexity
-// allows to implement important classes of use cases in O(1) instead of O(N).
-//
-// The use cases are:
-// 1. Singleton/once atomic that has a single release-store operation followed
-// by zillions of acquire-loads (the acquire-load is O(1)).
-// 2. Thread-local mutex (both lock and unlock can be O(1)).
-// 3. Leaf mutex (unlock is O(1)).
-// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
-// 5. An atomic with a single writer (writes can be O(1)).
-// The implementation dynamically adopts to workload. So if an atomic is in
-// read-only phase, these reads will be O(1); if it later switches to read/write
-// phase, the implementation will correctly handle that by switching to O(N).
-//
-// Thread-safety note: all const operations on SyncClock's are conducted under
-// a shared lock; all non-const operations on SyncClock's are conducted under
-// an exclusive lock; ThreadClock's are private to respective threads and so
-// do not need any protection.
-//
-// Description of SyncClock state:
-// clk_ - variable size vector clock, low kClkBits hold timestamp,
-// the remaining bits hold "acquired" flag (the actual value is thread's
-// reused counter);
-// if acquired == thr->reused_, then the respective thread has already
-// acquired this clock (except possibly for dirty elements).
-// dirty_ - holds up to two indices in the vector clock that other threads
-// need to acquire regardless of "acquired" flag value;
-// release_store_tid_ - denotes that the clock state is a result of
-// release-store operation by the thread with release_store_tid_ index.
-// release_store_reused_ - reuse count of release_store_tid_.
-
-namespace __tsan {
-
-static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
- return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
-}
-
-// Drop reference to the first level block idx.
-static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- atomic_uint32_t *ref = ref_ptr(cb);
- u32 v = atomic_load(ref, memory_order_acquire);
- for (;;) {
- CHECK_GT(v, 0);
- if (v == 1)
- break;
- if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
- return;
- }
- // First level block owns second level blocks, so them as well.
- for (uptr i = 0; i < blocks; i++)
- ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
- ctx->clock_alloc.Free(c, idx);
-}
-
-ThreadClock::ThreadClock(unsigned tid, unsigned reused)
- : tid_(tid)
- , reused_(reused + 1) // 0 has special meaning
- , last_acquire_()
- , global_acquire_()
- , cached_idx_()
- , cached_size_()
- , cached_blocks_() {
- CHECK_LT(tid, kMaxTidInClock);
- CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
- nclk_ = tid_ + 1;
- internal_memset(clk_, 0, sizeof(clk_));
-}
-
-void ThreadClock::ResetCached(ClockCache *c) {
- if (cached_idx_) {
- UnrefClockBlock(c, cached_idx_, cached_blocks_);
- cached_idx_ = 0;
- cached_size_ = 0;
- cached_blocks_ = 0;
- }
-}
-
-void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
- DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(src->size_, kMaxTid);
-
- // Check if it's empty -> no need to do anything.
- const uptr nclk = src->size_;
- if (nclk == 0)
- return;
-
- bool acquired = false;
- for (unsigned i = 0; i < kDirtyTids; i++) {
- SyncClock::Dirty dirty = src->dirty_[i];
- unsigned tid = dirty.tid();
- if (tid != kInvalidTid) {
- if (clk_[tid] < dirty.epoch) {
- clk_[tid] = dirty.epoch;
- acquired = true;
- }
- }
- }
-
- // Check if we've already acquired src after the last release operation on src
- if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
- // O(N) acquire.
- nclk_ = max(nclk_, nclk);
- u64 *dst_pos = &clk_[0];
- for (ClockElem &src_elem : *src) {
- u64 epoch = src_elem.epoch;
- if (*dst_pos < epoch) {
- *dst_pos = epoch;
- acquired = true;
- }
- dst_pos++;
- }
-
- // Remember that this thread has acquired this clock.
- if (nclk > tid_)
- src->elem(tid_).reused = reused_;
- }
-
- if (acquired) {
- last_acquire_ = clk_[tid_];
- ResetCached(c);
- }
-}
-
-void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
- DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(sc->size_, kMaxTid);
-
- if (sc->size_ == 0) {
- // ReleaseStore will correctly set release_store_tid_,
- // which can be important for future operations.
- ReleaseStore(c, sc);
- return;
- }
-
- nclk_ = max(nclk_, (uptr) sc->size_);
-
- // Check if we need to resize sc.
- if (sc->size_ < nclk_)
- sc->Resize(c, nclk_);
-
- bool acquired = false;
-
- sc->Unshare(c);
- // Update sc->clk_.
- sc->FlushDirty();
- uptr i = 0;
- for (ClockElem &ce : *sc) {
- u64 tmp = clk_[i];
- if (clk_[i] < ce.epoch) {
- clk_[i] = ce.epoch;
- acquired = true;
- }
- ce.epoch = tmp;
- ce.reused = 0;
- i++;
- }
- sc->release_store_tid_ = kInvalidTid;
- sc->release_store_reused_ = 0;
-
- if (acquired) {
- last_acquire_ = clk_[tid_];
- ResetCached(c);
- }
-}
-
-void ThreadClock::release(ClockCache *c, SyncClock *dst) {
- DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(dst->size_, kMaxTid);
-
- if (dst->size_ == 0) {
- // ReleaseStore will correctly set release_store_tid_,
- // which can be important for future operations.
- ReleaseStore(c, dst);
- return;
- }
-
- // Check if we need to resize dst.
- if (dst->size_ < nclk_)
- dst->Resize(c, nclk_);
-
- // Check if we had not acquired anything from other threads
- // since the last release on dst. If so, we need to update
- // only dst->elem(tid_).
- if (!HasAcquiredAfterRelease(dst)) {
- UpdateCurrentThread(c, dst);
- if (dst->release_store_tid_ != tid_ ||
- dst->release_store_reused_ != reused_)
- dst->release_store_tid_ = kInvalidTid;
- return;
- }
-
- // O(N) release.
- dst->Unshare(c);
- // First, remember whether we've acquired dst.
- bool acquired = IsAlreadyAcquired(dst);
- // Update dst->clk_.
- dst->FlushDirty();
- uptr i = 0;
- for (ClockElem &ce : *dst) {
- ce.epoch = max(ce.epoch, clk_[i]);
- ce.reused = 0;
- i++;
- }
- // Clear 'acquired' flag in the remaining elements.
- dst->release_store_tid_ = kInvalidTid;
- dst->release_store_reused_ = 0;
- // If we've acquired dst, remember this fact,
- // so that we don't need to acquire it on next acquire.
- if (acquired)
- dst->elem(tid_).reused = reused_;
-}
-
-void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
- DCHECK_LE(nclk_, kMaxTid);
- DCHECK_LE(dst->size_, kMaxTid);
-
- if (dst->size_ == 0 && cached_idx_ != 0) {
- // Reuse the cached clock.
- // Note: we could reuse/cache the cached clock in more cases:
- // we could update the existing clock and cache it, or replace it with the
- // currently cached clock and release the old one. And for a shared
- // existing clock, we could replace it with the currently cached;
- // or unshare, update and cache. But, for simplicity, we currently reuse
- // cached clock only when the target clock is empty.
- dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
- dst->tab_idx_ = cached_idx_;
- dst->size_ = cached_size_;
- dst->blocks_ = cached_blocks_;
- CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
- // The cached clock is shared (immutable),
- // so this is where we store the current clock.
- dst->dirty_[0].set_tid(tid_);
- dst->dirty_[0].epoch = clk_[tid_];
- dst->release_store_tid_ = tid_;
- dst->release_store_reused_ = reused_;
- // Remember that we don't need to acquire it in future.
- dst->elem(tid_).reused = reused_;
- // Grab a reference.
- atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
- return;
- }
-
- // Check if we need to resize dst.
- if (dst->size_ < nclk_)
- dst->Resize(c, nclk_);
-
- if (dst->release_store_tid_ == tid_ &&
- dst->release_store_reused_ == reused_ &&
- !HasAcquiredAfterRelease(dst)) {
- UpdateCurrentThread(c, dst);
- return;
- }
-
- // O(N) release-store.
- dst->Unshare(c);
- // Note: dst can be larger than this ThreadClock.
- // This is fine since clk_ beyond size is all zeros.
- uptr i = 0;
- for (ClockElem &ce : *dst) {
- ce.epoch = clk_[i];
- ce.reused = 0;
- i++;
- }
- for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
- dst->release_store_tid_ = tid_;
- dst->release_store_reused_ = reused_;
- // Remember that we don't need to acquire it in future.
- dst->elem(tid_).reused = reused_;
-
- // If the resulting clock is cachable, cache it for future release operations.
- // The clock is always cachable if we released to an empty sync object.
- if (cached_idx_ == 0 && dst->Cachable()) {
- // Grab a reference to the ClockBlock.
- atomic_uint32_t *ref = ref_ptr(dst->tab_);
- if (atomic_load(ref, memory_order_acquire) == 1)
- atomic_store_relaxed(ref, 2);
- else
- atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
- cached_idx_ = dst->tab_idx_;
- cached_size_ = dst->size_;
- cached_blocks_ = dst->blocks_;
- }
-}
-
-void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
- acquire(c, dst);
- ReleaseStore(c, dst);
-}
-
-// Updates only single element related to the current thread in dst->clk_.
-void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
- // Update the threads time, but preserve 'acquired' flag.
- for (unsigned i = 0; i < kDirtyTids; i++) {
- SyncClock::Dirty *dirty = &dst->dirty_[i];
- const unsigned tid = dirty->tid();
- if (tid == tid_ || tid == kInvalidTid) {
- dirty->set_tid(tid_);
- dirty->epoch = clk_[tid_];
- return;
- }
- }
- // Reset all 'acquired' flags, O(N).
- // We are going to touch dst elements, so we need to unshare it.
- dst->Unshare(c);
- dst->elem(tid_).epoch = clk_[tid_];
- for (uptr i = 0; i < dst->size_; i++)
- dst->elem(i).reused = 0;
- dst->FlushDirty();
-}
-
-// Checks whether the current thread has already acquired src.
-bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
- if (src->elem(tid_).reused != reused_)
- return false;
- for (unsigned i = 0; i < kDirtyTids; i++) {
- SyncClock::Dirty dirty = src->dirty_[i];
- if (dirty.tid() != kInvalidTid) {
- if (clk_[dirty.tid()] < dirty.epoch)
- return false;
- }
- }
- return true;
-}
-
-// Checks whether the current thread has acquired anything
-// from other clocks after releasing to dst (directly or indirectly).
-bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
- const u64 my_epoch = dst->elem(tid_).epoch;
- return my_epoch <= last_acquire_ ||
- my_epoch <= atomic_load_relaxed(&global_acquire_);
-}
-
-// Sets a single element in the vector clock.
-// This function is called only from weird places like AcquireGlobal.
-void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
- DCHECK_LT(tid, kMaxTid);
- DCHECK_GE(v, clk_[tid]);
- clk_[tid] = v;
- if (nclk_ <= tid)
- nclk_ = tid + 1;
- last_acquire_ = clk_[tid_];
- ResetCached(c);
-}
-
-void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
- printf("clock=[");
- for (uptr i = 0; i < nclk_; i++)
- printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
- printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
-}
-
-SyncClock::SyncClock() {
- ResetImpl();
-}
-
-SyncClock::~SyncClock() {
- // Reset must be called before dtor.
- CHECK_EQ(size_, 0);
- CHECK_EQ(blocks_, 0);
- CHECK_EQ(tab_, 0);
- CHECK_EQ(tab_idx_, 0);
-}
-
-void SyncClock::Reset(ClockCache *c) {
- if (size_)
- UnrefClockBlock(c, tab_idx_, blocks_);
- ResetImpl();
-}
-
-void SyncClock::ResetImpl() {
- tab_ = 0;
- tab_idx_ = 0;
- size_ = 0;
- blocks_ = 0;
- release_store_tid_ = kInvalidTid;
- release_store_reused_ = 0;
- for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
-}
-
-void SyncClock::Resize(ClockCache *c, uptr nclk) {
- Unshare(c);
- if (nclk <= capacity()) {
- // Memory is already allocated, just increase the size.
- size_ = nclk;
- return;
- }
- if (size_ == 0) {
- // Grow from 0 to one-level table.
- CHECK_EQ(size_, 0);
- CHECK_EQ(blocks_, 0);
- CHECK_EQ(tab_, 0);
- CHECK_EQ(tab_idx_, 0);
- tab_idx_ = ctx->clock_alloc.Alloc(c);
- tab_ = ctx->clock_alloc.Map(tab_idx_);
- internal_memset(tab_, 0, sizeof(*tab_));
- atomic_store_relaxed(ref_ptr(tab_), 1);
- size_ = 1;
- } else if (size_ > blocks_ * ClockBlock::kClockCount) {
- u32 idx = ctx->clock_alloc.Alloc(c);
- ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
- uptr top = size_ - blocks_ * ClockBlock::kClockCount;
- CHECK_LT(top, ClockBlock::kClockCount);
- const uptr move = top * sizeof(tab_->clock[0]);
- internal_memcpy(&new_cb->clock[0], tab_->clock, move);
- internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
- internal_memset(tab_->clock, 0, move);
- append_block(idx);
- }
- // At this point we have first level table allocated and all clock elements
- // are evacuated from it to a second level block.
- // Add second level tables as necessary.
- while (nclk > capacity()) {
- u32 idx = ctx->clock_alloc.Alloc(c);
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- internal_memset(cb, 0, sizeof(*cb));
- append_block(idx);
- }
- size_ = nclk;
-}
-
-// Flushes all dirty elements into the main clock array.
-void SyncClock::FlushDirty() {
- for (unsigned i = 0; i < kDirtyTids; i++) {
- Dirty *dirty = &dirty_[i];
- if (dirty->tid() != kInvalidTid) {
- CHECK_LT(dirty->tid(), size_);
- elem(dirty->tid()).epoch = dirty->epoch;
- dirty->set_tid(kInvalidTid);
- }
- }
-}
-
-bool SyncClock::IsShared() const {
- if (size_ == 0)
- return false;
- atomic_uint32_t *ref = ref_ptr(tab_);
- u32 v = atomic_load(ref, memory_order_acquire);
- CHECK_GT(v, 0);
- return v > 1;
-}
-
-// Unshares the current clock if it's shared.
-// Shared clocks are immutable, so they need to be unshared before any updates.
-// Note: this does not apply to dirty entries as they are not shared.
-void SyncClock::Unshare(ClockCache *c) {
- if (!IsShared())
- return;
- // First, copy current state into old.
- SyncClock old;
- old.tab_ = tab_;
- old.tab_idx_ = tab_idx_;
- old.size_ = size_;
- old.blocks_ = blocks_;
- old.release_store_tid_ = release_store_tid_;
- old.release_store_reused_ = release_store_reused_;
- for (unsigned i = 0; i < kDirtyTids; i++)
- old.dirty_[i] = dirty_[i];
- // Then, clear current object.
- ResetImpl();
- // Allocate brand new clock in the current object.
- Resize(c, old.size_);
- // Now copy state back into this object.
- Iter old_iter(&old);
- for (ClockElem &ce : *this) {
- ce = *old_iter;
- ++old_iter;
- }
- release_store_tid_ = old.release_store_tid_;
- release_store_reused_ = old.release_store_reused_;
- for (unsigned i = 0; i < kDirtyTids; i++)
- dirty_[i] = old.dirty_[i];
- // Drop reference to old and delete if necessary.
- old.Reset(c);
-}
-
-// Can we cache this clock for future release operations?
-ALWAYS_INLINE bool SyncClock::Cachable() const {
- if (size_ == 0)
- return false;
- for (unsigned i = 0; i < kDirtyTids; i++) {
- if (dirty_[i].tid() != kInvalidTid)
- return false;
- }
- return atomic_load_relaxed(ref_ptr(tab_)) == 1;
-}
-
-// elem linearizes the two-level structure into linear array.
-// Note: this is used only for one time accesses, vector operations use
-// the iterator as it is much faster.
-ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
- DCHECK_LT(tid, size_);
- const uptr block = tid / ClockBlock::kClockCount;
- DCHECK_LE(block, blocks_);
- tid %= ClockBlock::kClockCount;
- if (block == blocks_)
- return tab_->clock[tid];
- u32 idx = get_block(block);
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- return cb->clock[tid];
-}
-
-ALWAYS_INLINE uptr SyncClock::capacity() const {
- if (size_ == 0)
- return 0;
- uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
- // How many clock elements we can fit into the first level block.
- // +1 for ref counter.
- uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
- return blocks_ * ClockBlock::kClockCount + top;
-}
-
-ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
- DCHECK(size_);
- DCHECK_LT(bi, blocks_);
- return tab_->table[ClockBlock::kBlockIdx - bi];
-}
-
-ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
- uptr bi = blocks_++;
- CHECK_EQ(get_block(bi), 0);
- tab_->table[ClockBlock::kBlockIdx - bi] = idx;
-}
-
-// Used only by tests.
-u64 SyncClock::get(unsigned tid) const {
- for (unsigned i = 0; i < kDirtyTids; i++) {
- Dirty dirty = dirty_[i];
- if (dirty.tid() == tid)
- return dirty.epoch;
- }
- return elem(tid).epoch;
-}
-
-// Used only by Iter test.
-u64 SyncClock::get_clean(unsigned tid) const {
- return elem(tid).epoch;
-}
-
-void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
- printf("clock=[");
- for (uptr i = 0; i < size_; i++)
- printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
- printf("] reused=[");
- for (uptr i = 0; i < size_; i++)
- printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
- printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
- release_store_tid_, release_store_reused_, dirty_[0].tid(),
- dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
-}
-
-void SyncClock::Iter::Next() {
- // Finished with the current block, move on to the next one.
- block_++;
- if (block_ < parent_->blocks_) {
- // Iterate over the next second level block.
- u32 idx = parent_->get_block(block_);
- ClockBlock *cb = ctx->clock_alloc.Map(idx);
- pos_ = &cb->clock[0];
- end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
- ClockBlock::kClockCount);
- return;
- }
- if (block_ == parent_->blocks_ &&
- parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
- // Iterate over elements in the first level block.
- pos_ = &parent_->tab_->clock[0];
- end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
- ClockBlock::kClockCount);
- return;
- }
- parent_ = nullptr; // denotes end
-}
-} // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_clock.h b/libsanitizer/tsan/tsan_clock.h
deleted file mode 100644
index 11cbc0c..0000000
--- a/libsanitizer/tsan/tsan_clock.h
+++ /dev/null
@@ -1,293 +0,0 @@
-//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-#ifndef TSAN_CLOCK_H
-#define TSAN_CLOCK_H
-
-#include "tsan_defs.h"
-#include "tsan_dense_alloc.h"
-
-namespace __tsan {
-
-typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
-typedef DenseSlabAllocCache ClockCache;
-
-// The clock that lives in sync variables (mutexes, atomics, etc).
-class SyncClock {
- public:
- SyncClock();
- ~SyncClock();
-
- uptr size() const;
-
- // These are used only in tests.
- u64 get(unsigned tid) const;
- u64 get_clean(unsigned tid) const;
-
- void Resize(ClockCache *c, uptr nclk);
- void Reset(ClockCache *c);
-
- void DebugDump(int(*printf)(const char *s, ...));
-
- // Clock element iterator.
- // Note: it iterates only over the table without regard to dirty entries.
- class Iter {
- public:
- explicit Iter(SyncClock* parent);
- Iter& operator++();
- bool operator!=(const Iter& other);
- ClockElem &operator*();
-
- private:
- SyncClock *parent_;
- // [pos_, end_) is the current continuous range of clock elements.
- ClockElem *pos_;
- ClockElem *end_;
- int block_; // Current number of second level block.
-
- NOINLINE void Next();
- };
-
- Iter begin();
- Iter end();
-
- private:
- friend class ThreadClock;
- friend class Iter;
- static const uptr kDirtyTids = 2;
-
- struct Dirty {
- u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
- void set_tid(u32 tid) {
- tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
- }
- u64 epoch : kClkBits;
-
- private:
- // Full kInvalidTid won't fit into Dirty::tid.
- static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
- u64 tid_ : 64 - kClkBits; // kInvalidId if not active
- };
-
- static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");
-
- unsigned release_store_tid_;
- unsigned release_store_reused_;
- Dirty dirty_[kDirtyTids];
- // If size_ is 0, tab_ is nullptr.
- // If size <= 64 (kClockCount), tab_ contains pointer to an array with
- // 64 ClockElem's (ClockBlock::clock).
- // Otherwise, tab_ points to an array with up to 127 u32 elements,
- // each pointing to the second-level 512b block with 64 ClockElem's.
- // Unused space in the first level ClockBlock is used to store additional
- // clock elements.
- // The last u32 element in the first level ClockBlock is always used as
- // reference counter.
- //
- // See the following scheme for details.
- // All memory blocks are 512 bytes (allocated from ClockAlloc).
- // Clock (clk) elements are 64 bits.
- // Idx and ref are 32 bits.
- //
- // tab_
- // |
- // \/
- // +----------------------------------------------------+
- // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
- // +----------------------------------------------------+
- // | |
- // | \/
- // | +----------------+
- // | | clk0 ... clk63 |
- // | +----------------+
- // \/
- // +------------------+
- // | clk64 ... clk127 |
- // +------------------+
- //
- // Note: dirty entries, if active, always override what's stored in the clock.
- ClockBlock *tab_;
- u32 tab_idx_;
- u16 size_;
- u16 blocks_; // Number of second level blocks.
-
- void Unshare(ClockCache *c);
- bool IsShared() const;
- bool Cachable() const;
- void ResetImpl();
- void FlushDirty();
- uptr capacity() const;
- u32 get_block(uptr bi) const;
- void append_block(u32 idx);
- ClockElem &elem(unsigned tid) const;
-};
-
-// The clock that lives in threads.
-class ThreadClock {
- public:
- typedef DenseSlabAllocCache Cache;
-
- explicit ThreadClock(unsigned tid, unsigned reused = 0);
-
- u64 get(unsigned tid) const;
- void set(ClockCache *c, unsigned tid, u64 v);
- void set(u64 v);
- void tick();
- uptr size() const;
-
- void acquire(ClockCache *c, SyncClock *src);
- void releaseStoreAcquire(ClockCache *c, SyncClock *src);
- void release(ClockCache *c, SyncClock *dst);
- void acq_rel(ClockCache *c, SyncClock *dst);
- void ReleaseStore(ClockCache *c, SyncClock *dst);
- void ResetCached(ClockCache *c);
- void NoteGlobalAcquire(u64 v);
-
- void DebugReset();
- void DebugDump(int(*printf)(const char *s, ...));
-
- private:
- static const uptr kDirtyTids = SyncClock::kDirtyTids;
- // Index of the thread associated with he clock ("current thread").
- const unsigned tid_;
- const unsigned reused_; // tid_ reuse count.
- // Current thread time when it acquired something from other threads.
- u64 last_acquire_;
-
- // Last time another thread has done a global acquire of this thread's clock.
- // It helps to avoid problem described in:
- // https://github.com/golang/go/issues/39186
- // See test/tsan/java_finalizer2.cpp for a regression test.
- // Note the failuire is _extremely_ hard to hit, so if you are trying
- // to reproduce it, you may want to run something like:
- // $ go get golang.org/x/tools/cmd/stress
- // $ stress -p=64 ./a.out
- //
- // The crux of the problem is roughly as follows.
- // A number of O(1) optimizations in the clocks algorithm assume proper
- // transitive cumulative propagation of clock values. The AcquireGlobal
- // operation may produce an inconsistent non-linearazable view of
- // thread clocks. Namely, it may acquire a later value from a thread
- // with a higher ID, but fail to acquire an earlier value from a thread
- // with a lower ID. If a thread that executed AcquireGlobal then releases
- // to a sync clock, it will spoil the sync clock with the inconsistent
- // values. If another thread later releases to the sync clock, the optimized
- // algorithm may break.
- //
- // The exact sequence of events that leads to the failure.
- // - thread 1 executes AcquireGlobal
- // - thread 1 acquires value 1 for thread 2
- // - thread 2 increments clock to 2
- // - thread 2 releases to sync object 1
- // - thread 3 at time 1
- // - thread 3 acquires from sync object 1
- // - thread 3 increments clock to 2
- // - thread 1 acquires value 2 for thread 3
- // - thread 1 releases to sync object 2
- // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
- // - thread 3 releases to sync object 2
- // - thread 3 sees value 2 in the clock for itself
- // and decides that it has already released to the clock
- // and did not acquire anything from other threads after that
- // (the last_acquire_ check in release operation)
- // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
- // - thread 4 acquires from sync object 2
- // - thread 4 detects a false race with thread 2
- // as it should have been synchronized with thread 2 up to time 2,
- // but because of the broken clock it is now synchronized only up to time 1
- //
- // The global_acquire_ value helps to prevent this scenario.
- // Namely, thread 3 will not trust any own clock values up to global_acquire_
- // for the purposes of the last_acquire_ optimization.
- atomic_uint64_t global_acquire_;
-
- // Cached SyncClock (without dirty entries and release_store_tid_).
- // We reuse it for subsequent store-release operations without intervening
- // acquire operations. Since it is shared (and thus constant), clock value
- // for the current thread is then stored in dirty entries in the SyncClock.
- // We host a reference to the table while it is cached here.
- u32 cached_idx_;
- u16 cached_size_;
- u16 cached_blocks_;
-
- // Number of active elements in the clk_ table (the rest is zeros).
- uptr nclk_;
- u64 clk_[kMaxTidInClock]; // Fixed size vector clock.
-
- bool IsAlreadyAcquired(const SyncClock *src) const;
- bool HasAcquiredAfterRelease(const SyncClock *dst) const;
- void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
-};
-
-ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
- DCHECK_LT(tid, kMaxTidInClock);
- return clk_[tid];
-}
-
-ALWAYS_INLINE void ThreadClock::set(u64 v) {
- DCHECK_GE(v, clk_[tid_]);
- clk_[tid_] = v;
-}
-
-ALWAYS_INLINE void ThreadClock::tick() {
- clk_[tid_]++;
-}
-
-ALWAYS_INLINE uptr ThreadClock::size() const {
- return nclk_;
-}
-
-ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
- // Here we rely on the fact that AcquireGlobal is protected by
- // ThreadRegistryLock, thus only one thread at a time executes it
- // and values passed to this function should not go backwards.
- CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
- atomic_store_relaxed(&global_acquire_, v);
-}
-
-ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
- return Iter(this);
-}
-
-ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
- return Iter(nullptr);
-}
-
-ALWAYS_INLINE uptr SyncClock::size() const {
- return size_;
-}
-
-ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
- : parent_(parent)
- , pos_(nullptr)
- , end_(nullptr)
- , block_(-1) {
- if (parent)
- Next();
-}
-
-ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
- pos_++;
- if (UNLIKELY(pos_ >= end_))
- Next();
- return *this;
-}
-
-ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
- return parent_ != other.parent_;
-}
-
-ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
- return *pos_;
-}
-} // namespace __tsan
-
-#endif // TSAN_CLOCK_H
diff --git a/libsanitizer/tsan/tsan_debugging.cpp b/libsanitizer/tsan/tsan_debugging.cpp
index 1d3c384..1e61c31 100644
--- a/libsanitizer/tsan/tsan_debugging.cpp
+++ b/libsanitizer/tsan/tsan_debugging.cpp
@@ -157,7 +157,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
ReportMutex *mutex = rep->mutexes[idx];
*mutex_id = mutex->id;
*addr = (void *)mutex->addr;
- *destroyed = mutex->destroyed;
+ *destroyed = false;
if (mutex->stack) CopyTrace(mutex->stack->frames, trace, trace_size);
return 1;
}
diff --git a/libsanitizer/tsan/tsan_defs.h b/libsanitizer/tsan/tsan_defs.h
index fe0c1da..1ffa3d6 100644
--- a/libsanitizer/tsan/tsan_defs.h
+++ b/libsanitizer/tsan/tsan_defs.h
@@ -63,41 +63,14 @@ enum class Epoch : u16 {};
constexpr uptr kEpochBits = 14;
constexpr Epoch kEpochZero = static_cast<Epoch>(0);
constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits);
+constexpr Epoch kEpochLast = static_cast<Epoch>((1 << kEpochBits) - 1);
-const int kClkBits = 42;
-const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
-
-struct ClockElem {
- u64 epoch : kClkBits;
- u64 reused : 64 - kClkBits; // tid reuse count
-};
-
-struct ClockBlock {
- static const uptr kSize = 512;
- static const uptr kTableSize = kSize / sizeof(u32);
- static const uptr kClockCount = kSize / sizeof(ClockElem);
- static const uptr kRefIdx = kTableSize - 1;
- static const uptr kBlockIdx = kTableSize - 2;
-
- union {
- u32 table[kTableSize];
- ClockElem clock[kClockCount];
- };
+inline Epoch EpochInc(Epoch epoch) {
+ return static_cast<Epoch>(static_cast<u16>(epoch) + 1);
+}
- ClockBlock() {
- }
-};
+inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; }
-const int kTidBits = 13;
-// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
-// occupied by reference counter, so total number of elements we can store
-// in SyncClock is kClockCount * (kTableSize - 1).
-const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
-#if !SANITIZER_GO
-const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit.
-#else
-const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory.
-#endif
const uptr kShadowStackSize = 64 * 1024;
// Count of shadow values in a shadow cell.
@@ -107,7 +80,7 @@ const uptr kShadowCnt = 4;
const uptr kShadowCell = 8;
// Single shadow value.
-typedef u64 RawShadow;
+enum class RawShadow : u32 {};
const uptr kShadowSize = sizeof(RawShadow);
// Shadow memory is kShadowMultiplier times larger than user memory.
@@ -184,10 +157,13 @@ MD5Hash md5_hash(const void *data, uptr size);
struct Processor;
struct ThreadState;
class ThreadContext;
+struct TidSlot;
struct Context;
struct ReportStack;
class ReportDesc;
class RegionAlloc;
+struct Trace;
+struct TracePart;
typedef uptr AccessType;
@@ -198,6 +174,9 @@ enum : AccessType {
kAccessVptr = 1 << 2, // read or write of an object virtual table pointer
kAccessFree = 1 << 3, // synthetic memory access during memory freeing
kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set
+ kAccessCheckOnly = 1 << 5, // check for races, but don't store
+ kAccessNoRodata = 1 << 6, // don't check for .rodata marker
+ kAccessSlotLocked = 1 << 7, // memory access with TidSlot locked
};
// Descriptor of user's memory block.
@@ -219,15 +198,18 @@ enum ExternalTag : uptr {
// as 16-bit values, see tsan_defs.h.
};
-enum MutexType {
- MutexTypeTrace = MutexLastCommon,
- MutexTypeReport,
+enum {
+ MutexTypeReport = MutexLastCommon,
MutexTypeSyncVar,
MutexTypeAnnotations,
MutexTypeAtExit,
MutexTypeFired,
MutexTypeRacy,
MutexTypeGlobalProc,
+ MutexTypeInternalAlloc,
+ MutexTypeTrace,
+ MutexTypeSlot,
+ MutexTypeSlots,
};
} // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_dense_alloc.h b/libsanitizer/tsan/tsan_dense_alloc.h
index 9e15f74..7a39a39 100644
--- a/libsanitizer/tsan/tsan_dense_alloc.h
+++ b/libsanitizer/tsan/tsan_dense_alloc.h
@@ -104,6 +104,15 @@ class DenseSlabAlloc {
return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T);
}
+ template <typename Func>
+ void ForEach(Func func) {
+ SpinMutexLock lock(&mtx_);
+ uptr fillpos = atomic_load_relaxed(&fillpos_);
+ for (uptr l1 = 0; l1 < fillpos; l1++) {
+ for (IndexT l2 = l1 == 0 ? 1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]);
+ }
+ }
+
private:
T *map_[kL1Size];
SpinMutex mtx_;
diff --git a/libsanitizer/tsan/tsan_fd.cpp b/libsanitizer/tsan/tsan_fd.cpp
index 255ffa8..cf8f491 100644
--- a/libsanitizer/tsan/tsan_fd.cpp
+++ b/libsanitizer/tsan/tsan_fd.cpp
@@ -11,9 +11,12 @@
//===----------------------------------------------------------------------===//
#include "tsan_fd.h"
-#include "tsan_rtl.h"
+
#include <sanitizer_common/sanitizer_atomic.h>
+#include "tsan_interceptors.h"
+#include "tsan_rtl.h"
+
namespace __tsan {
const int kTableSizeL1 = 1024;
@@ -26,6 +29,9 @@ struct FdSync {
struct FdDesc {
FdSync *sync;
+ // This is used to establish write -> epoll_wait synchronization
+ // where epoll_wait receives notification about the write.
+ atomic_uintptr_t aux_sync; // FdSync*
Tid creation_tid;
StackID creation_stack;
};
@@ -100,6 +106,10 @@ static void init(ThreadState *thr, uptr pc, int fd, FdSync *s,
unref(thr, pc, d->sync);
d->sync = 0;
}
+ unref(thr, pc,
+ reinterpret_cast<FdSync *>(
+ atomic_load(&d->aux_sync, memory_order_relaxed)));
+ atomic_store(&d->aux_sync, 0, memory_order_relaxed);
if (flags()->io_sync == 0) {
unref(thr, pc, s);
} else if (flags()->io_sync == 1) {
@@ -110,12 +120,17 @@ static void init(ThreadState *thr, uptr pc, int fd, FdSync *s,
}
d->creation_tid = thr->tid;
d->creation_stack = CurrentStackId(thr, pc);
+ // This prevents false positives on fd_close_norace3.cpp test.
+ // The mechanics of the false positive are not completely clear,
+ // but it happens only if global reset is enabled (flush_memory_ms=1)
+ // and may be related to lost writes during asynchronous MADV_DONTNEED.
+ SlotLocker locker(thr);
if (write) {
// To catch races between fd usage and open.
MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
} else {
// See the dup-related comment in FdClose.
- MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead | kAccessSlotLocked);
}
}
@@ -177,6 +192,8 @@ void FdRelease(ThreadState *thr, uptr pc, int fd) {
MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
if (s)
Release(thr, pc, (uptr)s);
+ if (uptr aux_sync = atomic_load(&d->aux_sync, memory_order_acquire))
+ Release(thr, pc, aux_sync);
}
void FdAccess(ThreadState *thr, uptr pc, int fd) {
@@ -192,25 +209,39 @@ void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
if (bogusfd(fd))
return;
FdDesc *d = fddesc(thr, pc, fd);
- if (write) {
- // To catch races between fd usage and close.
- MemoryAccess(thr, pc, (uptr)d, 8, kAccessWrite);
- } else {
- // This path is used only by dup2/dup3 calls.
- // We do read instead of write because there is a number of legitimate
- // cases where write would lead to false positives:
- // 1. Some software dups a closed pipe in place of a socket before closing
- // the socket (to prevent races actually).
- // 2. Some daemons dup /dev/null in place of stdin/stdout.
- // On the other hand we have not seen cases when write here catches real
- // bugs.
- MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+ {
+ // Need to lock the slot to make MemoryAccess and MemoryResetRange atomic
+ // with respect to global reset. See the comment in MemoryRangeFreed.
+ SlotLocker locker(thr);
+ if (!MustIgnoreInterceptor(thr)) {
+ if (write) {
+ // To catch races between fd usage and close.
+ MemoryAccess(thr, pc, (uptr)d, 8,
+ kAccessWrite | kAccessCheckOnly | kAccessSlotLocked);
+ } else {
+ // This path is used only by dup2/dup3 calls.
+ // We do read instead of write because there is a number of legitimate
+ // cases where write would lead to false positives:
+ // 1. Some software dups a closed pipe in place of a socket before
+ // closing
+ // the socket (to prevent races actually).
+ // 2. Some daemons dup /dev/null in place of stdin/stdout.
+ // On the other hand we have not seen cases when write here catches real
+ // bugs.
+ MemoryAccess(thr, pc, (uptr)d, 8,
+ kAccessRead | kAccessCheckOnly | kAccessSlotLocked);
+ }
+ }
+ // We need to clear it, because if we do not intercept any call out there
+ // that creates fd, we will hit false postives.
+ MemoryResetRange(thr, pc, (uptr)d, 8);
}
- // We need to clear it, because if we do not intercept any call out there
- // that creates fd, we will hit false postives.
- MemoryResetRange(thr, pc, (uptr)d, 8);
unref(thr, pc, d->sync);
d->sync = 0;
+ unref(thr, pc,
+ reinterpret_cast<FdSync *>(
+ atomic_load(&d->aux_sync, memory_order_relaxed)));
+ atomic_store(&d->aux_sync, 0, memory_order_relaxed);
d->creation_tid = kInvalidTid;
d->creation_stack = kInvalidStackID;
}
@@ -269,6 +300,30 @@ void FdPollCreate(ThreadState *thr, uptr pc, int fd) {
init(thr, pc, fd, allocsync(thr, pc));
}
+void FdPollAdd(ThreadState *thr, uptr pc, int epfd, int fd) {
+ DPrintf("#%d: FdPollAdd(%d, %d)\n", thr->tid, epfd, fd);
+ if (bogusfd(epfd) || bogusfd(fd))
+ return;
+ FdDesc *d = fddesc(thr, pc, fd);
+ // Associate fd with epoll fd only once.
+ // While an fd can be associated with multiple epolls at the same time,
+ // or with different epolls during different phases of lifetime,
+ // synchronization semantics (and examples) of this are unclear.
+ // So we don't support this for now.
+ // If we change the association, it will also create lifetime management
+ // problem for FdRelease which accesses the aux_sync.
+ if (atomic_load(&d->aux_sync, memory_order_relaxed))
+ return;
+ FdDesc *epd = fddesc(thr, pc, epfd);
+ FdSync *s = epd->sync;
+ if (!s)
+ return;
+ uptr cmp = 0;
+ if (atomic_compare_exchange_strong(
+ &d->aux_sync, &cmp, reinterpret_cast<uptr>(s), memory_order_release))
+ ref(s);
+}
+
void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
DPrintf("#%d: FdSocketCreate(%d)\n", thr->tid, fd);
if (bogusfd(fd))
diff --git a/libsanitizer/tsan/tsan_fd.h b/libsanitizer/tsan/tsan_fd.h
index d964817..92625dc 100644
--- a/libsanitizer/tsan/tsan_fd.h
+++ b/libsanitizer/tsan/tsan_fd.h
@@ -49,6 +49,7 @@ void FdEventCreate(ThreadState *thr, uptr pc, int fd);
void FdSignalCreate(ThreadState *thr, uptr pc, int fd);
void FdInotifyCreate(ThreadState *thr, uptr pc, int fd);
void FdPollCreate(ThreadState *thr, uptr pc, int fd);
+void FdPollAdd(ThreadState *thr, uptr pc, int epfd, int fd);
void FdSocketCreate(ThreadState *thr, uptr pc, int fd);
void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd);
void FdSocketConnecting(ThreadState *thr, uptr pc, int fd);
diff --git a/libsanitizer/tsan/tsan_flags.cpp b/libsanitizer/tsan/tsan_flags.cpp
index ee89862..ee78f25 100644
--- a/libsanitizer/tsan/tsan_flags.cpp
+++ b/libsanitizer/tsan/tsan_flags.cpp
@@ -97,7 +97,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) {
ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS");
#endif
- // Sanity check.
+ // Check flags.
if (!f->report_bugs) {
f->report_thread_leaks = false;
f->report_destroy_locked = false;
@@ -110,12 +110,6 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) {
if (common_flags()->help) parser.PrintFlagDescriptions();
- if (f->history_size < 0 || f->history_size > 7) {
- Printf("ThreadSanitizer: incorrect value for history_size"
- " (must be [0..7])\n");
- Die();
- }
-
if (f->io_sync < 0 || f->io_sync > 2) {
Printf("ThreadSanitizer: incorrect value for io_sync"
" (must be [0..2])\n");
diff --git a/libsanitizer/tsan/tsan_flags.inc b/libsanitizer/tsan/tsan_flags.inc
index 7954a430..32cf3bb 100644
--- a/libsanitizer/tsan/tsan_flags.inc
+++ b/libsanitizer/tsan/tsan_flags.inc
@@ -43,6 +43,9 @@ TSAN_FLAG(
bool, force_seq_cst_atomics, false,
"If set, all atomics are effectively sequentially consistent (seq_cst), "
"regardless of what user actually specified.")
+TSAN_FLAG(bool, force_background_thread, false,
+ "If set, eagerly launch a background thread for memory reclamation "
+ "instead of waiting for a user call to pthread_create.")
TSAN_FLAG(bool, halt_on_error, false, "Exit after first reported error.")
TSAN_FLAG(int, atexit_sleep_ms, 1000,
"Sleep in main thread before exiting for that many ms "
@@ -59,14 +62,10 @@ TSAN_FLAG(bool, stop_on_start, false,
"Stops on start until __tsan_resume() is called (for debugging).")
TSAN_FLAG(bool, running_on_valgrind, false,
"Controls whether RunningOnValgrind() returns true or false.")
-// There are a lot of goroutines in Go, so we use smaller history.
TSAN_FLAG(
- int, history_size, SANITIZER_GO ? 1 : 3,
- "Per-thread history size, controls how many previous memory accesses "
- "are remembered per thread. Possible values are [0..7]. "
- "history_size=0 amounts to 32K memory accesses. Each next value doubles "
- "the amount of memory accesses, up to history_size=7 that amounts to "
- "4M memory accesses. The default value is 2 (128K memory accesses).")
+ uptr, history_size, 0,
+ "Per-thread history size,"
+ " controls how many extra previous memory accesses are remembered per thread.")
TSAN_FLAG(int, io_sync, 1,
"Controls level of synchronization implied by IO operations. "
"0 - no synchronization "
@@ -82,3 +81,6 @@ TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false,
"modules.")
TSAN_FLAG(bool, shared_ptr_interceptor, true,
"Track atomic reference counting in libc++ shared_ptr and weak_ptr.")
+TSAN_FLAG(bool, print_full_thread_history, false,
+ "If set, prints thread creation stacks for the threads involved in "
+ "the report and their ancestors up to the main thread.")
diff --git a/libsanitizer/tsan/tsan_interceptors.h b/libsanitizer/tsan/tsan_interceptors.h
index 61dbb81..3091ad8 100644
--- a/libsanitizer/tsan/tsan_interceptors.h
+++ b/libsanitizer/tsan/tsan_interceptors.h
@@ -36,6 +36,10 @@ inline bool in_symbolizer() {
}
#endif
+inline bool MustIgnoreInterceptor(ThreadState *thr) {
+ return !thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib;
+}
+
} // namespace __tsan
#define SCOPED_INTERCEPTOR_RAW(func, ...) \
@@ -60,10 +64,10 @@ inline bool in_symbolizer() {
# define CHECK_REAL_FUNC(func) DCHECK(REAL(func))
#endif
-#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
- SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
- CHECK_REAL_FUNC(func); \
- if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \
+#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
+ SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
+ CHECK_REAL_FUNC(func); \
+ if (MustIgnoreInterceptor(thr)) \
return REAL(func)(__VA_ARGS__);
#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
@@ -74,6 +78,14 @@ inline bool in_symbolizer() {
#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+#if SANITIZER_FREEBSD
+# define TSAN_INTERCEPTOR_FREEBSD_ALIAS(ret, func, ...) \
+ TSAN_INTERCEPTOR(ret, _pthread_##func, __VA_ARGS__) \
+ ALIAS(WRAPPER_NAME(pthread_##func));
+#else
+# define TSAN_INTERCEPTOR_FREEBSD_ALIAS(ret, func, ...)
+#endif
+
#if SANITIZER_NETBSD
# define TSAN_INTERCEPTOR_NETBSD_ALIAS(ret, func, ...) \
TSAN_INTERCEPTOR(ret, __libc_##func, __VA_ARGS__) \
diff --git a/libsanitizer/tsan/tsan_interceptors_posix.cpp b/libsanitizer/tsan/tsan_interceptors_posix.cpp
index 9a85ee0..60ca963 100644
--- a/libsanitizer/tsan/tsan_interceptors_posix.cpp
+++ b/libsanitizer/tsan/tsan_interceptors_posix.cpp
@@ -90,6 +90,7 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
extern "C" void *pthread_self();
extern "C" void _exit(int status);
#if !SANITIZER_NETBSD
@@ -176,6 +177,7 @@ struct ThreadSignalContext {
struct AtExitCtx {
void (*f)();
void *arg;
+ uptr pc;
};
// InterceptorContext holds all global data required for interceptors.
@@ -287,20 +289,25 @@ void ScopedInterceptor::DisableIgnoresImpl() {
}
#define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
+#else
+# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver)
+#endif
#if SANITIZER_FREEBSD
-# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func)
-#elif SANITIZER_NETBSD
-# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) \
- INTERCEPT_FUNCTION(__libc_##func)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) \
- INTERCEPT_FUNCTION(__libc_thr_##func)
+# define TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(func) \
+ INTERCEPT_FUNCTION(_pthread_##func)
#else
-# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func)
-# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func)
+# define TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(func)
+#endif
+#if SANITIZER_NETBSD
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) \
+ INTERCEPT_FUNCTION(__libc_##func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) \
+ INTERCEPT_FUNCTION(__libc_thr_##func)
+#else
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func)
#endif
#define READ_STRING_OF_LEN(thr, pc, s, len, n) \
@@ -366,7 +373,10 @@ TSAN_INTERCEPTOR(int, pause, int fake) {
return BLOCK_REAL(pause)(fake);
}
-static void at_exit_wrapper() {
+// Note: we specifically call the function in such strange way
+// with "installed_at" because in reports it will appear between
+// callback frames and the frame that installed the callback.
+static void at_exit_callback_installed_at() {
AtExitCtx *ctx;
{
// Ensure thread-safety.
@@ -378,15 +388,21 @@ static void at_exit_wrapper() {
interceptor_ctx()->AtExitStack.PopBack();
}
- Acquire(cur_thread(), (uptr)0, (uptr)ctx);
+ ThreadState *thr = cur_thread();
+ Acquire(thr, ctx->pc, (uptr)ctx);
+ FuncEntry(thr, ctx->pc);
((void(*)())ctx->f)();
+ FuncExit(thr);
Free(ctx);
}
-static void cxa_at_exit_wrapper(void *arg) {
- Acquire(cur_thread(), 0, (uptr)arg);
+static void cxa_at_exit_callback_installed_at(void *arg) {
+ ThreadState *thr = cur_thread();
AtExitCtx *ctx = (AtExitCtx*)arg;
+ Acquire(thr, ctx->pc, (uptr)arg);
+ FuncEntry(thr, ctx->pc);
((void(*)(void *arg))ctx->f)(ctx->arg);
+ FuncExit(thr);
Free(ctx);
}
@@ -400,7 +416,7 @@ TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
// We want to setup the atexit callback even if we are in ignored lib
// or after fork.
SCOPED_INTERCEPTOR_RAW(atexit, f);
- return setup_at_exit_wrapper(thr, pc, (void(*)())f, 0, 0);
+ return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, 0, 0);
}
#endif
@@ -408,7 +424,7 @@ TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
if (in_symbolizer())
return 0;
SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso);
- return setup_at_exit_wrapper(thr, pc, (void(*)())f, arg, dso);
+ return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, arg, dso);
}
static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
@@ -416,6 +432,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
auto *ctx = New<AtExitCtx>();
ctx->f = f;
ctx->arg = arg;
+ ctx->pc = pc;
Release(thr, pc, (uptr)ctx);
// Memory allocation in __cxa_atexit will race with free during exit,
// because we do not see synchronization around atexit callback list.
@@ -431,25 +448,27 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
// due to atexit_mu held on exit from the calloc interceptor.
ScopedIgnoreInterceptors ignore;
- res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_wrapper, 0, 0);
+ res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_callback_installed_at,
+ 0, 0);
// Push AtExitCtx on the top of the stack of callback functions
if (!res) {
interceptor_ctx()->AtExitStack.PushBack(ctx);
}
} else {
- res = REAL(__cxa_atexit)(cxa_at_exit_wrapper, ctx, dso);
+ res = REAL(__cxa_atexit)(cxa_at_exit_callback_installed_at, ctx, dso);
}
ThreadIgnoreEnd(thr);
return res;
}
#if !SANITIZER_MAC && !SANITIZER_NETBSD
-static void on_exit_wrapper(int status, void *arg) {
+static void on_exit_callback_installed_at(int status, void *arg) {
ThreadState *thr = cur_thread();
- uptr pc = 0;
- Acquire(thr, pc, (uptr)arg);
AtExitCtx *ctx = (AtExitCtx*)arg;
+ Acquire(thr, ctx->pc, (uptr)arg);
+ FuncEntry(thr, ctx->pc);
((void(*)(int status, void *arg))ctx->f)(status, ctx->arg);
+ FuncExit(thr);
Free(ctx);
}
@@ -460,11 +479,12 @@ TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
auto *ctx = New<AtExitCtx>();
ctx->f = (void(*)())f;
ctx->arg = arg;
+ ctx->pc = GET_CALLER_PC();
Release(thr, pc, (uptr)ctx);
// Memory allocation in __cxa_atexit will race with free during exit,
// because we do not see synchronization around atexit callback list.
ThreadIgnoreBegin(thr, pc);
- int res = REAL(on_exit)(on_exit_wrapper, ctx);
+ int res = REAL(on_exit)(on_exit_callback_installed_at, ctx);
ThreadIgnoreEnd(thr);
return res;
}
@@ -880,10 +900,11 @@ static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g,
}
}
-static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g) {
+static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g,
+ u32 v) {
if (!thr->in_ignored_lib)
Release(thr, pc, (uptr)g);
- u32 old = atomic_exchange(g, kGuardDone, memory_order_release);
+ u32 old = atomic_exchange(g, v, memory_order_release);
if (old & kGuardWaiter)
FutexWake(g, 1 << 30);
}
@@ -913,12 +934,12 @@ STDCXX_INTERCEPTOR(int, __cxa_guard_acquire, atomic_uint32_t *g) {
STDCXX_INTERCEPTOR(void, __cxa_guard_release, atomic_uint32_t *g) {
SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g);
- guard_release(thr, pc, g);
+ guard_release(thr, pc, g, kGuardDone);
}
STDCXX_INTERCEPTOR(void, __cxa_guard_abort, atomic_uint32_t *g) {
SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g);
- atomic_store(g, kGuardInit, memory_order_relaxed);
+ guard_release(thr, pc, g, kGuardInit);
}
namespace __tsan {
@@ -1515,12 +1536,12 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
// result in crashes due to too little stack space.
if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) {
(*f)();
- guard_release(thr, pc, a);
+ guard_release(thr, pc, a, kGuardDone);
}
return 0;
}
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
+#if SANITIZER_GLIBC
TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
if (fd > 0)
@@ -1533,20 +1554,20 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
#endif
TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD
- SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
+#if SANITIZER_GLIBC
+ SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
if (fd > 0)
FdAccess(thr, pc, fd);
- return REAL(fstat)(fd, buf);
+ return REAL(__fxstat)(0, fd, buf);
#else
- SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
+ SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
if (fd > 0)
FdAccess(thr, pc, fd);
- return REAL(__fxstat)(0, fd, buf);
+ return REAL(fstat)(fd, buf);
#endif
}
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
+#if SANITIZER_GLIBC
TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
if (fd > 0)
@@ -1558,7 +1579,7 @@ TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
#define TSAN_MAYBE_INTERCEPT___FXSTAT64
#endif
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
+#if SANITIZER_GLIBC
TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf);
if (fd > 0)
@@ -1665,11 +1686,10 @@ TSAN_INTERCEPTOR(int, eventfd, unsigned initval, int flags) {
#if SANITIZER_LINUX
TSAN_INTERCEPTOR(int, signalfd, int fd, void *mask, int flags) {
- SCOPED_TSAN_INTERCEPTOR(signalfd, fd, mask, flags);
- if (fd >= 0)
- FdClose(thr, pc, fd);
+ SCOPED_INTERCEPTOR_RAW(signalfd, fd, mask, flags);
+ FdClose(thr, pc, fd);
fd = REAL(signalfd)(fd, mask, flags);
- if (fd >= 0)
+ if (!MustIgnoreInterceptor(thr))
FdSignalCreate(thr, pc, fd);
return fd;
}
@@ -1746,17 +1766,16 @@ TSAN_INTERCEPTOR(int, listen, int fd, int backlog) {
}
TSAN_INTERCEPTOR(int, close, int fd) {
- SCOPED_TSAN_INTERCEPTOR(close, fd);
- if (fd >= 0)
+ SCOPED_INTERCEPTOR_RAW(close, fd);
+ if (!in_symbolizer())
FdClose(thr, pc, fd);
return REAL(close)(fd);
}
#if SANITIZER_LINUX
TSAN_INTERCEPTOR(int, __close, int fd) {
- SCOPED_TSAN_INTERCEPTOR(__close, fd);
- if (fd >= 0)
- FdClose(thr, pc, fd);
+ SCOPED_INTERCEPTOR_RAW(__close, fd);
+ FdClose(thr, pc, fd);
return REAL(__close)(fd);
}
#define TSAN_MAYBE_INTERCEPT___CLOSE TSAN_INTERCEPT(__close)
@@ -1767,13 +1786,10 @@ TSAN_INTERCEPTOR(int, __close, int fd) {
// glibc guts
#if SANITIZER_LINUX && !SANITIZER_ANDROID
TSAN_INTERCEPTOR(void, __res_iclose, void *state, bool free_addr) {
- SCOPED_TSAN_INTERCEPTOR(__res_iclose, state, free_addr);
+ SCOPED_INTERCEPTOR_RAW(__res_iclose, state, free_addr);
int fds[64];
int cnt = ExtractResolvFDs(state, fds, ARRAY_SIZE(fds));
- for (int i = 0; i < cnt; i++) {
- if (fds[i] > 0)
- FdClose(thr, pc, fds[i]);
- }
+ for (int i = 0; i < cnt; i++) FdClose(thr, pc, fds[i]);
REAL(__res_iclose)(state, free_addr);
}
#define TSAN_MAYBE_INTERCEPT___RES_ICLOSE TSAN_INTERCEPT(__res_iclose)
@@ -1854,7 +1870,7 @@ TSAN_INTERCEPTOR(int, rmdir, char *path) {
}
TSAN_INTERCEPTOR(int, closedir, void *dirp) {
- SCOPED_TSAN_INTERCEPTOR(closedir, dirp);
+ SCOPED_INTERCEPTOR_RAW(closedir, dirp);
if (dirp) {
int fd = dirfd(dirp);
FdClose(thr, pc, fd);
@@ -1885,8 +1901,10 @@ TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
FdAccess(thr, pc, epfd);
if (epfd >= 0 && fd >= 0)
FdAccess(thr, pc, fd);
- if (op == EPOLL_CTL_ADD && epfd >= 0)
+ if (op == EPOLL_CTL_ADD && epfd >= 0) {
+ FdPollAdd(thr, pc, epfd, fd);
FdRelease(thr, pc, epfd);
+ }
int res = REAL(epoll_ctl)(epfd, op, fd, ev);
return res;
}
@@ -1949,13 +1967,14 @@ TSAN_INTERCEPTOR(int, pthread_sigmask, int how, const __sanitizer_sigset_t *set,
namespace __tsan {
-static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) {
+static void ReportErrnoSpoiling(ThreadState *thr, uptr pc, int sig) {
VarSizeStackTrace stack;
// StackTrace::GetNestInstructionPc(pc) is used because return address is
// expected, OutputReport() will undo this.
ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeErrnoInSignal);
+ rep.SetSigNum(sig);
if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
rep.AddStack(stack, true);
OutputReport(thr, rep);
@@ -1965,6 +1984,7 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) {
static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
int sig, __sanitizer_siginfo *info,
void *uctx) {
+ CHECK(thr->slot);
__sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions;
if (acquire)
Acquire(thr, 0, (uptr)&sigactions[sig]);
@@ -2021,7 +2041,7 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
// signal; and it looks too fragile to intercept all ways to reraise a signal.
if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
errno != 99)
- ReportErrnoSpoiling(thr, pc);
+ ReportErrnoSpoiling(thr, pc, sig);
errno = saved_errno;
}
@@ -2132,11 +2152,11 @@ TSAN_INTERCEPTOR(int, pthread_kill, void *tid, int sig) {
ThreadSignalContext *sctx = SigCtx(thr);
CHECK_NE(sctx, 0);
int prev = sctx->int_signal_send;
- if (tid == pthread_self()) {
+ bool self = pthread_equal(tid, pthread_self());
+ if (self)
sctx->int_signal_send = sig;
- }
int res = REAL(pthread_kill)(tid, sig);
- if (tid == pthread_self()) {
+ if (self) {
CHECK_EQ(sctx->int_signal_send, sig);
sctx->int_signal_send = prev;
}
@@ -2193,6 +2213,7 @@ void atfork_child() {
FdOnFork(thr, pc);
}
+#if !SANITIZER_IOS
TSAN_INTERCEPTOR(int, vfork, int fake) {
// Some programs (e.g. openjdk) call close for all file descriptors
// in the child process. Under tsan it leads to false positives, because
@@ -2209,6 +2230,7 @@ TSAN_INTERCEPTOR(int, vfork, int fake) {
// Instead we simply turn vfork into fork.
return WRAP(fork)(fake);
}
+#endif
#if SANITIZER_LINUX
TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,
@@ -2252,7 +2274,7 @@ struct dl_iterate_phdr_data {
};
static bool IsAppNotRodata(uptr addr) {
- return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata;
+ return IsAppMem(addr) && *MemToShadow(addr) != Shadow::kRodata;
}
static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
@@ -2358,9 +2380,18 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) \
if (file) { \
int fd = fileno_unlocked(file); \
- if (fd >= 0) FdClose(thr, pc, fd); \
+ FdClose(thr, pc, fd); \
}
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+ ({ \
+ CheckNoDeepBind(filename, flag); \
+ ThreadIgnoreBegin(thr, 0); \
+ void *res = REAL(dlopen)(filename, flag); \
+ ThreadIgnoreEnd(thr); \
+ res; \
+ })
+
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
libignore()->OnLibraryLoaded(filename)
@@ -2391,8 +2422,11 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
- __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
+ if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
+ COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name); \
+ else \
+ __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
@@ -2553,7 +2587,7 @@ static USED void syscall_release(uptr pc, uptr addr) {
}
static void syscall_fd_close(uptr pc, int fd) {
- TSAN_SYSCALL();
+ auto *thr = cur_thread();
FdClose(thr, pc, fd);
}
@@ -2688,6 +2722,26 @@ TSAN_INTERCEPTOR(void, thr_exit, tid_t *state) {
#define TSAN_MAYBE_INTERCEPT_THR_EXIT
#endif
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_init, void *c, void *a)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_destroy, void *c)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_signal, void *c)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_broadcast, void *c)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_wait, void *c, void *m)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_init, void *m, void *a)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_destroy, void *m)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_lock, void *m)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_trylock, void *m)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_unlock, void *m)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_init, void *l, void *a)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_destroy, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_rdlock, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_tryrdlock, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_wrlock, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_trywrlock, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_unlock, void *l)
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, once, void *o, void (*i)())
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, sigmask, int f, void *n, void *o)
+
TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_init, void *c, void *a)
TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_signal, void *c)
TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_broadcast, void *c)
@@ -2916,6 +2970,26 @@ void InitializeInterceptors() {
}
#endif
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_init);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_destroy);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_signal);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_broadcast);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_wait);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_init);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_destroy);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_lock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_trylock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_unlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_init);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_destroy);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_rdlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_tryrdlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_wrlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_trywrlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_unlock);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(once);
+ TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(sigmask);
+
TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_init);
TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_signal);
TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_broadcast);
diff --git a/libsanitizer/tsan/tsan_interface.cpp b/libsanitizer/tsan/tsan_interface.cpp
index 0487151..e6c4bf2 100644
--- a/libsanitizer/tsan/tsan_interface.cpp
+++ b/libsanitizer/tsan/tsan_interface.cpp
@@ -26,20 +26,6 @@ void __tsan_flush_memory() {
FlushShadowMemory();
}
-void __tsan_read16(void *addr) {
- uptr pc = CALLERPC;
- ThreadState *thr = cur_thread();
- MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
- MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
-}
-
-void __tsan_write16(void *addr) {
- uptr pc = CALLERPC;
- ThreadState *thr = cur_thread();
- MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
- MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
-}
-
void __tsan_read16_pc(void *addr, void *pc) {
uptr pc_no_pac = STRIP_PAC_PC(pc);
ThreadState *thr = cur_thread();
diff --git a/libsanitizer/tsan/tsan_interface.inc b/libsanitizer/tsan/tsan_interface.inc
index 0031800..b0a424f 100644
--- a/libsanitizer/tsan/tsan_interface.inc
+++ b/libsanitizer/tsan/tsan_interface.inc
@@ -34,6 +34,10 @@ void __tsan_read8(void *addr) {
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
}
+void __tsan_read16(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+}
+
void __tsan_write1(void *addr) {
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
}
@@ -50,6 +54,10 @@ void __tsan_write8(void *addr) {
MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
}
+void __tsan_write16(void *addr) {
+ MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+}
+
void __tsan_read1_pc(void *addr, void *pc) {
MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
}
diff --git a/libsanitizer/tsan/tsan_interface_atomic.cpp b/libsanitizer/tsan/tsan_interface_atomic.cpp
index 24ba3bb..f794a2f 100644
--- a/libsanitizer/tsan/tsan_interface_atomic.cpp
+++ b/libsanitizer/tsan/tsan_interface_atomic.cpp
@@ -235,8 +235,9 @@ static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
T v = NoTsanAtomicLoad(a, mo);
SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
if (s) {
- ReadLock l(&s->mtx);
- AcquireImpl(thr, pc, &s->clock);
+ SlotLocker locker(thr);
+ ReadLock lock(&s->mtx);
+ thr->clock.Acquire(s->clock);
// Re-read under sync mutex because we need a consistent snapshot
// of the value and the clock we acquire.
v = NoTsanAtomicLoad(a, mo);
@@ -270,14 +271,14 @@ static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
NoTsanAtomicStore(a, v, mo);
return;
}
- __sync_synchronize();
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseStoreImpl(thr, pc, &s->clock);
- NoTsanAtomicStore(a, v, mo);
+ SlotLocker locker(thr);
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ Lock lock(&s->mtx);
+ thr->clock.ReleaseStore(&s->clock);
+ NoTsanAtomicStore(a, v, mo);
+ }
+ IncrementEpoch(thr);
}
template <typename T, T (*F)(volatile T *v, T op)>
@@ -285,18 +286,21 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
if (LIKELY(mo == mo_relaxed))
return F(a, v);
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- if (IsAcqRelOrder(mo))
- AcquireReleaseImpl(thr, pc, &s->clock);
- else if (IsReleaseOrder(mo))
- ReleaseImpl(thr, pc, &s->clock);
- else if (IsAcquireOrder(mo))
- AcquireImpl(thr, pc, &s->clock);
- return F(a, v);
+ SlotLocker locker(thr);
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ RWLock lock(&s->mtx, IsReleaseOrder(mo));
+ if (IsAcqRelOrder(mo))
+ thr->clock.ReleaseAcquire(&s->clock);
+ else if (IsReleaseOrder(mo))
+ thr->clock.Release(&s->clock);
+ else if (IsAcquireOrder(mo))
+ thr->clock.Acquire(s->clock);
+ v = F(a, v);
+ }
+ if (IsReleaseOrder(mo))
+ IncrementEpoch(thr);
+ return v;
}
template<typename T>
@@ -416,27 +420,28 @@ static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v,
*c = pr;
return false;
}
-
+ SlotLocker locker(thr);
bool release = IsReleaseOrder(mo);
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- RWLock l(&s->mtx, release);
- T cc = *c;
- T pr = func_cas(a, cc, v);
- bool success = pr == cc;
- if (!success) {
- *c = pr;
- mo = fmo;
+ bool success;
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ RWLock lock(&s->mtx, release);
+ T cc = *c;
+ T pr = func_cas(a, cc, v);
+ success = pr == cc;
+ if (!success) {
+ *c = pr;
+ mo = fmo;
+ }
+ if (success && IsAcqRelOrder(mo))
+ thr->clock.ReleaseAcquire(&s->clock);
+ else if (success && IsReleaseOrder(mo))
+ thr->clock.Release(&s->clock);
+ else if (IsAcquireOrder(mo))
+ thr->clock.Acquire(s->clock);
}
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
-
- if (success && IsAcqRelOrder(mo))
- AcquireReleaseImpl(thr, pc, &s->clock);
- else if (success && IsReleaseOrder(mo))
- ReleaseImpl(thr, pc, &s->clock);
- else if (IsAcquireOrder(mo))
- AcquireImpl(thr, pc, &s->clock);
+ if (success && release)
+ IncrementEpoch(thr);
return success;
}
diff --git a/libsanitizer/tsan/tsan_interface_java.cpp b/libsanitizer/tsan/tsan_interface_java.cpp
index c090c1f..7c15a16 100644
--- a/libsanitizer/tsan/tsan_interface_java.cpp
+++ b/libsanitizer/tsan/tsan_interface_java.cpp
@@ -106,7 +106,7 @@ void __tsan_java_free(jptr ptr, jptr size) {
DCHECK_GE(ptr, jctx->heap_begin);
DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
- ctx->metamap.FreeRange(thr->proc(), ptr, size);
+ ctx->metamap.FreeRange(thr->proc(), ptr, size, false);
}
void __tsan_java_move(jptr src, jptr dst, jptr size) {
@@ -133,7 +133,7 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) {
// support that anymore as it contains addresses of accesses.
RawShadow *d = MemToShadow(dst);
RawShadow *dend = MemToShadow(dst + size);
- internal_memset(d, 0, (dend - d) * sizeof(*d));
+ ShadowSet(d, dend, Shadow::kEmpty);
}
jptr __tsan_java_find(jptr *from_ptr, jptr to) {
diff --git a/libsanitizer/tsan/tsan_mman.cpp b/libsanitizer/tsan/tsan_mman.cpp
index f1b6768..0937e52 100644
--- a/libsanitizer/tsan/tsan_mman.cpp
+++ b/libsanitizer/tsan/tsan_mman.cpp
@@ -20,18 +20,6 @@
#include "tsan_report.h"
#include "tsan_flags.h"
-// May be overriden by front-end.
-SANITIZER_WEAK_DEFAULT_IMPL
-void __sanitizer_malloc_hook(void *ptr, uptr size) {
- (void)ptr;
- (void)size;
-}
-
-SANITIZER_WEAK_DEFAULT_IMPL
-void __sanitizer_free_hook(void *ptr) {
- (void)ptr;
-}
-
namespace __tsan {
struct MapUnmapCallback {
@@ -69,8 +57,17 @@ Allocator *allocator() {
struct GlobalProc {
Mutex mtx;
Processor *proc;
-
- GlobalProc() : mtx(MutexTypeGlobalProc), proc(ProcCreate()) {}
+ // This mutex represents the internal allocator combined for
+ // the purposes of deadlock detection. The internal allocator
+ // uses multiple mutexes, moreover they are locked only occasionally
+ // and they are spin mutexes which don't support deadlock detection.
+ // So we use this fake mutex to serve as a substitute for these mutexes.
+ CheckedMutex internal_alloc_mtx;
+
+ GlobalProc()
+ : mtx(MutexTypeGlobalProc),
+ proc(ProcCreate()),
+ internal_alloc_mtx(MutexTypeInternalAlloc) {}
};
static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64);
@@ -78,6 +75,11 @@ GlobalProc *global_proc() {
return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
}
+static void InternalAllocAccess() {
+ global_proc()->internal_alloc_mtx.Lock();
+ global_proc()->internal_alloc_mtx.Unlock();
+}
+
ScopedGlobalProcessor::ScopedGlobalProcessor() {
GlobalProc *gp = global_proc();
ThreadState *thr = cur_thread();
@@ -110,6 +112,24 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() {
gp->mtx.Unlock();
}
+void AllocatorLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ global_proc()->internal_alloc_mtx.Lock();
+ InternalAllocatorLock();
+}
+
+void AllocatorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ InternalAllocatorUnlock();
+ global_proc()->internal_alloc_mtx.Unlock();
+}
+
+void GlobalProcessorLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ global_proc()->mtx.Lock();
+}
+
+void GlobalProcessorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ global_proc()->mtx.Unlock();
+}
+
static constexpr uptr kMaxAllowedMallocSize = 1ull << 40;
static uptr max_user_defined_malloc_size;
@@ -166,6 +186,12 @@ void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, uptr align,
GET_STACK_TRACE_FATAL(thr, pc);
ReportAllocationSizeTooBig(sz, malloc_limit, &stack);
}
+ if (UNLIKELY(IsRssLimitExceeded())) {
+ if (AllocatorMayReturnNull())
+ return nullptr;
+ GET_STACK_TRACE_FATAL(thr, pc);
+ ReportRssLimitExceeded(&stack);
+ }
void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align);
if (UNLIKELY(!p)) {
SetAllocatorOutOfMemory();
@@ -219,8 +245,17 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
+ // Note: this can run before thread initialization/after finalization.
+ // As a result this is not necessarily synchronized with DoReset,
+ // which iterates over and resets all sync objects,
+ // but it is fine to create new MBlocks in this context.
ctx->metamap.AllocBlock(thr, pc, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ // If this runs before thread initialization/after finalization
+ // and we don't have trace initialized, we can't imitate writes.
+ // In such case just reset the shadow range, it is fine since
+ // it affects only a small fraction of special objects.
+ if (write && thr->ignore_reads_and_writes == 0 &&
+ atomic_load_relaxed(&thr->trace_pos))
MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
else
MemoryResetRange(thr, pc, (uptr)p, sz);
@@ -228,7 +263,14 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
CHECK_NE(p, (void*)0);
- uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
+ if (!thr->slot) {
+ // Very early/late in thread lifetime, or during fork.
+ UNUSED uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, false);
+ DPrintf("#%d: free(0x%zx, %zu) (no slot)\n", thr->tid, p, sz);
+ return;
+ }
+ SlotLocker locker(thr);
+ uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, true);
DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
if (write && thr->ignore_reads_and_writes == 0)
MemoryRangeFreed(thr, pc, (uptr)p, sz);
@@ -310,7 +352,7 @@ void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) {
}
uptr user_alloc_usable_size(const void *p) {
- if (p == 0)
+ if (p == 0 || !IsAppMem((uptr)p))
return 0;
MBlock *b = ctx->metamap.GetBlock((uptr)p);
if (!b)
@@ -324,7 +366,6 @@ void invoke_malloc_hook(void *ptr, uptr size) {
ThreadState *thr = cur_thread();
if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
return;
- __sanitizer_malloc_hook(ptr, size);
RunMallocHooks(ptr, size);
}
@@ -332,7 +373,6 @@ void invoke_free_hook(void *ptr) {
ThreadState *thr = cur_thread();
if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
return;
- __sanitizer_free_hook(ptr);
RunFreeHooks(ptr);
}
@@ -342,6 +382,7 @@ void *Alloc(uptr sz) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
CHECK(0);
}
+ InternalAllocAccess();
return InternalAlloc(sz, &thr->proc()->internal_alloc_cache);
}
@@ -351,6 +392,7 @@ void FreeImpl(void *p) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
CHECK(0);
}
+ InternalAllocAccess();
InternalFree(p, &thr->proc()->internal_alloc_cache);
}
@@ -393,8 +435,6 @@ uptr __sanitizer_get_allocated_size(const void *p) {
void __tsan_on_thread_idle() {
ThreadState *thr = cur_thread();
- thr->clock.ResetCached(&thr->proc()->clock_cache);
- thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
allocator()->SwallowCache(&thr->proc()->alloc_cache);
internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache);
ctx->metamap.OnProcIdle(thr->proc());
diff --git a/libsanitizer/tsan/tsan_mman.h b/libsanitizer/tsan/tsan_mman.h
index efea5e5..2095f28 100644
--- a/libsanitizer/tsan/tsan_mman.h
+++ b/libsanitizer/tsan/tsan_mman.h
@@ -24,6 +24,10 @@ void ReplaceSystemMalloc();
void AllocatorProcStart(Processor *proc);
void AllocatorProcFinish(Processor *proc);
void AllocatorPrintStats();
+void AllocatorLock();
+void AllocatorUnlock();
+void GlobalProcessorLock();
+void GlobalProcessorUnlock();
// For user allocations.
void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz,
diff --git a/libsanitizer/tsan/tsan_mutexset.cpp b/libsanitizer/tsan/tsan_mutexset.cpp
index 7351796..3a75b80a 100644
--- a/libsanitizer/tsan/tsan_mutexset.cpp
+++ b/libsanitizer/tsan/tsan_mutexset.cpp
@@ -19,57 +19,7 @@ namespace __tsan {
MutexSet::MutexSet() {
}
-void MutexSet::Add(u64 id, bool write, u64 epoch) {
- // Look up existing mutex with the same id.
- for (uptr i = 0; i < size_; i++) {
- if (descs_[i].id == id) {
- descs_[i].count++;
- descs_[i].epoch = epoch;
- return;
- }
- }
- // On overflow, find the oldest mutex and drop it.
- if (size_ == kMaxSize) {
- u64 minepoch = (u64)-1;
- u64 mini = (u64)-1;
- for (uptr i = 0; i < size_; i++) {
- if (descs_[i].epoch < minepoch) {
- minepoch = descs_[i].epoch;
- mini = i;
- }
- }
- RemovePos(mini);
- CHECK_EQ(size_, kMaxSize - 1);
- }
- // Add new mutex descriptor.
- descs_[size_].addr = 0;
- descs_[size_].stack_id = kInvalidStackID;
- descs_[size_].id = id;
- descs_[size_].write = write;
- descs_[size_].epoch = epoch;
- descs_[size_].seq = seq_++;
- descs_[size_].count = 1;
- size_++;
-}
-
-void MutexSet::Del(u64 id, bool write) {
- for (uptr i = 0; i < size_; i++) {
- if (descs_[i].id == id) {
- if (--descs_[i].count == 0)
- RemovePos(i);
- return;
- }
- }
-}
-
-void MutexSet::Remove(u64 id) {
- for (uptr i = 0; i < size_; i++) {
- if (descs_[i].id == id) {
- RemovePos(i);
- return;
- }
- }
-}
+void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); }
void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Look up existing mutex with the same id.
@@ -93,9 +43,7 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Add new mutex descriptor.
descs_[size_].addr = addr;
descs_[size_].stack_id = stack_id;
- descs_[size_].id = 0;
descs_[size_].write = write;
- descs_[size_].epoch = 0;
descs_[size_].seq = seq_++;
descs_[size_].count = 1;
size_++;
diff --git a/libsanitizer/tsan/tsan_mutexset.h b/libsanitizer/tsan/tsan_mutexset.h
index 93776a6..aabd361 100644
--- a/libsanitizer/tsan/tsan_mutexset.h
+++ b/libsanitizer/tsan/tsan_mutexset.h
@@ -25,8 +25,6 @@ class MutexSet {
struct Desc {
uptr addr;
StackID stack_id;
- u64 id;
- u64 epoch;
u32 seq;
u32 count;
bool write;
@@ -40,10 +38,7 @@ class MutexSet {
};
MutexSet();
- // The 'id' is obtained from SyncVar::GetId().
- void Add(u64 id, bool write, u64 epoch);
- void Del(u64 id, bool write);
- void Remove(u64 id); // Removes the mutex completely (if it's destroyed).
+ void Reset();
void AddAddr(uptr addr, StackID stack_id, bool write);
void DelAddr(uptr addr, bool destroy = false);
uptr Size() const;
@@ -82,9 +77,7 @@ class DynamicMutexSet {
// in different goroutine).
#if SANITIZER_GO
MutexSet::MutexSet() {}
-void MutexSet::Add(u64 id, bool write, u64 epoch) {}
-void MutexSet::Del(u64 id, bool write) {}
-void MutexSet::Remove(u64 id) {}
+void MutexSet::Reset() {}
void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
void MutexSet::DelAddr(uptr addr, bool destroy) {}
uptr MutexSet::Size() const { return 0; }
diff --git a/libsanitizer/tsan/tsan_platform.h b/libsanitizer/tsan/tsan_platform.h
index 7ff0aca..233bf0a 100644
--- a/libsanitizer/tsan/tsan_platform.h
+++ b/libsanitizer/tsan/tsan_platform.h
@@ -18,8 +18,8 @@
# error "Only 64-bit is supported"
#endif
+#include "sanitizer_common/sanitizer_common.h"
#include "tsan_defs.h"
-#include "tsan_trace.h"
namespace __tsan {
@@ -40,14 +40,12 @@ enum {
C/C++ on linux/x86_64 and freebsd/x86_64
0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB)
0040 0000 0000 - 0100 0000 0000: -
-0100 0000 0000 - 2000 0000 0000: shadow
-2000 0000 0000 - 3000 0000 0000: -
+0100 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
4000 0000 0000 - 5500 0000 0000: -
5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels
-5680 0000 0000 - 6000 0000 0000: -
-6000 0000 0000 - 6200 0000 0000: traces
-6200 0000 0000 - 7d00 0000 0000: -
+5680 0000 0000 - 7d00 0000 0000: -
7b00 0000 0000 - 7c00 0000 0000: heap
7c00 0000 0000 - 7e80 0000 0000: -
7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
@@ -67,10 +65,8 @@ C/C++ on netbsd/amd64 can reuse the same mapping:
struct Mapping48AddressSpace {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x340000000000ull;
- static const uptr kTraceMemBeg = 0x600000000000ull;
- static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
- static const uptr kShadowEnd = 0x200000000000ull;
+ static const uptr kShadowEnd = 0x100000000000ull;
static const uptr kHeapMemBeg = 0x7b0000000000ull;
static const uptr kHeapMemEnd = 0x7c0000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -89,14 +85,13 @@ struct Mapping48AddressSpace {
C/C++ on linux/mips64 (40-bit VMA)
0000 0000 00 - 0100 0000 00: - (4 GB)
0100 0000 00 - 0200 0000 00: main binary (4 GB)
-0200 0000 00 - 2000 0000 00: - (120 GB)
-2000 0000 00 - 4000 0000 00: shadow (128 GB)
+0200 0000 00 - 1200 0000 00: - (64 GB)
+1200 0000 00 - 2200 0000 00: shadow (64 GB)
+2200 0000 00 - 4000 0000 00: - (120 GB)
4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB)
5000 0000 00 - aa00 0000 00: - (360 GB)
aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB)
-ab00 0000 00 - b000 0000 00: - (20 GB)
-b000 0000 00 - b200 0000 00: traces (8 GB)
-b200 0000 00 - fe00 0000 00: - (304 GB)
+ab00 0000 00 - fe00 0000 00: - (332 GB)
fe00 0000 00 - ff00 0000 00: heap (4 GB)
ff00 0000 00 - ff80 0000 00: - (2 GB)
ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
@@ -104,10 +99,8 @@ ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
struct MappingMips64_40 {
static const uptr kMetaShadowBeg = 0x4000000000ull;
static const uptr kMetaShadowEnd = 0x5000000000ull;
- static const uptr kTraceMemBeg = 0xb000000000ull;
- static const uptr kTraceMemEnd = 0xb200000000ull;
- static const uptr kShadowBeg = 0x2000000000ull;
- static const uptr kShadowEnd = 0x4000000000ull;
+ static const uptr kShadowBeg = 0x1200000000ull;
+ static const uptr kShadowEnd = 0x2200000000ull;
static const uptr kHeapMemBeg = 0xfe00000000ull;
static const uptr kHeapMemEnd = 0xff00000000ull;
static const uptr kLoAppMemBeg = 0x0100000000ull;
@@ -128,12 +121,10 @@ C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
0100 0000 00 - 0200 0000 00: main binary, modules, thread stacks (4 GB)
0200 0000 00 - 0300 0000 00: heap (4 GB)
0300 0000 00 - 0400 0000 00: - (4 GB)
-0400 0000 00 - 0c00 0000 00: shadow memory (32 GB)
-0c00 0000 00 - 0d00 0000 00: - (4 GB)
+0400 0000 00 - 0800 0000 00: shadow memory (16 GB)
+0800 0000 00 - 0d00 0000 00: - (20 GB)
0d00 0000 00 - 0e00 0000 00: metainfo (4 GB)
-0e00 0000 00 - 0f00 0000 00: - (4 GB)
-0f00 0000 00 - 0fc0 0000 00: traces (3 GB)
-0fc0 0000 00 - 1000 0000 00: -
+0e00 0000 00 - 1000 0000 00: -
*/
struct MappingAppleAarch64 {
static const uptr kLoAppMemBeg = 0x0100000000ull;
@@ -141,16 +132,14 @@ struct MappingAppleAarch64 {
static const uptr kHeapMemBeg = 0x0200000000ull;
static const uptr kHeapMemEnd = 0x0300000000ull;
static const uptr kShadowBeg = 0x0400000000ull;
- static const uptr kShadowEnd = 0x0c00000000ull;
+ static const uptr kShadowEnd = 0x0800000000ull;
static const uptr kMetaShadowBeg = 0x0d00000000ull;
static const uptr kMetaShadowEnd = 0x0e00000000ull;
- static const uptr kTraceMemBeg = 0x0f00000000ull;
- static const uptr kTraceMemEnd = 0x0fc0000000ull;
static const uptr kHiAppMemBeg = 0x0fc0000000ull;
static const uptr kHiAppMemEnd = 0x0fc0000000ull;
static const uptr kShadowMsk = 0x0ull;
static const uptr kShadowXor = 0x0ull;
- static const uptr kShadowAdd = 0x0ull;
+ static const uptr kShadowAdd = 0x0200000000ull;
static const uptr kVdsoBeg = 0x7000000000000000ull;
static const uptr kMidAppMemBeg = 0;
static const uptr kMidAppMemEnd = 0;
@@ -159,29 +148,25 @@ struct MappingAppleAarch64 {
/*
C/C++ on linux/aarch64 (39-bit VMA)
0000 0010 00 - 0100 0000 00: main binary
-0100 0000 00 - 0800 0000 00: -
-0800 0000 00 - 2000 0000 00: shadow memory
+0100 0000 00 - 0400 0000 00: -
+0400 0000 00 - 1000 0000 00: shadow memory
2000 0000 00 - 3100 0000 00: -
3100 0000 00 - 3400 0000 00: metainfo
3400 0000 00 - 5500 0000 00: -
5500 0000 00 - 5600 0000 00: main binary (PIE)
-5600 0000 00 - 6000 0000 00: -
-6000 0000 00 - 6200 0000 00: traces
-6200 0000 00 - 7d00 0000 00: -
+5600 0000 00 - 7c00 0000 00: -
7c00 0000 00 - 7d00 0000 00: heap
7d00 0000 00 - 7fff ffff ff: modules and main thread stack
*/
struct MappingAarch64_39 {
static const uptr kLoAppMemBeg = 0x0000001000ull;
static const uptr kLoAppMemEnd = 0x0100000000ull;
- static const uptr kShadowBeg = 0x0800000000ull;
- static const uptr kShadowEnd = 0x2000000000ull;
+ static const uptr kShadowBeg = 0x0400000000ull;
+ static const uptr kShadowEnd = 0x1000000000ull;
static const uptr kMetaShadowBeg = 0x3100000000ull;
static const uptr kMetaShadowEnd = 0x3400000000ull;
static const uptr kMidAppMemBeg = 0x5500000000ull;
- static const uptr kMidAppMemEnd = 0x5600000000ull;
- static const uptr kTraceMemBeg = 0x6000000000ull;
- static const uptr kTraceMemEnd = 0x6200000000ull;
+ static const uptr kMidAppMemEnd = 0x5600000000ull;
static const uptr kHeapMemBeg = 0x7c00000000ull;
static const uptr kHeapMemEnd = 0x7d00000000ull;
static const uptr kHiAppMemBeg = 0x7e00000000ull;
@@ -195,15 +180,13 @@ struct MappingAarch64_39 {
/*
C/C++ on linux/aarch64 (42-bit VMA)
00000 0010 00 - 01000 0000 00: main binary
-01000 0000 00 - 10000 0000 00: -
-10000 0000 00 - 20000 0000 00: shadow memory
-20000 0000 00 - 26000 0000 00: -
+01000 0000 00 - 08000 0000 00: -
+08000 0000 00 - 10000 0000 00: shadow memory
+10000 0000 00 - 26000 0000 00: -
26000 0000 00 - 28000 0000 00: metainfo
28000 0000 00 - 2aa00 0000 00: -
2aa00 0000 00 - 2ab00 0000 00: main binary (PIE)
-2ab00 0000 00 - 36200 0000 00: -
-36200 0000 00 - 36240 0000 00: traces
-36240 0000 00 - 3e000 0000 00: -
+2ab00 0000 00 - 3e000 0000 00: -
3e000 0000 00 - 3f000 0000 00: heap
3f000 0000 00 - 3ffff ffff ff: modules and main thread stack
*/
@@ -211,14 +194,12 @@ struct MappingAarch64_42 {
static const uptr kBroken = kBrokenReverseMapping;
static const uptr kLoAppMemBeg = 0x00000001000ull;
static const uptr kLoAppMemEnd = 0x01000000000ull;
- static const uptr kShadowBeg = 0x10000000000ull;
- static const uptr kShadowEnd = 0x20000000000ull;
+ static const uptr kShadowBeg = 0x08000000000ull;
+ static const uptr kShadowEnd = 0x10000000000ull;
static const uptr kMetaShadowBeg = 0x26000000000ull;
static const uptr kMetaShadowEnd = 0x28000000000ull;
static const uptr kMidAppMemBeg = 0x2aa00000000ull;
- static const uptr kMidAppMemEnd = 0x2ab00000000ull;
- static const uptr kTraceMemBeg = 0x36200000000ull;
- static const uptr kTraceMemEnd = 0x36400000000ull;
+ static const uptr kMidAppMemEnd = 0x2ab00000000ull;
static const uptr kHeapMemBeg = 0x3e000000000ull;
static const uptr kHeapMemEnd = 0x3f000000000ull;
static const uptr kHiAppMemBeg = 0x3f000000000ull;
@@ -232,14 +213,12 @@ struct MappingAarch64_42 {
struct MappingAarch64_48 {
static const uptr kLoAppMemBeg = 0x0000000001000ull;
static const uptr kLoAppMemEnd = 0x0000200000000ull;
- static const uptr kShadowBeg = 0x0002000000000ull;
- static const uptr kShadowEnd = 0x0004000000000ull;
+ static const uptr kShadowBeg = 0x0001000000000ull;
+ static const uptr kShadowEnd = 0x0002000000000ull;
static const uptr kMetaShadowBeg = 0x0005000000000ull;
static const uptr kMetaShadowEnd = 0x0006000000000ull;
static const uptr kMidAppMemBeg = 0x0aaaa00000000ull;
- static const uptr kMidAppMemEnd = 0x0aaaf00000000ull;
- static const uptr kTraceMemBeg = 0x0f06000000000ull;
- static const uptr kTraceMemEnd = 0x0f06200000000ull;
+ static const uptr kMidAppMemEnd = 0x0aaaf00000000ull;
static const uptr kHeapMemBeg = 0x0ffff00000000ull;
static const uptr kHeapMemEnd = 0x0ffff00000000ull;
static const uptr kHiAppMemBeg = 0x0ffff00000000ull;
@@ -257,9 +236,7 @@ C/C++ on linux/powerpc64 (44-bit VMA)
0001 0000 0000 - 0b00 0000 0000: shadow
0b00 0000 0000 - 0b00 0000 0000: -
0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects)
-0d00 0000 0000 - 0d00 0000 0000: -
-0d00 0000 0000 - 0f00 0000 0000: traces
-0f00 0000 0000 - 0f00 0000 0000: -
+0d00 0000 0000 - 0f00 0000 0000: -
0f00 0000 0000 - 0f50 0000 0000: heap
0f50 0000 0000 - 0f60 0000 0000: -
0f60 0000 0000 - 1000 0000 0000: modules and main thread stack
@@ -269,8 +246,6 @@ struct MappingPPC64_44 {
kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity;
static const uptr kMetaShadowBeg = 0x0b0000000000ull;
static const uptr kMetaShadowEnd = 0x0d0000000000ull;
- static const uptr kTraceMemBeg = 0x0d0000000000ull;
- static const uptr kTraceMemEnd = 0x0f0000000000ull;
static const uptr kShadowBeg = 0x000100000000ull;
static const uptr kShadowEnd = 0x0b0000000000ull;
static const uptr kLoAppMemBeg = 0x000000000100ull;
@@ -291,23 +266,19 @@ struct MappingPPC64_44 {
C/C++ on linux/powerpc64 (46-bit VMA)
0000 0000 1000 - 0100 0000 0000: main binary
0100 0000 0000 - 0200 0000 0000: -
-0100 0000 0000 - 1000 0000 0000: shadow
-1000 0000 0000 - 1000 0000 0000: -
-1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
-2000 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 2200 0000 0000: traces
-2200 0000 0000 - 3d00 0000 0000: -
+0100 0000 0000 - 0800 0000 0000: shadow
+0800 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 1200 0000 0000: metainfo (memory blocks and sync objects)
+1200 0000 0000 - 3d00 0000 0000: -
3d00 0000 0000 - 3e00 0000 0000: heap
3e00 0000 0000 - 3e80 0000 0000: -
3e80 0000 0000 - 4000 0000 0000: modules and main thread stack
*/
struct MappingPPC64_46 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
- static const uptr kMetaShadowEnd = 0x200000000000ull;
- static const uptr kTraceMemBeg = 0x200000000000ull;
- static const uptr kTraceMemEnd = 0x220000000000ull;
+ static const uptr kMetaShadowEnd = 0x120000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
- static const uptr kShadowEnd = 0x100000000000ull;
+ static const uptr kShadowEnd = 0x080000000000ull;
static const uptr kHeapMemBeg = 0x3d0000000000ull;
static const uptr kHeapMemEnd = 0x3e0000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -326,23 +297,19 @@ struct MappingPPC64_46 {
C/C++ on linux/powerpc64 (47-bit VMA)
0000 0000 1000 - 0100 0000 0000: main binary
0100 0000 0000 - 0200 0000 0000: -
-0100 0000 0000 - 1000 0000 0000: shadow
-1000 0000 0000 - 1000 0000 0000: -
-1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
-2000 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 2200 0000 0000: traces
-2200 0000 0000 - 7d00 0000 0000: -
+0100 0000 0000 - 0800 0000 0000: shadow
+0800 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 1200 0000 0000: metainfo (memory blocks and sync objects)
+1200 0000 0000 - 7d00 0000 0000: -
7d00 0000 0000 - 7e00 0000 0000: heap
7e00 0000 0000 - 7e80 0000 0000: -
7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
*/
struct MappingPPC64_47 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
- static const uptr kMetaShadowEnd = 0x200000000000ull;
- static const uptr kTraceMemBeg = 0x200000000000ull;
- static const uptr kTraceMemEnd = 0x220000000000ull;
+ static const uptr kMetaShadowEnd = 0x120000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
- static const uptr kShadowEnd = 0x100000000000ull;
+ static const uptr kShadowEnd = 0x080000000000ull;
static const uptr kHeapMemBeg = 0x7d0000000000ull;
static const uptr kHeapMemEnd = 0x7e0000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -362,22 +329,18 @@ C/C++ on linux/s390x
While the kernel provides a 64-bit address space, we have to restrict ourselves
to 48 bits due to how e.g. SyncVar::GetId() works.
0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB
-0e00 0000 0000 - 4000 0000 0000: -
-4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
-8000 0000 0000 - 9000 0000 0000: -
+0e00 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 4000 0000 0000: shadow - 32TiB (2 * app)
+4000 0000 0000 - 9000 0000 0000: -
9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
-9800 0000 0000 - a000 0000 0000: -
-a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
-b000 0000 0000 - be00 0000 0000: -
+9800 0000 0000 - be00 0000 0000: -
be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator)
*/
struct MappingS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
- static const uptr kTraceMemBeg = 0xa00000000000ull;
- static const uptr kTraceMemEnd = 0xb00000000000ull;
- static const uptr kShadowBeg = 0x400000000000ull;
- static const uptr kShadowEnd = 0x800000000000ull;
+ static const uptr kShadowBeg = 0x200000000000ull;
+ static const uptr kShadowEnd = 0x400000000000ull;
static const uptr kHeapMemBeg = 0xbe0000000000ull;
static const uptr kHeapMemEnd = 0xc00000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -397,21 +360,17 @@ struct MappingS390x {
0000 1000 0000 - 00c0 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 2380 0000 0000: shadow
-2380 0000 0000 - 3000 0000 0000: -
+2000 0000 0000 - 21c0 0000 0000: shadow
+21c0 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 6000 0000 0000: -
-6000 0000 0000 - 6200 0000 0000: traces
-6200 0000 0000 - 8000 0000 0000: -
+4000 0000 0000 - 8000 0000 0000: -
*/
struct MappingGo48 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
- static const uptr kTraceMemBeg = 0x600000000000ull;
- static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
- static const uptr kShadowEnd = 0x238000000000ull;
+ static const uptr kShadowEnd = 0x21c000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -431,8 +390,8 @@ struct MappingGo48 {
0000 1000 0000 - 00f8 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 0100 0000 0000: -
-0100 0000 0000 - 0500 0000 0000: shadow
-0500 0000 0000 - 0700 0000 0000: traces
+0100 0000 0000 - 0300 0000 0000: shadow
+0300 0000 0000 - 0700 0000 0000: -
0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects)
07d0 0000 0000 - 8000 0000 0000: -
*/
@@ -440,10 +399,8 @@ struct MappingGo48 {
struct MappingGoWindows {
static const uptr kMetaShadowBeg = 0x070000000000ull;
static const uptr kMetaShadowEnd = 0x077000000000ull;
- static const uptr kTraceMemBeg = 0x050000000000ull;
- static const uptr kTraceMemEnd = 0x070000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
- static const uptr kShadowEnd = 0x050000000000ull;
+ static const uptr kShadowEnd = 0x030000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -463,21 +420,17 @@ struct MappingGoWindows {
0000 1000 0000 - 00c0 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 2380 0000 0000: shadow
-2380 0000 0000 - 2400 0000 0000: -
-2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects)
-3400 0000 0000 - 3600 0000 0000: -
-3600 0000 0000 - 3800 0000 0000: traces
-3800 0000 0000 - 4000 0000 0000: -
+2000 0000 0000 - 21c0 0000 0000: shadow
+21c0 0000 0000 - 2400 0000 0000: -
+2400 0000 0000 - 2470 0000 0000: metainfo (memory blocks and sync objects)
+2470 0000 0000 - 4000 0000 0000: -
*/
struct MappingGoPPC64_46 {
static const uptr kMetaShadowBeg = 0x240000000000ull;
- static const uptr kMetaShadowEnd = 0x340000000000ull;
- static const uptr kTraceMemBeg = 0x360000000000ull;
- static const uptr kTraceMemEnd = 0x380000000000ull;
+ static const uptr kMetaShadowEnd = 0x247000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
- static const uptr kShadowEnd = 0x238000000000ull;
+ static const uptr kShadowEnd = 0x21c000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -497,21 +450,17 @@ struct MappingGoPPC64_46 {
0000 1000 0000 - 00c0 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 3000 0000 0000: shadow
-3000 0000 0000 - 3000 0000 0000: -
-3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 6000 0000 0000: -
-6000 0000 0000 - 6200 0000 0000: traces
-6200 0000 0000 - 8000 0000 0000: -
+2000 0000 0000 - 2800 0000 0000: shadow
+2800 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects)
+3200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoPPC64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
- static const uptr kMetaShadowEnd = 0x400000000000ull;
- static const uptr kTraceMemBeg = 0x600000000000ull;
- static const uptr kTraceMemEnd = 0x620000000000ull;
+ static const uptr kMetaShadowEnd = 0x320000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
- static const uptr kShadowEnd = 0x300000000000ull;
+ static const uptr kShadowEnd = 0x280000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -531,20 +480,16 @@ struct MappingGoPPC64_47 {
0000 1000 0000 - 00c0 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 3000 0000 0000: shadow
-3000 0000 0000 - 3000 0000 0000: -
-3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 6000 0000 0000: -
-6000 0000 0000 - 6200 0000 0000: traces
-6200 0000 0000 - 8000 0000 0000: -
+2000 0000 0000 - 2800 0000 0000: shadow
+2800 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects)
+3200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoAarch64 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
- static const uptr kMetaShadowEnd = 0x400000000000ull;
- static const uptr kTraceMemBeg = 0x600000000000ull;
- static const uptr kTraceMemEnd = 0x620000000000ull;
+ static const uptr kMetaShadowEnd = 0x320000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
- static const uptr kShadowEnd = 0x300000000000ull;
+ static const uptr kShadowEnd = 0x280000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -565,20 +510,16 @@ Go on linux/mips64 (47-bit VMA)
0000 1000 0000 - 00c0 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 3000 0000 0000: shadow
-3000 0000 0000 - 3000 0000 0000: -
-3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 6000 0000 0000: -
-6000 0000 0000 - 6200 0000 0000: traces
-6200 0000 0000 - 8000 0000 0000: -
+2000 0000 0000 - 2800 0000 0000: shadow
+2800 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects)
+3200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoMips64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
- static const uptr kMetaShadowEnd = 0x400000000000ull;
- static const uptr kTraceMemBeg = 0x600000000000ull;
- static const uptr kTraceMemEnd = 0x620000000000ull;
+ static const uptr kMetaShadowEnd = 0x320000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
- static const uptr kShadowEnd = 0x300000000000ull;
+ static const uptr kShadowEnd = 0x280000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x00e000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -597,19 +538,15 @@ struct MappingGoMips64_47 {
Go on linux/s390x
0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB
1000 0000 0000 - 4000 0000 0000: -
-4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
-8000 0000 0000 - 9000 0000 0000: -
+4000 0000 0000 - 6000 0000 0000: shadow - 64TiB (4 * app)
+6000 0000 0000 - 9000 0000 0000: -
9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
-9800 0000 0000 - a000 0000 0000: -
-a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
*/
struct MappingGoS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
- static const uptr kTraceMemBeg = 0xa00000000000ull;
- static const uptr kTraceMemEnd = 0xb00000000000ull;
static const uptr kShadowBeg = 0x400000000000ull;
- static const uptr kShadowEnd = 0x800000000000ull;
+ static const uptr kShadowEnd = 0x600000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
static const uptr kLoAppMemEnd = 0x100000000000ull;
static const uptr kMidAppMemBeg = 0;
@@ -648,11 +585,11 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) {
return Func::template Apply<MappingGo48>(arg);
# endif
#else // SANITIZER_GO
-# if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_MAC && !SANITIZER_IOS
- return Func::template Apply<Mapping48AddressSpace>(arg);
-# elif defined(__aarch64__) && defined(__APPLE__)
+# if SANITIZER_IOS && !SANITIZER_IOSSIM
return Func::template Apply<MappingAppleAarch64>(arg);
-# elif defined(__aarch64__) && !defined(__APPLE__)
+# elif defined(__x86_64__) || SANITIZER_MAC
+ return Func::template Apply<Mapping48AddressSpace>(arg);
+# elif defined(__aarch64__)
switch (vmaSize) {
case 39:
return Func::template Apply<MappingAarch64_39>(arg);
@@ -715,8 +652,6 @@ enum MappingType {
kShadowEnd,
kMetaShadowBeg,
kMetaShadowEnd,
- kTraceMemBeg,
- kTraceMemEnd,
kVdsoBeg,
};
@@ -750,10 +685,6 @@ struct MappingField {
return Mapping::kMetaShadowBeg;
case kMetaShadowEnd:
return Mapping::kMetaShadowEnd;
- case kTraceMemBeg:
- return Mapping::kTraceMemBeg;
- case kTraceMemEnd:
- return Mapping::kTraceMemEnd;
}
Die();
}
@@ -792,11 +723,6 @@ uptr MetaShadowBeg(void) { return SelectMapping<MappingField>(kMetaShadowBeg); }
ALWAYS_INLINE
uptr MetaShadowEnd(void) { return SelectMapping<MappingField>(kMetaShadowEnd); }
-ALWAYS_INLINE
-uptr TraceMemBeg(void) { return SelectMapping<MappingField>(kTraceMemBeg); }
-ALWAYS_INLINE
-uptr TraceMemEnd(void) { return SelectMapping<MappingField>(kTraceMemEnd); }
-
struct IsAppMemImpl {
template <typename Mapping>
static bool Apply(uptr mem) {
@@ -934,43 +860,10 @@ inline uptr RestoreAddr(uptr addr) {
return SelectMapping<RestoreAddrImpl>(addr);
}
-// The additional page is to catch shadow stack overflow as paging fault.
-// Windows wants 64K alignment for mmaps.
-const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace)
- + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1);
-
-struct GetThreadTraceImpl {
- template <typename Mapping>
- static uptr Apply(uptr tid) {
- uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize;
- DCHECK_LT(p, Mapping::kTraceMemEnd);
- return p;
- }
-};
-
-ALWAYS_INLINE
-uptr GetThreadTrace(int tid) { return SelectMapping<GetThreadTraceImpl>(tid); }
-
-struct GetThreadTraceHeaderImpl {
- template <typename Mapping>
- static uptr Apply(uptr tid) {
- uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize +
- kTraceSize * sizeof(Event);
- DCHECK_LT(p, Mapping::kTraceMemEnd);
- return p;
- }
-};
-
-ALWAYS_INLINE
-uptr GetThreadTraceHeader(int tid) {
- return SelectMapping<GetThreadTraceHeaderImpl>(tid);
-}
-
void InitializePlatform();
void InitializePlatformEarly();
void CheckAndProtect();
void InitializeShadowMemoryPlatform();
-void FlushShadowMemory();
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns);
int ExtractResolvFDs(void *state, int *fds, int nfd);
int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
diff --git a/libsanitizer/tsan/tsan_platform_linux.cpp b/libsanitizer/tsan/tsan_platform_linux.cpp
index 73ec148..17dbdff 100644
--- a/libsanitizer/tsan/tsan_platform_linux.cpp
+++ b/libsanitizer/tsan/tsan_platform_linux.cpp
@@ -94,7 +94,6 @@ enum {
MemMeta,
MemFile,
MemMmap,
- MemTrace,
MemHeap,
MemOther,
MemCount,
@@ -112,8 +111,6 @@ void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
mem[file ? MemFile : MemMmap] += rss;
else if (p >= HeapMemBeg() && p < HeapMemEnd())
mem[MemHeap] += rss;
- else if (p >= TraceMemBeg() && p < TraceMemEnd())
- mem[MemTrace] += rss;
else
mem[MemOther] += rss;
}
@@ -126,42 +123,33 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
StackDepotStats stacks = StackDepotGetStats();
uptr nthread, nlive;
ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
+ uptr trace_mem;
+ {
+ Lock l(&ctx->slot_mtx);
+ trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart);
+ }
uptr internal_stats[AllocatorStatCount];
internal_allocator()->GetStats(internal_stats);
// All these are allocated from the common mmap region.
- mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated +
- internal_stats[AllocatorStatMapped];
+ mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem +
+ stacks.allocated + internal_stats[AllocatorStatMapped];
if (s64(mem[MemMmap]) < 0)
mem[MemMmap] = 0;
internal_snprintf(
buf, buf_size,
- "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
- " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
- " stacks=%zd[%zd] nthr=%zd/%zd\n",
- uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20,
- mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20,
- mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20,
+ "==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd"
+ " mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
+ " trace:%zu stacks=%zd threads=%zu/%zu\n",
+ internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch,
+ mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20,
+ mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20,
mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20,
- meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20,
- stacks.n_uniq_ids, nlive, nthread);
-}
-
-# if SANITIZER_LINUX
-void FlushShadowMemoryCallback(
- const SuspendedThreadsList &suspended_threads_list,
- void *argument) {
- ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd());
-}
-#endif
-
-void FlushShadowMemory() {
-#if SANITIZER_LINUX
- StopTheWorld(FlushShadowMemoryCallback, 0);
-#endif
+ meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20,
+ stacks.allocated >> 20, nlive, nthread);
}
#if !SANITIZER_GO
-// Mark shadow for .rodata sections with the special kShadowRodata marker.
+// Mark shadow for .rodata sections with the special Shadow::kRodata marker.
// Accesses to .rodata can't race, so this saves time, memory and trace space.
static void MapRodata() {
// First create temp file.
@@ -182,13 +170,13 @@ static void MapRodata() {
return;
internal_unlink(name); // Unlink it now, so that we can reuse the buffer.
fd_t fd = openrv;
- // Fill the file with kShadowRodata.
+ // Fill the file with Shadow::kRodata.
const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow);
InternalMmapVector<RawShadow> marker(kMarkerSize);
// volatile to prevent insertion of memset
for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize;
p++)
- *p = kShadowRodata;
+ *p = Shadow::kRodata;
internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow));
// Map the file into memory.
uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE,
diff --git a/libsanitizer/tsan/tsan_platform_mac.cpp b/libsanitizer/tsan/tsan_platform_mac.cpp
index 3faa2d0..44b98d4 100644
--- a/libsanitizer/tsan/tsan_platform_mac.cpp
+++ b/libsanitizer/tsan/tsan_platform_mac.cpp
@@ -25,6 +25,7 @@
#include "tsan_rtl.h"
#include "tsan_flags.h"
+#include <limits.h>
#include <mach/mach.h>
#include <pthread.h>
#include <signal.h>
@@ -45,76 +46,86 @@
namespace __tsan {
#if !SANITIZER_GO
-static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
- atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
- void *val = (void *)atomic_load_relaxed(a);
- atomic_signal_fence(memory_order_acquire); // Turns the previous load into
- // acquire wrt signals.
- if (UNLIKELY(val == nullptr)) {
- val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, -1, 0);
- CHECK(val);
- void *cmp = nullptr;
- if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val,
- memory_order_acq_rel)) {
- internal_munmap(val, size);
- val = cmp;
- }
- }
- return val;
+static char main_thread_state[sizeof(ThreadState)] ALIGNED(
+ SANITIZER_CACHE_LINE_SIZE);
+static ThreadState *dead_thread_state;
+static pthread_key_t thread_state_key;
+
+// We rely on the following documented, but Darwin-specific behavior to keep the
+// reference to the ThreadState object alive in TLS:
+// pthread_key_create man page:
+// If, after all the destructors have been called for all non-NULL values with
+// associated destructors, there are still some non-NULL values with
+// associated destructors, then the process is repeated. If, after at least
+// [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for
+// outstanding non-NULL values, there are still some non-NULL values with
+// associated destructors, the implementation stops calling destructors.
+static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations");
+static void ThreadStateDestructor(void *thr) {
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
}
-// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is
-// problematic, because there are several places where interceptors are called
-// when TLVs are not accessible (early process startup, thread cleanup, ...).
-// The following provides a "poor man's TLV" implementation, where we use the
-// shadow memory of the pointer returned by pthread_self() to store a pointer to
-// the ThreadState object. The main thread's ThreadState is stored separately
-// in a static variable, because we need to access it even before the
-// shadow memory is set up.
-static uptr main_thread_identity = 0;
-ALIGNED(64) static char main_thread_state[sizeof(ThreadState)];
-static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state;
-
-// We cannot use pthread_self() before libpthread has been initialized. Our
-// current heuristic for guarding this is checking `main_thread_identity` which
-// is only assigned in `__tsan::InitializePlatform`.
-static ThreadState **cur_thread_location() {
- if (main_thread_identity == 0)
- return &main_thread_state_loc;
- uptr thread_identity = (uptr)pthread_self();
- if (thread_identity == main_thread_identity)
- return &main_thread_state_loc;
- return (ThreadState **)MemToShadow(thread_identity);
+static void InitializeThreadStateStorage() {
+ int res;
+ CHECK_EQ(thread_state_key, 0);
+ res = pthread_key_create(&thread_state_key, ThreadStateDestructor);
+ CHECK_EQ(res, 0);
+ res = pthread_setspecific(thread_state_key, main_thread_state);
+ CHECK_EQ(res, 0);
+
+ auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+ dts->fast_state.SetIgnoreBit();
+ dts->ignore_interceptors = 1;
+ dts->is_dead = true;
+ const_cast<Tid &>(dts->tid) = kInvalidTid;
+ res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ); // immutable
+ CHECK_EQ(res, 0);
+ dead_thread_state = dts;
}
ThreadState *cur_thread() {
- return (ThreadState *)SignalSafeGetOrAllocate(
- (uptr *)cur_thread_location(), sizeof(ThreadState));
+ // Some interceptors get called before libpthread has been initialized and in
+ // these cases we must avoid calling any pthread APIs.
+ if (UNLIKELY(!thread_state_key)) {
+ return (ThreadState *)main_thread_state;
+ }
+
+ // We only reach this line after InitializeThreadStateStorage() ran, i.e,
+ // after TSan (and therefore libpthread) have been initialized.
+ ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+ if (UNLIKELY(!thr)) {
+ thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
+ }
+ return thr;
}
void set_cur_thread(ThreadState *thr) {
- *cur_thread_location() = thr;
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
}
-// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call
-// munmap first and then clear `fake_tls`; if we receive a signal in between,
-// handler will try to access the unmapped ThreadState.
void cur_thread_finalize() {
- ThreadState **thr_state_loc = cur_thread_location();
- if (thr_state_loc == &main_thread_state_loc) {
+ ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+ CHECK(thr);
+ if (thr == (ThreadState *)main_thread_state) {
// Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
// exit the main thread. Let's keep the main thread's ThreadState.
return;
}
- internal_munmap(*thr_state_loc, sizeof(ThreadState));
- *thr_state_loc = nullptr;
+ // Intercepted functions can still get called after cur_thread_finalize()
+ // (called from DestroyThreadState()), so put a fake thread state for "dead"
+ // threads. An alternative solution would be to release the ThreadState
+ // object from THREAD_DESTROY (which is delivered later and on the parent
+ // thread) instead of THREAD_TERMINATE.
+ int res = pthread_setspecific(thread_state_key, dead_thread_state);
+ CHECK_EQ(res, 0);
+ UnmapOrDie(thr, sizeof(ThreadState));
}
#endif
-void FlushShadowMemory() {
-}
-
static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
vm_address_t address = start;
vm_address_t end_address = end;
@@ -142,12 +153,10 @@ static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr shadow_res, shadow_dirty;
uptr meta_res, meta_dirty;
- uptr trace_res, trace_dirty;
RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty);
RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty);
- RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty);
-#if !SANITIZER_GO
+# if !SANITIZER_GO
uptr low_res, low_dirty;
uptr high_res, high_dirty;
uptr heap_res, heap_dirty;
@@ -166,7 +175,6 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
buf, buf_size,
"shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
"meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
- "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
# if !SANITIZER_GO
"low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
"high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
@@ -179,7 +187,6 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
"------------------------------\n",
ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
- TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024,
# if !SANITIZER_GO
LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024,
HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024,
@@ -222,11 +229,10 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
}
} else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
- if (thread == pthread_self()) {
- ThreadState *thr = cur_thread();
- if (thr->tctx) {
- DestroyThreadState();
- }
+ CHECK_EQ(thread, pthread_self());
+ ThreadState *thr = cur_thread();
+ if (thr->tctx) {
+ DestroyThreadState();
}
}
@@ -253,8 +259,7 @@ void InitializePlatform() {
#if !SANITIZER_GO
CheckAndProtect();
- CHECK_EQ(main_thread_identity, 0);
- main_thread_identity = (uptr)pthread_self();
+ InitializeThreadStateStorage();
prev_pthread_introspection_hook =
pthread_introspection_hook_install(&my_pthread_introspection_hook);
@@ -286,24 +291,11 @@ uptr ExtractLongJmpSp(uptr *env) {
extern "C" void __tsan_tls_initialization() {}
void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
- // The pointer to the ThreadState object is stored in the shadow memory
- // of the tls.
- uptr tls_end = tls_addr + tls_size;
- uptr thread_identity = (uptr)pthread_self();
const uptr pc = StackTrace::GetNextInstructionPc(
reinterpret_cast<uptr>(__tsan_tls_initialization));
- if (thread_identity == main_thread_identity) {
- MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
- } else {
- uptr thr_state_start = thread_identity;
- uptr thr_state_end = thr_state_start + sizeof(uptr);
- CHECK_GE(thr_state_start, tls_addr);
- CHECK_LE(thr_state_start, tls_addr + tls_size);
- CHECK_GE(thr_state_end, tls_addr);
- CHECK_LE(thr_state_end, tls_addr + tls_size);
- MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr);
- MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end);
- }
+ // Unlike Linux, we only store a pointer to the ThreadState object in TLS;
+ // just mark the entire range as written to.
+ MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
}
#endif
diff --git a/libsanitizer/tsan/tsan_platform_posix.cpp b/libsanitizer/tsan/tsan_platform_posix.cpp
index 763ac44..71874aa 100644
--- a/libsanitizer/tsan/tsan_platform_posix.cpp
+++ b/libsanitizer/tsan/tsan_platform_posix.cpp
@@ -110,27 +110,23 @@ void CheckAndProtect() {
Die();
}
-# if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS
+# if SANITIZER_IOS && !SANITIZER_IOSSIM
ProtectRange(HeapMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
- ProtectRange(MetaShadowEnd(), TraceMemBeg());
-#else
+ ProtectRange(MetaShadowEnd(), HiAppMemBeg());
+# else
ProtectRange(LoAppMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
if (MidAppMemBeg()) {
ProtectRange(MetaShadowEnd(), MidAppMemBeg());
- ProtectRange(MidAppMemEnd(), TraceMemBeg());
+ ProtectRange(MidAppMemEnd(), HeapMemBeg());
} else {
- ProtectRange(MetaShadowEnd(), TraceMemBeg());
+ ProtectRange(MetaShadowEnd(), HeapMemBeg());
}
- // Memory for traces is mapped lazily in MapThreadTrace.
- // Protect the whole range for now, so that user does not map something here.
- ProtectRange(TraceMemBeg(), TraceMemEnd());
- ProtectRange(TraceMemEnd(), HeapMemBeg());
ProtectRange(HeapEnd(), HiAppMemBeg());
-#endif
+# endif
-#if defined(__s390x__)
+# if defined(__s390x__)
// Protect the rest of the address space.
const uptr user_addr_max_l4 = 0x0020000000000000ull;
const uptr user_addr_max_l5 = 0xfffffffffffff000ull;
diff --git a/libsanitizer/tsan/tsan_platform_windows.cpp b/libsanitizer/tsan/tsan_platform_windows.cpp
index fea8937..eb8f354 100644
--- a/libsanitizer/tsan/tsan_platform_windows.cpp
+++ b/libsanitizer/tsan/tsan_platform_windows.cpp
@@ -20,9 +20,6 @@
namespace __tsan {
-void FlushShadowMemory() {
-}
-
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {}
void InitializePlatformEarly() {
diff --git a/libsanitizer/tsan/tsan_report.cpp b/libsanitizer/tsan/tsan_report.cpp
index a926c37..9f15127 100644
--- a/libsanitizer/tsan/tsan_report.cpp
+++ b/libsanitizer/tsan/tsan_report.cpp
@@ -126,7 +126,7 @@ static void PrintMutexSet(Vector<ReportMopMutex> const& mset) {
if (i == 0)
Printf(" (mutexes:");
const ReportMopMutex m = mset[i];
- Printf(" %s M%llu", m.write ? "write" : "read", m.id);
+ Printf(" %s M%u", m.write ? "write" : "read", m.id);
Printf(i == mset.Size() - 1 ? ")" : ",");
}
}
@@ -211,29 +211,23 @@ static void PrintLocation(const ReportLocation *loc) {
static void PrintMutexShort(const ReportMutex *rm, const char *after) {
Decorator d;
- Printf("%sM%lld%s%s", d.Mutex(), rm->id, d.Default(), after);
+ Printf("%sM%d%s%s", d.Mutex(), rm->id, d.Default(), after);
}
static void PrintMutexShortWithAddress(const ReportMutex *rm,
const char *after) {
Decorator d;
- Printf("%sM%lld (%p)%s%s", d.Mutex(), rm->id,
+ Printf("%sM%d (%p)%s%s", d.Mutex(), rm->id,
reinterpret_cast<void *>(rm->addr), d.Default(), after);
}
static void PrintMutex(const ReportMutex *rm) {
Decorator d;
- if (rm->destroyed) {
- Printf("%s", d.Mutex());
- Printf(" Mutex M%llu is already destroyed.\n\n", rm->id);
- Printf("%s", d.Default());
- } else {
- Printf("%s", d.Mutex());
- Printf(" Mutex M%llu (%p) created at:\n", rm->id,
- reinterpret_cast<void *>(rm->addr));
- Printf("%s", d.Default());
- PrintStack(rm->stack);
- }
+ Printf("%s", d.Mutex());
+ Printf(" Mutex M%u (%p) created at:\n", rm->id,
+ reinterpret_cast<void *>(rm->addr));
+ Printf("%s", d.Default());
+ PrintStack(rm->stack);
}
static void PrintThread(const ReportThread *rt) {
@@ -312,6 +306,9 @@ void PrintReport(const ReportDesc *rep) {
(int)internal_getpid());
Printf("%s", d.Default());
+ if (rep->typ == ReportTypeErrnoInSignal)
+ Printf(" Signal %u handler invoked at:\n", rep->signum);
+
if (rep->typ == ReportTypeDeadlock) {
char thrbuf[kThreadBufSize];
Printf(" Cycle in lock order graph: ");
@@ -460,12 +457,12 @@ void PrintReport(const ReportDesc *rep) {
} else if (rep->typ == ReportTypeDeadlock) {
Printf("WARNING: DEADLOCK\n");
for (uptr i = 0; i < rep->mutexes.Size(); i++) {
- Printf("Goroutine %d lock mutex %llu while holding mutex %llu:\n", 999,
+ Printf("Goroutine %d lock mutex %u while holding mutex %u:\n", 999,
rep->mutexes[i]->id,
rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i]);
Printf("\n");
- Printf("Mutex %llu was previously locked here:\n",
+ Printf("Mutex %u was previously locked here:\n",
rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i + 1]);
Printf("\n");
diff --git a/libsanitizer/tsan/tsan_report.h b/libsanitizer/tsan/tsan_report.h
index d68c2db..718eacd 100644
--- a/libsanitizer/tsan/tsan_report.h
+++ b/libsanitizer/tsan/tsan_report.h
@@ -43,7 +43,7 @@ struct ReportStack {
};
struct ReportMopMutex {
- u64 id;
+ int id;
bool write;
};
@@ -91,9 +91,8 @@ struct ReportThread {
};
struct ReportMutex {
- u64 id;
+ int id;
uptr addr;
- bool destroyed;
ReportStack *stack;
};
@@ -109,6 +108,7 @@ class ReportDesc {
Vector<Tid> unique_tids;
ReportStack *sleep;
int count;
+ int signum = 0;
ReportDesc();
~ReportDesc();
diff --git a/libsanitizer/tsan/tsan_rtl.cpp b/libsanitizer/tsan/tsan_rtl.cpp
index 46dec04..1d6fc72 100644
--- a/libsanitizer/tsan/tsan_rtl.cpp
+++ b/libsanitizer/tsan/tsan_rtl.cpp
@@ -16,6 +16,7 @@
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_file.h"
+#include "sanitizer_common/sanitizer_interface_internal.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
@@ -34,6 +35,9 @@ extern "C" void __tsan_resume() {
__tsan_resumed = 1;
}
+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_test_only_on_fork() {}
+
namespace __tsan {
#if !SANITIZER_GO
@@ -54,109 +58,355 @@ Context *ctx;
bool OnFinalize(bool failed);
void OnInitialize();
#else
-#include <dlfcn.h>
SANITIZER_WEAK_CXX_DEFAULT_IMPL
bool OnFinalize(bool failed) {
-#if !SANITIZER_GO
+# if !SANITIZER_GO
if (on_finalize)
return on_finalize(failed);
-#endif
+# endif
return failed;
}
+
SANITIZER_WEAK_CXX_DEFAULT_IMPL
void OnInitialize() {
-#if !SANITIZER_GO
+# if !SANITIZER_GO
if (on_initialize)
on_initialize();
-#endif
+# endif
}
#endif
-static ThreadContextBase *CreateThreadContext(Tid tid) {
- // Map thread trace when context is created.
- char name[50];
- internal_snprintf(name, sizeof(name), "trace %u", tid);
- MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
- const uptr hdr = GetThreadTraceHeader(tid);
- internal_snprintf(name, sizeof(name), "trace header %u", tid);
- MapThreadTrace(hdr, sizeof(Trace), name);
- new((void*)hdr) Trace();
- // We are going to use only a small part of the trace with the default
- // value of history_size. However, the constructor writes to the whole trace.
- // Release the unused part.
- uptr hdr_end = hdr + sizeof(Trace);
- hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
- hdr_end = RoundUp(hdr_end, GetPageSizeCached());
- if (hdr_end < hdr + sizeof(Trace)) {
- ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
- uptr unused = hdr + sizeof(Trace) - hdr_end;
- if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
- Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end,
- unused);
- CHECK("unable to mprotect" && 0);
+static TracePart* TracePartAlloc(ThreadState* thr) {
+ TracePart* part = nullptr;
+ {
+ Lock lock(&ctx->slot_mtx);
+ uptr max_parts = Trace::kMinParts + flags()->history_size;
+ Trace* trace = &thr->tctx->trace;
+ if (trace->parts_allocated == max_parts ||
+ ctx->trace_part_finished_excess) {
+ part = ctx->trace_part_recycle.PopFront();
+ DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part);
+ if (part && part->trace) {
+ Trace* trace1 = part->trace;
+ Lock trace_lock(&trace1->mtx);
+ part->trace = nullptr;
+ TracePart* part1 = trace1->parts.PopFront();
+ CHECK_EQ(part, part1);
+ if (trace1->parts_allocated > trace1->parts.Size()) {
+ ctx->trace_part_finished_excess +=
+ trace1->parts_allocated - trace1->parts.Size();
+ trace1->parts_allocated = trace1->parts.Size();
+ }
+ }
+ }
+ if (trace->parts_allocated < max_parts) {
+ trace->parts_allocated++;
+ if (ctx->trace_part_finished_excess)
+ ctx->trace_part_finished_excess--;
}
+ if (!part)
+ ctx->trace_part_total_allocated++;
+ else if (ctx->trace_part_recycle_finished)
+ ctx->trace_part_recycle_finished--;
}
- return New<ThreadContext>(tid);
+ if (!part)
+ part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart();
+ return part;
}
+static void TracePartFree(TracePart* part) SANITIZER_REQUIRES(ctx->slot_mtx) {
+ DCHECK(part->trace);
+ part->trace = nullptr;
+ ctx->trace_part_recycle.PushFront(part);
+}
+
+void TraceResetForTesting() {
+ Lock lock(&ctx->slot_mtx);
+ while (auto* part = ctx->trace_part_recycle.PopFront()) {
+ if (auto trace = part->trace)
+ CHECK_EQ(trace->parts.PopFront(), part);
+ UnmapOrDie(part, sizeof(*part));
+ }
+ ctx->trace_part_total_allocated = 0;
+ ctx->trace_part_recycle_finished = 0;
+ ctx->trace_part_finished_excess = 0;
+}
+
+static void DoResetImpl(uptr epoch) {
+ ThreadRegistryLock lock0(&ctx->thread_registry);
+ Lock lock1(&ctx->slot_mtx);
+ CHECK_EQ(ctx->global_epoch, epoch);
+ ctx->global_epoch++;
+ CHECK(!ctx->resetting);
+ ctx->resetting = true;
+ for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) {
+ ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked(
+ static_cast<Tid>(i));
+ // Potentially we could purge all ThreadStatusDead threads from the
+ // registry. Since we reset all shadow, they can't race with anything
+ // anymore. However, their tid's can still be stored in some aux places
+ // (e.g. tid of thread that created something).
+ auto trace = &tctx->trace;
+ Lock lock(&trace->mtx);
+ bool attached = tctx->thr && tctx->thr->slot;
+ auto parts = &trace->parts;
+ bool local = false;
+ while (!parts->Empty()) {
+ auto part = parts->Front();
+ local = local || part == trace->local_head;
+ if (local)
+ CHECK(!ctx->trace_part_recycle.Queued(part));
+ else
+ ctx->trace_part_recycle.Remove(part);
+ if (attached && parts->Size() == 1) {
+ // The thread is running and this is the last/current part.
+ // Set the trace position to the end of the current part
+ // to force the thread to call SwitchTracePart and re-attach
+ // to a new slot and allocate a new trace part.
+ // Note: the thread is concurrently modifying the position as well,
+ // so this is only best-effort. The thread can only modify position
+ // within this part, because switching parts is protected by
+ // slot/trace mutexes that we hold here.
+ atomic_store_relaxed(
+ &tctx->thr->trace_pos,
+ reinterpret_cast<uptr>(&part->events[TracePart::kSize]));
+ break;
+ }
+ parts->Remove(part);
+ TracePartFree(part);
+ }
+ CHECK_LE(parts->Size(), 1);
+ trace->local_head = parts->Front();
+ if (tctx->thr && !tctx->thr->slot) {
+ atomic_store_relaxed(&tctx->thr->trace_pos, 0);
+ tctx->thr->trace_prev_pc = 0;
+ }
+ if (trace->parts_allocated > trace->parts.Size()) {
+ ctx->trace_part_finished_excess +=
+ trace->parts_allocated - trace->parts.Size();
+ trace->parts_allocated = trace->parts.Size();
+ }
+ }
+ while (ctx->slot_queue.PopFront()) {
+ }
+ for (auto& slot : ctx->slots) {
+ slot.SetEpoch(kEpochZero);
+ slot.journal.Reset();
+ slot.thr = nullptr;
+ ctx->slot_queue.PushBack(&slot);
+ }
+
+ DPrintf("Resetting shadow...\n");
+ if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(),
+ "shadow")) {
+ Printf("failed to reset shadow memory\n");
+ Die();
+ }
+ DPrintf("Resetting meta shadow...\n");
+ ctx->metamap.ResetClocks();
+ ctx->resetting = false;
+}
+
+// Clang does not understand locking all slots in the loop:
+// error: expecting mutex 'slot.mtx' to be held at start of each loop
+void DoReset(ThreadState* thr, uptr epoch) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ {
+ for (auto& slot : ctx->slots) {
+ slot.mtx.Lock();
+ if (UNLIKELY(epoch == 0))
+ epoch = ctx->global_epoch;
+ if (UNLIKELY(epoch != ctx->global_epoch)) {
+ // Epoch can't change once we've locked the first slot.
+ CHECK_EQ(slot.sid, 0);
+ slot.mtx.Unlock();
+ return;
+ }
+ }
+ }
+ DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch);
+ DoResetImpl(epoch);
+ for (auto& slot : ctx->slots) slot.mtx.Unlock();
+}
+
+void FlushShadowMemory() { DoReset(nullptr, 0); }
+
+static TidSlot* FindSlotAndLock(ThreadState* thr)
+ SANITIZER_ACQUIRE(thr->slot->mtx) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ CHECK(!thr->slot);
+ TidSlot* slot = nullptr;
+ for (;;) {
+ uptr epoch;
+ {
+ Lock lock(&ctx->slot_mtx);
+ epoch = ctx->global_epoch;
+ if (slot) {
+ // This is an exhausted slot from the previous iteration.
+ if (ctx->slot_queue.Queued(slot))
+ ctx->slot_queue.Remove(slot);
+ thr->slot_locked = false;
+ slot->mtx.Unlock();
+ }
+ for (;;) {
+ slot = ctx->slot_queue.PopFront();
+ if (!slot)
+ break;
+ if (slot->epoch() != kEpochLast) {
+ ctx->slot_queue.PushBack(slot);
+ break;
+ }
+ }
+ }
+ if (!slot) {
+ DoReset(thr, epoch);
+ continue;
+ }
+ slot->mtx.Lock();
+ CHECK(!thr->slot_locked);
+ thr->slot_locked = true;
+ if (slot->thr) {
+ DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid,
+ slot->thr->tid);
+ slot->SetEpoch(slot->thr->fast_state.epoch());
+ slot->thr = nullptr;
+ }
+ if (slot->epoch() != kEpochLast)
+ return slot;
+ }
+}
+
+void SlotAttachAndLock(ThreadState* thr) {
+ TidSlot* slot = FindSlotAndLock(thr);
+ DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast<int>(slot->sid));
+ CHECK(!slot->thr);
+ CHECK(!thr->slot);
+ slot->thr = thr;
+ thr->slot = slot;
+ Epoch epoch = EpochInc(slot->epoch());
+ CHECK(!EpochOverflow(epoch));
+ slot->SetEpoch(epoch);
+ thr->fast_state.SetSid(slot->sid);
+ thr->fast_state.SetEpoch(epoch);
+ if (thr->slot_epoch != ctx->global_epoch) {
+ thr->slot_epoch = ctx->global_epoch;
+ thr->clock.Reset();
#if !SANITIZER_GO
-static const u32 kThreadQuarantineSize = 16;
-#else
-static const u32 kThreadQuarantineSize = 64;
+ thr->last_sleep_stack_id = kInvalidStackID;
+ thr->last_sleep_clock.Reset();
+#endif
+ }
+ thr->clock.Set(slot->sid, epoch);
+ slot->journal.PushBack({thr->tid, epoch});
+}
+
+static void SlotDetachImpl(ThreadState* thr, bool exiting) {
+ TidSlot* slot = thr->slot;
+ thr->slot = nullptr;
+ if (thr != slot->thr) {
+ slot = nullptr; // we don't own the slot anymore
+ if (thr->slot_epoch != ctx->global_epoch) {
+ TracePart* part = nullptr;
+ auto* trace = &thr->tctx->trace;
+ {
+ Lock l(&trace->mtx);
+ auto* parts = &trace->parts;
+ // The trace can be completely empty in an unlikely event
+ // the thread is preempted right after it acquired the slot
+ // in ThreadStart and did not trace any events yet.
+ CHECK_LE(parts->Size(), 1);
+ part = parts->PopFront();
+ thr->tctx->trace.local_head = nullptr;
+ atomic_store_relaxed(&thr->trace_pos, 0);
+ thr->trace_prev_pc = 0;
+ }
+ if (part) {
+ Lock l(&ctx->slot_mtx);
+ TracePartFree(part);
+ }
+ }
+ return;
+ }
+ CHECK(exiting || thr->fast_state.epoch() == kEpochLast);
+ slot->SetEpoch(thr->fast_state.epoch());
+ slot->thr = nullptr;
+}
+
+void SlotDetach(ThreadState* thr) {
+ Lock lock(&thr->slot->mtx);
+ SlotDetachImpl(thr, true);
+}
+
+void SlotLock(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK(!thr->slot_locked);
+#if SANITIZER_DEBUG
+ // Check these mutexes are not locked.
+ // We can call DoReset from SlotAttachAndLock, which will lock
+ // these mutexes, but it happens only every once in a while.
+ { ThreadRegistryLock lock(&ctx->thread_registry); }
+ { Lock lock(&ctx->slot_mtx); }
#endif
+ TidSlot* slot = thr->slot;
+ slot->mtx.Lock();
+ thr->slot_locked = true;
+ if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast))
+ return;
+ SlotDetachImpl(thr, false);
+ thr->slot_locked = false;
+ slot->mtx.Unlock();
+ SlotAttachAndLock(thr);
+}
+
+void SlotUnlock(ThreadState* thr) {
+ DCHECK(thr->slot_locked);
+ thr->slot_locked = false;
+ thr->slot->mtx.Unlock();
+}
Context::Context()
: initialized(),
report_mtx(MutexTypeReport),
nreported(),
- thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize,
- kMaxTidReuse),
+ thread_registry([](Tid tid) -> ThreadContextBase* {
+ return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid);
+ }),
racy_mtx(MutexTypeRacy),
racy_stacks(),
racy_addresses(),
fired_suppressions_mtx(MutexTypeFired),
- clock_alloc(LINKER_INITIALIZED, "clock allocator") {
+ slot_mtx(MutexTypeSlots),
+ resetting() {
fired_suppressions.reserve(8);
+ for (uptr i = 0; i < ARRAY_SIZE(slots); i++) {
+ TidSlot* slot = &slots[i];
+ slot->sid = static_cast<Sid>(i);
+ slot_queue.PushBack(slot);
+ }
+ global_epoch = 1;
}
+TidSlot::TidSlot() : mtx(MutexTypeSlot) {}
+
// The objects are allocated in TLS, so one may rely on zero-initialization.
-ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
- unsigned reuse_count, uptr stk_addr, uptr stk_size,
- uptr tls_addr, uptr tls_size)
- : fast_state(tid, epoch)
- // Do not touch these, rely on zero initialization,
- // they may be accessed before the ctor.
- // , ignore_reads_and_writes()
- // , ignore_interceptors()
- ,
- clock(tid, reuse_count)
-#if !SANITIZER_GO
- ,
- jmp_bufs()
-#endif
- ,
- tid(tid),
- unique_id(unique_id),
- stk_addr(stk_addr),
- stk_size(stk_size),
- tls_addr(tls_addr),
- tls_size(tls_size)
-#if !SANITIZER_GO
- ,
- last_sleep_clock(tid)
-#endif
-{
+ThreadState::ThreadState(Tid tid)
+ // Do not touch these, rely on zero initialization,
+ // they may be accessed before the ctor.
+ // ignore_reads_and_writes()
+ // ignore_interceptors()
+ : tid(tid) {
CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
- shadow_stack_pos = shadow_stack;
- shadow_stack_end = shadow_stack + kShadowStackSize;
+ // C/C++ uses fixed size shadow stack.
+ const int kInitStackSize = kShadowStackSize;
+ shadow_stack = static_cast<uptr*>(
+ MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
+ SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
+ kInitStackSize * sizeof(uptr));
#else
- // Setup dynamic shadow stack.
+ // Go uses malloc-allocated shadow stack with dynamic size.
const int kInitStackSize = 8;
- shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
+ shadow_stack = static_cast<uptr*>(Alloc(kInitStackSize * sizeof(uptr)));
+#endif
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kInitStackSize;
-#endif
}
#if !SANITIZER_GO
@@ -168,11 +418,11 @@ void MemoryProfiler(u64 uptime) {
WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data()));
}
-void InitializeMemoryProfiler() {
+static bool InitializeMemoryProfiler() {
ctx->memprof_fd = kInvalidFd;
const char *fname = flags()->profile_memory;
if (!fname || !fname[0])
- return;
+ return false;
if (internal_strcmp(fname, "stdout") == 0) {
ctx->memprof_fd = 1;
} else if (internal_strcmp(fname, "stderr") == 0) {
@@ -184,11 +434,11 @@ void InitializeMemoryProfiler() {
if (ctx->memprof_fd == kInvalidFd) {
Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
filename.data());
- return;
+ return false;
}
}
MemoryProfiler(0);
- MaybeSpawnBackgroundThread();
+ return true;
}
static void *BackgroundThread(void *arg) {
@@ -200,33 +450,34 @@ static void *BackgroundThread(void *arg) {
const u64 kMs2Ns = 1000 * 1000;
const u64 start = NanoTime();
- u64 last_flush = NanoTime();
+ u64 last_flush = start;
uptr last_rss = 0;
- for (int i = 0;
- atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
- i++) {
+ while (!atomic_load_relaxed(&ctx->stop_background_thread)) {
SleepForMillis(100);
u64 now = NanoTime();
// Flush memory if requested.
if (flags()->flush_memory_ms > 0) {
if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
- VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
+ VReport(1, "ThreadSanitizer: periodic memory flush\n");
FlushShadowMemory();
- last_flush = NanoTime();
+ now = last_flush = NanoTime();
}
}
if (flags()->memory_limit_mb > 0) {
uptr rss = GetRSS();
uptr limit = uptr(flags()->memory_limit_mb) << 20;
- VPrintf(1, "ThreadSanitizer: memory flush check"
- " RSS=%llu LAST=%llu LIMIT=%llu\n",
+ VReport(1,
+ "ThreadSanitizer: memory flush check"
+ " RSS=%llu LAST=%llu LIMIT=%llu\n",
(u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
if (2 * rss > limit + last_rss) {
- VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
+ VReport(1, "ThreadSanitizer: flushing memory due to RSS\n");
FlushShadowMemory();
rss = GetRSS();
- VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
+ now = NanoTime();
+ VReport(1, "ThreadSanitizer: memory flushed RSS=%llu\n",
+ (u64)rss >> 20);
}
last_rss = rss;
}
@@ -267,11 +518,43 @@ void DontNeedShadowFor(uptr addr, uptr size) {
}
#if !SANITIZER_GO
+// We call UnmapShadow before the actual munmap, at that point we don't yet
+// know if the provided address/size are sane. We can't call UnmapShadow
+// after the actual munmap becuase at that point the memory range can
+// already be reused for something else, so we can't rely on the munmap
+// return value to understand is the values are sane.
+// While calling munmap with insane values (non-canonical address, negative
+// size, etc) is an error, the kernel won't crash. We must also try to not
+// crash as the failure mode is very confusing (paging fault inside of the
+// runtime on some derived shadow address).
+static bool IsValidMmapRange(uptr addr, uptr size) {
+ if (size == 0)
+ return true;
+ if (static_cast<sptr>(size) < 0)
+ return false;
+ if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+ return false;
+ // Check that if the start of the region belongs to one of app ranges,
+ // end of the region belongs to the same region.
+ const uptr ranges[][2] = {
+ {LoAppMemBeg(), LoAppMemEnd()},
+ {MidAppMemBeg(), MidAppMemEnd()},
+ {HiAppMemBeg(), HiAppMemEnd()},
+ };
+ for (auto range : ranges) {
+ if (addr >= range[0] && addr < range[1])
+ return addr + size <= range[1];
+ }
+ return false;
+}
+
void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
- if (size == 0) return;
+ if (size == 0 || !IsValidMmapRange(addr, size))
+ return;
DontNeedShadowFor(addr, size);
ScopedGlobalProcessor sgp;
- ctx->metamap.ResetRange(thr->proc(), addr, size);
+ SlotLocker locker(thr, true);
+ ctx->metamap.ResetRange(thr->proc(), addr, size, true);
}
#endif
@@ -317,18 +600,6 @@ void MapShadow(uptr addr, uptr size) {
addr + size, meta_begin, meta_end);
}
-void MapThreadTrace(uptr addr, uptr size, const char *name) {
- DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size);
- CHECK_GE(addr, TraceMemBeg());
- CHECK_LE(addr + size, TraceMemEnd());
- CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment
- if (!MmapFixedSuperNoReserve(addr, size, name)) {
- Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n",
- addr, size);
- Die();
- }
-}
-
#if !SANITIZER_GO
static void OnStackUnwind(const SignalContext &sig, const void *,
BufferedStackTrace *stack) {
@@ -347,8 +618,11 @@ void CheckUnwind() {
// since we are going to die soon.
ScopedIgnoreInterceptors ignore;
#if !SANITIZER_GO
- cur_thread()->ignore_sync++;
- cur_thread()->ignore_reads_and_writes++;
+ ThreadState* thr = cur_thread();
+ thr->nomalloc = false;
+ thr->ignore_sync++;
+ thr->ignore_reads_and_writes++;
+ atomic_store_relaxed(&thr->in_signal_handler, 0);
#endif
PrintCurrentStackSlow(StackTrace::GetCurrentPc());
}
@@ -403,22 +677,23 @@ void Initialize(ThreadState *thr) {
Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
#endif
- VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
+ VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n",
(int)internal_getpid());
// Initialize thread 0.
- Tid tid = ThreadCreate(thr, 0, 0, true);
+ Tid tid = ThreadCreate(nullptr, 0, 0, true);
CHECK_EQ(tid, kMainTid);
ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
#if TSAN_CONTAINS_UBSAN
__ubsan::InitAsPlugin();
#endif
- ctx->initialized = true;
#if !SANITIZER_GO
Symbolizer::LateInitialize();
- InitializeMemoryProfiler();
+ if (InitializeMemoryProfiler() || flags()->force_background_thread)
+ MaybeSpawnBackgroundThread();
#endif
+ ctx->initialized = true;
if (flags()->stop_on_start) {
Printf("ThreadSanitizer is suspended at startup (pid %d)."
@@ -444,7 +719,6 @@ void MaybeSpawnBackgroundThread() {
#endif
}
-
int Finalize(ThreadState *thr) {
bool failed = false;
@@ -452,12 +726,12 @@ int Finalize(ThreadState *thr) {
DumpProcessMap();
if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
- SleepForMillis(flags()->atexit_sleep_ms);
+ internal_usleep(u64(flags()->atexit_sleep_ms) * 1000);
- // Wait for pending reports.
- ctx->report_mtx.Lock();
- { ScopedErrorReportLock l; }
- ctx->report_mtx.Unlock();
+ {
+ // Wait for pending reports.
+ ScopedErrorReportLock lock;
+ }
#if !SANITIZER_GO
if (Verbosity()) AllocatorPrintStats();
@@ -483,10 +757,16 @@ int Finalize(ThreadState *thr) {
}
#if !SANITIZER_GO
-void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ GlobalProcessorLock();
+ // Detaching from the slot makes OnUserFree skip writing to the shadow.
+ // The slot will be locked so any attempts to use it will deadlock anyway.
+ SlotDetach(thr);
+ for (auto& slot : ctx->slots) slot.mtx.Lock();
ctx->thread_registry.Lock();
- ctx->report_mtx.Lock();
+ ctx->slot_mtx.Lock();
ScopedErrorReportLock::Lock();
+ AllocatorLock();
// Suppress all reports in the pthread_atfork callbacks.
// Reports will deadlock on the report_mtx.
// We could ignore sync operations as well,
@@ -495,29 +775,38 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports++;
// On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
// we'll assert in CheckNoLocks() unless we ignore interceptors.
+ // On OS X libSystem_atfork_prepare/parent/child callbacks are called
+ // after/before our callbacks and they call free.
thr->ignore_interceptors++;
+ // Disables memory write in OnUserAlloc/Free.
+ thr->ignore_reads_and_writes++;
+
+ __tsan_test_only_on_fork();
}
-void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports--; // Enabled in ForkBefore.
thr->ignore_interceptors--;
+ thr->ignore_reads_and_writes--;
+ AllocatorUnlock();
ScopedErrorReportLock::Unlock();
- ctx->report_mtx.Unlock();
+ ctx->slot_mtx.Unlock();
ctx->thread_registry.Unlock();
+ for (auto& slot : ctx->slots) slot.mtx.Unlock();
+ SlotAttachAndLock(thr);
+ SlotUnlock(thr);
+ GlobalProcessorUnlock();
}
-void ForkChildAfter(ThreadState *thr, uptr pc,
- bool start_thread) NO_THREAD_SAFETY_ANALYSIS {
- thr->suppress_reports--; // Enabled in ForkBefore.
- thr->ignore_interceptors--;
- ScopedErrorReportLock::Unlock();
- ctx->report_mtx.Unlock();
- ctx->thread_registry.Unlock();
+void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); }
- uptr nthread = 0;
- ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */);
- VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
- " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
+void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
+ ForkAfter(thr);
+ u32 nthread = ctx->thread_registry.OnFork(thr->tid);
+ VPrintf(1,
+ "ThreadSanitizer: forked new process with pid %d,"
+ " parent had %d threads\n",
+ (int)internal_getpid(), (int)nthread);
if (nthread == 1) {
if (start_thread)
StartBackgroundThread();
@@ -527,6 +816,7 @@ void ForkChildAfter(ThreadState *thr, uptr pc,
// ignores for everything in the hope that we will exec soon.
ctx->after_multithreaded_fork = true;
thr->ignore_interceptors++;
+ thr->suppress_reports++;
ThreadIgnoreBegin(thr, pc);
ThreadIgnoreSyncBegin(thr, pc);
}
@@ -548,8 +838,10 @@ void GrowShadowStack(ThreadState *thr) {
#endif
StackID CurrentStackId(ThreadState *thr, uptr pc) {
+#if !SANITIZER_GO
if (!thr->is_inited) // May happen during bootstrap.
return kInvalidStackID;
+#endif
if (pc != 0) {
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -567,53 +859,72 @@ StackID CurrentStackId(ThreadState *thr, uptr pc) {
return id;
}
-namespace v3 {
-
-NOINLINE
-void TraceSwitchPart(ThreadState *thr) {
+static bool TraceSkipGap(ThreadState* thr) {
Trace *trace = &thr->tctx->trace;
Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
auto *part = trace->parts.Back();
- DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos);
- if (part) {
- // We can get here when we still have space in the current trace part.
- // The fast-path check in TraceAcquire has false positives in the middle of
- // the part. Check if we are indeed at the end of the current part or not,
- // and fill any gaps with NopEvent's.
- Event *end = &part->events[TracePart::kSize];
- DCHECK_GE(pos, &part->events[0]);
- DCHECK_LE(pos, end);
- if (pos + 1 < end) {
- if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
- TracePart::kAlignment)
- *pos++ = NopEvent;
+ DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid,
+ trace, trace->parts.Front(), part, pos);
+ if (!part)
+ return false;
+ // We can get here when we still have space in the current trace part.
+ // The fast-path check in TraceAcquire has false positives in the middle of
+ // the part. Check if we are indeed at the end of the current part or not,
+ // and fill any gaps with NopEvent's.
+ Event* end = &part->events[TracePart::kSize];
+ DCHECK_GE(pos, &part->events[0]);
+ DCHECK_LE(pos, end);
+ if (pos + 1 < end) {
+ if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
+ TracePart::kAlignment)
*pos++ = NopEvent;
- DCHECK_LE(pos + 2, end);
- atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
- // Ensure we setup trace so that the next TraceAcquire
- // won't detect trace part end.
- Event *ev;
- CHECK(TraceAcquire(thr, &ev));
- return;
- }
- // We are indeed at the end.
- for (; pos < end; pos++) *pos = NopEvent;
+ *pos++ = NopEvent;
+ DCHECK_LE(pos + 2, end);
+ atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
+ return true;
}
+ // We are indeed at the end.
+ for (; pos < end; pos++) *pos = NopEvent;
+ return false;
+}
+
+NOINLINE
+void TraceSwitchPart(ThreadState* thr) {
+ if (TraceSkipGap(thr))
+ return;
#if !SANITIZER_GO
if (ctx->after_multithreaded_fork) {
// We just need to survive till exec.
- CHECK(part);
- atomic_store_relaxed(&thr->trace_pos,
- reinterpret_cast<uptr>(&part->events[0]));
- return;
+ TracePart* part = thr->tctx->trace.parts.Back();
+ if (part) {
+ atomic_store_relaxed(&thr->trace_pos,
+ reinterpret_cast<uptr>(&part->events[0]));
+ return;
+ }
}
#endif
- part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart();
+ TraceSwitchPartImpl(thr);
+}
+
+void TraceSwitchPartImpl(ThreadState* thr) {
+ SlotLocker locker(thr, true);
+ Trace* trace = &thr->tctx->trace;
+ TracePart* part = TracePartAlloc(thr);
part->trace = trace;
thr->trace_prev_pc = 0;
+ TracePart* recycle = nullptr;
+ // Keep roughly half of parts local to the thread
+ // (not queued into the recycle queue).
+ uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2;
{
Lock lock(&trace->mtx);
+ if (trace->parts.Empty())
+ trace->local_head = part;
+ if (trace->parts.Size() >= local_parts) {
+ recycle = trace->local_head;
+ trace->local_head = trace->parts.Next(recycle);
+ }
trace->parts.PushBack(part);
atomic_store_relaxed(&thr->trace_pos,
reinterpret_cast<uptr>(&part->events[0]));
@@ -621,60 +932,49 @@ void TraceSwitchPart(ThreadState *thr) {
// Make this part self-sufficient by restoring the current stack
// and mutex set in the beginning of the trace.
TraceTime(thr);
- for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++)
- CHECK(TryTraceFunc(thr, *pos));
+ {
+ // Pathologically large stacks may not fit into the part.
+ // In these cases we log only fixed number of top frames.
+ const uptr kMaxFrames = 1000;
+ // Check that kMaxFrames won't consume the whole part.
+ static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big");
+ uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames);
+ for (; pos < thr->shadow_stack_pos; pos++) {
+ if (TryTraceFunc(thr, *pos))
+ continue;
+ CHECK(TraceSkipGap(thr));
+ CHECK(TryTraceFunc(thr, *pos));
+ }
+ }
for (uptr i = 0; i < thr->mset.Size(); i++) {
MutexSet::Desc d = thr->mset.Get(i);
- TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
- d.addr, d.stack_id);
+ for (uptr i = 0; i < d.count; i++)
+ TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
+ d.addr, d.stack_id);
}
+ {
+ Lock lock(&ctx->slot_mtx);
+ // There is a small chance that the slot may be not queued at this point.
+ // This can happen if the slot has kEpochLast epoch and another thread
+ // in FindSlotAndLock discovered that it's exhausted and removed it from
+ // the slot queue. kEpochLast can happen in 2 cases: (1) if TraceSwitchPart
+ // was called with the slot locked and epoch already at kEpochLast,
+ // or (2) if we've acquired a new slot in SlotLock in the beginning
+ // of the function and the slot was at kEpochLast - 1, so after increment
+ // in SlotAttachAndLock it become kEpochLast.
+ if (ctx->slot_queue.Queued(thr->slot)) {
+ ctx->slot_queue.Remove(thr->slot);
+ ctx->slot_queue.PushBack(thr->slot);
+ }
+ if (recycle)
+ ctx->trace_part_recycle.PushBack(recycle);
+ }
+ DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid,
+ trace->parts.Front(), trace->parts.Back(),
+ atomic_load_relaxed(&thr->trace_pos));
}
-} // namespace v3
-
-void TraceSwitch(ThreadState *thr) {
-#if !SANITIZER_GO
- if (ctx->after_multithreaded_fork)
- return;
-#endif
- thr->nomalloc++;
- Trace *thr_trace = ThreadTrace(thr->tid);
- Lock l(&thr_trace->mtx);
- unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
- TraceHeader *hdr = &thr_trace->headers[trace];
- hdr->epoch0 = thr->fast_state.epoch();
- ObtainCurrentStack(thr, 0, &hdr->stack0);
- hdr->mset0 = thr->mset;
- thr->nomalloc--;
-}
-
-Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); }
-
-uptr TraceTopPC(ThreadState *thr) {
- Event *events = (Event*)GetThreadTrace(thr->tid);
- uptr pc = events[thr->fast_state.GetTracePos()];
- return pc;
-}
-
-uptr TraceSize() {
- return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
-}
-
-uptr TraceParts() {
- return TraceSize() / kTracePartSize;
-}
-
-#if !SANITIZER_GO
-extern "C" void __tsan_trace_switch() {
- TraceSwitch(cur_thread());
-}
-
-extern "C" void __tsan_report_race() {
- ReportRace(cur_thread());
-}
-#endif
-
-void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
+void ThreadIgnoreBegin(ThreadState* thr, uptr pc) {
DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
thr->ignore_reads_and_writes++;
CHECK_GT(thr->ignore_reads_and_writes, 0);
@@ -734,7 +1034,6 @@ void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif
-
} // namespace __tsan
#if SANITIZER_CHECK_DEADLOCKS
@@ -742,18 +1041,27 @@ namespace __sanitizer {
using namespace __tsan;
MutexMeta mutex_meta[] = {
{MutexInvalid, "Invalid", {}},
- {MutexThreadRegistry, "ThreadRegistry", {}},
- {MutexTypeTrace, "Trace", {MutexLeaf}},
- {MutexTypeReport, "Report", {MutexTypeSyncVar}},
- {MutexTypeSyncVar, "SyncVar", {}},
+ {MutexThreadRegistry,
+ "ThreadRegistry",
+ {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}},
+ {MutexTypeReport, "Report", {MutexTypeTrace}},
+ {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}},
{MutexTypeAnnotations, "Annotations", {}},
- {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
+ {MutexTypeAtExit, "AtExit", {}},
{MutexTypeFired, "Fired", {MutexLeaf}},
{MutexTypeRacy, "Racy", {MutexLeaf}},
- {MutexTypeGlobalProc, "GlobalProc", {}},
+ {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}},
+ {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
+ {MutexTypeTrace, "Trace", {}},
+ {MutexTypeSlot,
+ "Slot",
+ {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry,
+ MutexTypeSlots}},
+ {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}},
{},
};
void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
+
} // namespace __sanitizer
#endif
diff --git a/libsanitizer/tsan/tsan_rtl.h b/libsanitizer/tsan/tsan_rtl.h
index eab8370..b472c0f 100644
--- a/libsanitizer/tsan/tsan_rtl.h
+++ b/libsanitizer/tsan/tsan_rtl.h
@@ -34,10 +34,10 @@
#include "sanitizer_common/sanitizer_suppressions.h"
#include "sanitizer_common/sanitizer_thread_registry.h"
#include "sanitizer_common/sanitizer_vector.h"
-#include "tsan_clock.h"
#include "tsan_defs.h"
#include "tsan_flags.h"
#include "tsan_ignoreset.h"
+#include "tsan_ilist.h"
#include "tsan_mman.h"
#include "tsan_mutexset.h"
#include "tsan_platform.h"
@@ -46,6 +46,7 @@
#include "tsan_stack_trace.h"
#include "tsan_sync.h"
#include "tsan_trace.h"
+#include "tsan_vector_clock.h"
#if SANITIZER_WORDSIZE != 64
# error "ThreadSanitizer is supported only on 64-bit platforms"
@@ -116,7 +117,6 @@ struct Processor {
#endif
DenseSlabAllocCache block_cache;
DenseSlabAllocCache sync_cache;
- DenseSlabAllocCache clock_cache;
DDPhysicalThread *dd_pt;
};
@@ -130,67 +130,85 @@ struct ScopedGlobalProcessor {
};
#endif
+struct TidEpoch {
+ Tid tid;
+ Epoch epoch;
+};
+
+struct TidSlot {
+ Mutex mtx;
+ Sid sid;
+ atomic_uint32_t raw_epoch;
+ ThreadState *thr;
+ Vector<TidEpoch> journal;
+ INode node;
+
+ Epoch epoch() const {
+ return static_cast<Epoch>(atomic_load(&raw_epoch, memory_order_relaxed));
+ }
+
+ void SetEpoch(Epoch v) {
+ atomic_store(&raw_epoch, static_cast<u32>(v), memory_order_relaxed);
+ }
+
+ TidSlot();
+} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+
// This struct is stored in TLS.
struct ThreadState {
FastState fast_state;
- // Synch epoch represents the threads's epoch before the last synchronization
- // action. It allows to reduce number of shadow state updates.
- // For example, fast_synch_epoch=100, last write to addr X was at epoch=150,
- // if we are processing write to X from the same thread at epoch=200,
- // we do nothing, because both writes happen in the same 'synch epoch'.
- // That is, if another memory access does not race with the former write,
- // it does not race with the latter as well.
- // QUESTION: can we can squeeze this into ThreadState::Fast?
- // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are
- // taken by epoch between synchs.
- // This way we can save one load from tls.
- u64 fast_synch_epoch;
+ int ignore_sync;
+#if !SANITIZER_GO
+ int ignore_interceptors;
+#endif
+ uptr *shadow_stack_pos;
+
+ // Current position in tctx->trace.Back()->events (Event*).
+ atomic_uintptr_t trace_pos;
+ // PC of the last memory access, used to compute PC deltas in the trace.
+ uptr trace_prev_pc;
+
// Technically `current` should be a separate THREADLOCAL variable;
// but it is placed here in order to share cache line with previous fields.
ThreadState* current;
+
+ atomic_sint32_t pending_signals;
+
+ VectorClock clock;
+
// This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
// We do not distinguish beteween ignoring reads and writes
// for better performance.
int ignore_reads_and_writes;
- atomic_sint32_t pending_signals;
- int ignore_sync;
int suppress_reports;
// Go does not support ignores.
#if !SANITIZER_GO
IgnoreSet mop_ignore_set;
IgnoreSet sync_ignore_set;
- // C/C++ uses fixed size shadow stack.
- uptr shadow_stack[kShadowStackSize];
-#else
- // Go uses malloc-allocated shadow stack with dynamic size.
- uptr *shadow_stack;
#endif
+ uptr *shadow_stack;
uptr *shadow_stack_end;
- uptr *shadow_stack_pos;
- RawShadow *racy_shadow_addr;
- RawShadow racy_state[2];
- MutexSet mset;
- ThreadClock clock;
#if !SANITIZER_GO
Vector<JmpBuf> jmp_bufs;
- int ignore_interceptors;
-#endif
- const Tid tid;
- const int unique_id;
- bool in_symbolizer;
+ int in_symbolizer;
bool in_ignored_lib;
bool is_inited;
+#endif
+ MutexSet mset;
bool is_dead;
- bool is_freeing;
- bool is_vptr_access;
- const uptr stk_addr;
- const uptr stk_size;
- const uptr tls_addr;
- const uptr tls_size;
+ const Tid tid;
+ uptr stk_addr;
+ uptr stk_size;
+ uptr tls_addr;
+ uptr tls_size;
ThreadContext *tctx;
DDLogicalThread *dd_lt;
+ TidSlot *slot;
+ uptr slot_epoch;
+ bool slot_locked;
+
// Current wired Processor, or nullptr. Required to handle any events.
Processor *proc1;
#if !SANITIZER_GO
@@ -204,7 +222,7 @@ struct ThreadState {
#if !SANITIZER_GO
StackID last_sleep_stack_id;
- ThreadClock last_sleep_clock;
+ VectorClock last_sleep_clock;
#endif
// Set in regions of runtime that must be signal-safe and fork-safe.
@@ -213,16 +231,7 @@ struct ThreadState {
const ReportDesc *current_report;
- // Current position in tctx->trace.Back()->events (Event*).
- atomic_uintptr_t trace_pos;
- // PC of the last memory access, used to compute PC deltas in the trace.
- uptr trace_prev_pc;
- Sid sid;
- Epoch epoch;
-
- explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
- unsigned reuse_count, uptr stk_addr, uptr stk_size,
- uptr tls_addr, uptr tls_size);
+ explicit ThreadState(Tid tid);
} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
#if !SANITIZER_GO
@@ -256,14 +265,9 @@ class ThreadContext final : public ThreadContextBase {
~ThreadContext();
ThreadState *thr;
StackID creation_stack_id;
- SyncClock sync;
- // Epoch at which the thread had started.
- // If we see an event from the thread stamped by an older epoch,
- // the event is from a dead thread that shared tid with this thread.
- u64 epoch0;
- u64 epoch1;
-
- v3::Trace trace;
+ VectorClock *sync;
+ uptr sync_epoch;
+ Trace trace;
// Override superclass callbacks.
void OnDead() override;
@@ -318,12 +322,22 @@ struct Context {
InternalMmapVector<FiredSuppression> fired_suppressions;
DDetector *dd;
- ClockAlloc clock_alloc;
-
Flags flags;
fd_t memprof_fd;
+ // The last slot index (kFreeSid) is used to denote freed memory.
+ TidSlot slots[kThreadSlotCount - 1];
+
+ // Protects global_epoch, slot_queue, trace_part_recycle.
Mutex slot_mtx;
+ uptr global_epoch; // guarded by slot_mtx and by all slot mutexes
+ bool resetting; // global reset is in progress
+ IList<TidSlot, &TidSlot::node> slot_queue SANITIZER_GUARDED_BY(slot_mtx);
+ IList<TraceHeader, &TraceHeader::global, TracePart> trace_part_recycle
+ SANITIZER_GUARDED_BY(slot_mtx);
+ uptr trace_part_total_allocated SANITIZER_GUARDED_BY(slot_mtx);
+ uptr trace_part_recycle_finished SANITIZER_GUARDED_BY(slot_mtx);
+ uptr trace_part_finished_excess SANITIZER_GUARDED_BY(slot_mtx);
};
extern Context *ctx; // The one and the only global runtime context.
@@ -352,17 +366,17 @@ uptr TagFromShadowStackFrame(uptr pc);
class ScopedReportBase {
public:
- void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
- const MutexSet *mset);
+ void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, Tid tid,
+ StackTrace stack, const MutexSet *mset);
void AddStack(StackTrace stack, bool suppressable = false);
void AddThread(const ThreadContext *tctx, bool suppressable = false);
- void AddThread(Tid unique_tid, bool suppressable = false);
+ void AddThread(Tid tid, bool suppressable = false);
void AddUniqueTid(Tid unique_tid);
- void AddMutex(const SyncVar *s);
- u64 AddMutex(u64 id);
+ int AddMutex(uptr addr, StackID creation_stack_id);
void AddLocation(uptr addr, uptr size);
void AddSleep(StackID stack_id);
void SetCount(int count);
+ void SetSigNum(int sig);
const ReportDesc *GetReport() const;
@@ -376,8 +390,6 @@ class ScopedReportBase {
// at best it will cause deadlocks on internal mutexes.
ScopedIgnoreInterceptors ignore_interceptors_;
- void AddDeadMutex(u64 id);
-
ScopedReportBase(const ScopedReportBase &) = delete;
void operator=(const ScopedReportBase &) = delete;
};
@@ -393,8 +405,6 @@ class ScopedReport : public ScopedReportBase {
bool ShouldReport(ThreadState *thr, ReportType typ);
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
-void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
- MutexSet *mset, uptr *tag = nullptr);
// The stack could look like:
// <start> | <main> | <foo> | tag | <bar>
@@ -442,7 +452,8 @@ void ForkBefore(ThreadState *thr, uptr pc);
void ForkParentAfter(ThreadState *thr, uptr pc);
void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread);
-void ReportRace(ThreadState *thr);
+void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
+ AccessType typ);
bool OutputReport(ThreadState *thr, const ScopedReport &srep);
bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
bool IsExpectedReport(uptr addr, uptr size);
@@ -472,55 +483,28 @@ int Finalize(ThreadState *thr);
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write);
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write);
-void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
-void MemoryAccessImpl(ThreadState *thr, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
- u64 *shadow_mem, Shadow cur);
-void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
- uptr size, bool is_write);
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ);
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ);
-
-const int kSizeLog1 = 0;
-const int kSizeLog2 = 1;
-const int kSizeLog4 = 2;
-const int kSizeLog8 = 3;
+// This creates 2 non-inlined specialized versions of MemoryAccessRange.
+template <bool is_read>
+void MemoryAccessRangeT(ThreadState *thr, uptr pc, uptr addr, uptr size);
ALWAYS_INLINE
-void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
- AccessType typ) {
- int size_log;
- switch (size) {
- case 1:
- size_log = kSizeLog1;
- break;
- case 2:
- size_log = kSizeLog2;
- break;
- case 4:
- size_log = kSizeLog4;
- break;
- default:
- DCHECK_EQ(size, 8);
- size_log = kSizeLog8;
- break;
- }
- bool is_write = !(typ & kAccessRead);
- bool is_atomic = typ & kAccessAtomic;
- if (typ & kAccessVptr)
- thr->is_vptr_access = true;
- if (typ & kAccessFree)
- thr->is_freeing = true;
- MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic);
- if (typ & kAccessVptr)
- thr->is_vptr_access = false;
- if (typ & kAccessFree)
- thr->is_freeing = false;
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ bool is_write) {
+ if (size == 0)
+ return;
+ if (is_write)
+ MemoryAccessRangeT<false>(thr, pc, addr, size);
+ else
+ MemoryAccessRangeT<true>(thr, pc, addr, size);
}
-void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void ShadowSet(RawShadow *p, RawShadow *end, RawShadow v);
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size);
@@ -530,9 +514,6 @@ void ThreadIgnoreEnd(ThreadState *thr);
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
void ThreadIgnoreSyncEnd(ThreadState *thr);
-void FuncEntry(ThreadState *thr, uptr pc);
-void FuncExit(ThreadState *thr);
-
Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type);
@@ -578,60 +559,7 @@ void Release(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
void AfterSleep(ThreadState *thr, uptr pc);
-void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
-void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
-void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
-void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c);
-void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
-
-// The hacky call uses custom calling convention and an assembly thunk.
-// It is considerably faster that a normal call for the caller
-// if it is not executed (it is intended for slow paths from hot functions).
-// The trick is that the call preserves all registers and the compiler
-// does not treat it as a call.
-// If it does not work for you, use normal call.
-#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC
-// The caller may not create the stack frame for itself at all,
-// so we create a reserve stack frame for it (1024b must be enough).
-#define HACKY_CALL(f) \
- __asm__ __volatile__("sub $1024, %%rsp;" \
- CFI_INL_ADJUST_CFA_OFFSET(1024) \
- ".hidden " #f "_thunk;" \
- "call " #f "_thunk;" \
- "add $1024, %%rsp;" \
- CFI_INL_ADJUST_CFA_OFFSET(-1024) \
- ::: "memory", "cc");
-#else
-#define HACKY_CALL(f) f()
-#endif
-
-void TraceSwitch(ThreadState *thr);
-uptr TraceTopPC(ThreadState *thr);
-uptr TraceSize();
-uptr TraceParts();
-Trace *ThreadTrace(Tid tid);
-
-extern "C" void __tsan_trace_switch();
-void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
- EventType typ, u64 addr) {
- if (!kCollectHistory)
- return;
- DCHECK_GE((int)typ, 0);
- DCHECK_LE((int)typ, 7);
- DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
- u64 pos = fs.GetTracePos();
- if (UNLIKELY((pos % kTracePartSize) == 0)) {
-#if !SANITIZER_GO
- HACKY_CALL(__tsan_trace_switch);
-#else
- TraceSwitch(thr);
-#endif
- }
- Event *trace = (Event*)GetThreadTrace(fs.tid());
- Event *evp = &trace[pos];
- Event ev = (u64)addr | ((u64)typ << kEventPCBits);
- *evp = ev;
-}
+void IncrementEpoch(ThreadState *thr);
#if !SANITIZER_GO
uptr ALWAYS_INLINE HeapEnd() {
@@ -639,6 +567,13 @@ uptr ALWAYS_INLINE HeapEnd() {
}
#endif
+void SlotAttachAndLock(ThreadState *thr) SANITIZER_ACQUIRE(thr->slot->mtx);
+void SlotDetach(ThreadState *thr);
+void SlotLock(ThreadState *thr) SANITIZER_ACQUIRE(thr->slot->mtx);
+void SlotUnlock(ThreadState *thr) SANITIZER_RELEASE(thr->slot->mtx);
+void DoReset(ThreadState *thr, uptr epoch);
+void FlushShadowMemory();
+
ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags);
void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber);
void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags);
@@ -649,6 +584,43 @@ enum FiberSwitchFlags {
FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
};
+class SlotLocker {
+ public:
+ ALWAYS_INLINE
+ SlotLocker(ThreadState *thr, bool recursive = false)
+ : thr_(thr), locked_(recursive ? thr->slot_locked : false) {
+ if (!locked_)
+ SlotLock(thr_);
+ }
+
+ ALWAYS_INLINE
+ ~SlotLocker() {
+ if (!locked_)
+ SlotUnlock(thr_);
+ }
+
+ private:
+ ThreadState *thr_;
+ bool locked_;
+};
+
+class SlotUnlocker {
+ public:
+ SlotUnlocker(ThreadState *thr) : thr_(thr), locked_(thr->slot_locked) {
+ if (locked_)
+ SlotUnlock(thr_);
+ }
+
+ ~SlotUnlocker() {
+ if (locked_)
+ SlotLock(thr_);
+ }
+
+ private:
+ ThreadState *thr_;
+ bool locked_;
+};
+
ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) {
if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals)))
ProcessPendingSignalsImpl(thr);
@@ -667,16 +639,19 @@ void LazyInitialize(ThreadState *thr) {
#endif
}
-namespace v3 {
-
+void TraceResetForTesting();
void TraceSwitchPart(ThreadState *thr);
-bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
- uptr size, AccessType typ, VarSizeStackTrace *pstk,
+void TraceSwitchPartImpl(ThreadState *thr);
+bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
+ AccessType typ, Tid *ptid, VarSizeStackTrace *pstk,
MutexSet *pmset, uptr *ptag);
template <typename EventT>
ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr,
EventT **ev) {
+ // TraceSwitchPart accesses shadow_stack, but it's called infrequently,
+ // so we check it here proactively.
+ DCHECK(thr->shadow_stack);
Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
#if SANITIZER_DEBUG
// TraceSwitch acquires these mutexes,
@@ -747,20 +722,16 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
void TraceMutexUnlock(ThreadState *thr, uptr addr);
void TraceTime(ThreadState *thr);
-} // namespace v3
+void TraceRestartFuncExit(ThreadState *thr);
+void TraceRestartFuncEntry(ThreadState *thr, uptr pc);
void GrowShadowStack(ThreadState *thr);
ALWAYS_INLINE
void FuncEntry(ThreadState *thr, uptr pc) {
- DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc);
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
- }
-
- // Shadow stack maintenance can be replaced with
- // stack unwinding during trace switch (which presumably must be faster).
+ DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.sid(), (void *)pc);
+ if (UNLIKELY(!TryTraceFunc(thr, pc)))
+ return TraceRestartFuncEntry(thr, pc);
DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -774,12 +745,9 @@ void FuncEntry(ThreadState *thr, uptr pc) {
ALWAYS_INLINE
void FuncExit(ThreadState *thr) {
- DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
- }
-
+ DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.sid());
+ if (UNLIKELY(!TryTraceFunc(thr, 0)))
+ return TraceRestartFuncExit(thr);
DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -791,7 +759,6 @@ void FuncExit(ThreadState *thr) {
extern void (*on_initialize)(void);
extern int (*on_finalize)(int);
#endif
-
} // namespace __tsan
#endif // TSAN_RTL_H
diff --git a/libsanitizer/tsan/tsan_rtl_access.cpp b/libsanitizer/tsan/tsan_rtl_access.cpp
index 7365fda..7d771bfa 100644
--- a/libsanitizer/tsan/tsan_rtl_access.cpp
+++ b/libsanitizer/tsan/tsan_rtl_access.cpp
@@ -15,15 +15,13 @@
namespace __tsan {
-namespace v3 {
-
-ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
+ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
if (!kCollectHistory)
return true;
- EventAccess *ev;
+ EventAccess* ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
@@ -40,25 +38,27 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
TraceRelease(thr, ev);
return true;
}
- auto *evex = reinterpret_cast<EventAccessExt *>(ev);
+ auto* evex = reinterpret_cast<EventAccessExt*>(ev);
evex->is_access = 0;
evex->is_func = 0;
evex->type = EventType::kAccessExt;
evex->is_read = !!(typ & kAccessRead);
evex->is_atomic = !!(typ & kAccessAtomic);
evex->size_log = size_log;
+ // Note: this is important, see comment in EventAccessExt.
+ evex->_ = 0;
evex->addr = CompressAddr(addr);
evex->pc = pc;
TraceRelease(thr, evex);
return true;
}
-ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
- uptr addr, uptr size,
- AccessType typ) {
+ALWAYS_INLINE
+bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
if (!kCollectHistory)
return true;
- EventAccessRange *ev;
+ EventAccessRange* ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
thr->trace_prev_pc = pc;
@@ -75,7 +75,7 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
return true;
}
-void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
return;
@@ -84,7 +84,7 @@ void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
DCHECK(res);
}
-void TraceFunc(ThreadState *thr, uptr pc) {
+void TraceFunc(ThreadState* thr, uptr pc) {
if (LIKELY(TryTraceFunc(thr, pc)))
return;
TraceSwitchPart(thr);
@@ -92,7 +92,17 @@ void TraceFunc(ThreadState *thr, uptr pc) {
DCHECK(res);
}
-void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
+NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
+ TraceSwitchPart(thr);
+ FuncEntry(thr, pc);
+}
+
+NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
+ TraceSwitchPart(thr);
+ FuncExit(thr);
+}
+
+void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
StackID stk) {
DCHECK(type == EventType::kLock || type == EventType::kRLock);
if (!kCollectHistory)
@@ -109,7 +119,7 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
TraceEvent(thr, ev);
}
-void TraceMutexUnlock(ThreadState *thr, uptr addr) {
+void TraceMutexUnlock(ThreadState* thr, uptr addr) {
if (!kCollectHistory)
return;
EventUnlock ev;
@@ -121,396 +131,523 @@ void TraceMutexUnlock(ThreadState *thr, uptr addr) {
TraceEvent(thr, ev);
}
-void TraceTime(ThreadState *thr) {
+void TraceTime(ThreadState* thr) {
if (!kCollectHistory)
return;
+ FastState fast_state = thr->fast_state;
EventTime ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kTime;
- ev.sid = static_cast<u64>(thr->sid);
- ev.epoch = static_cast<u64>(thr->epoch);
+ ev.sid = static_cast<u64>(fast_state.sid());
+ ev.epoch = static_cast<u64>(fast_state.epoch());
ev._ = 0;
TraceEvent(thr, ev);
}
-} // namespace v3
+ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
+ return static_cast<RawShadow>(
+ atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
+}
-ALWAYS_INLINE
-Shadow LoadShadow(u64 *p) {
- u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
- return Shadow(raw);
+ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
+ atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
}
-ALWAYS_INLINE
-void StoreShadow(u64 *sp, u64 s) {
- atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
+NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
+ Shadow old,
+ AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+ // For the free shadow markers the first element (that contains kFreeSid)
+ // triggers the race, but the second element contains info about the freeing
+ // thread, take it.
+ if (old.sid() == kFreeSid)
+ old = Shadow(LoadShadow(&shadow_mem[1]));
+ // This prevents trapping on this address in future.
+ for (uptr i = 0; i < kShadowCnt; i++)
+ StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
+ // See the comment in MemoryRangeFreed as to why the slot is locked
+ // for free memory accesses. ReportRace must not be called with
+ // the slot locked because of the fork. But MemoryRangeFreed is not
+ // called during fork because fork sets ignore_reads_and_writes,
+ // so simply unlocking the slot should be fine.
+ if (typ & kAccessSlotLocked)
+ SlotUnlock(thr);
+ ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
+ if (typ & kAccessSlotLocked)
+ SlotLock(thr);
}
+#if !TSAN_VECTORIZE
ALWAYS_INLINE
-void StoreIfNotYetStored(u64 *sp, u64 *s) {
- StoreShadow(sp, *s);
- *s = 0;
+bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
+ AccessType typ) {
+ for (uptr i = 0; i < kShadowCnt; i++) {
+ auto old = LoadShadow(&s[i]);
+ if (!(typ & kAccessRead)) {
+ if (old == cur.raw())
+ return true;
+ continue;
+ }
+ auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
+ static_cast<u32>(Shadow::kRodata));
+ if (masked == cur.raw())
+ return true;
+ if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
+ if (old == Shadow::kRodata)
+ return true;
+ }
+ }
+ return false;
}
-extern "C" void __tsan_report_race();
-
ALWAYS_INLINE
-void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) {
- thr->racy_state[0] = cur.raw();
- thr->racy_state[1] = old.raw();
- thr->racy_shadow_addr = shadow_mem;
-#if !SANITIZER_GO
- HACKY_CALL(__tsan_report_race);
-#else
- ReportRace(thr);
-#endif
+bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
+ int unused0, int unused1, AccessType typ) {
+ bool stored = false;
+ for (uptr idx = 0; idx < kShadowCnt; idx++) {
+ RawShadow* sp = &shadow_mem[idx];
+ Shadow old(LoadShadow(sp));
+ if (LIKELY(old.raw() == Shadow::kEmpty)) {
+ if (!(typ & kAccessCheckOnly) && !stored)
+ StoreShadow(sp, cur.raw());
+ return false;
+ }
+ if (LIKELY(!(cur.access() & old.access())))
+ continue;
+ if (LIKELY(cur.sid() == old.sid())) {
+ if (!(typ & kAccessCheckOnly) &&
+ LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
+ StoreShadow(sp, cur.raw());
+ stored = true;
+ }
+ continue;
+ }
+ if (LIKELY(old.IsBothReadsOrAtomic(typ)))
+ continue;
+ if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
+ continue;
+ DoReportRace(thr, shadow_mem, cur, old, typ);
+ return true;
+ }
+ // We did not find any races and had already stored
+ // the current access info, so we are done.
+ if (LIKELY(stored))
+ return false;
+ // Choose a random candidate slot and replace it.
+ uptr index =
+ atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
+ StoreShadow(&shadow_mem[index], cur.raw());
+ return false;
}
-static inline bool HappensBefore(Shadow old, ThreadState *thr) {
- return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
-}
+# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0
-ALWAYS_INLINE
-void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog,
- bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem,
- Shadow cur) {
- // This potentially can live in an MMX/SSE scratch register.
- // The required intrinsics are:
- // __m128i _mm_move_epi64(__m128i*);
- // _mm_storel_epi64(u64*, __m128i);
- u64 store_word = cur.raw();
- bool stored = false;
+#else /* !TSAN_VECTORIZE */
- // scan all the shadow values and dispatch to 4 categories:
- // same, replace, candidate and race (see comments below).
- // we consider only 3 cases regarding access sizes:
- // equal, intersect and not intersect. initially I considered
- // larger and smaller as well, it allowed to replace some
- // 'candidates' with 'same' or 'replace', but I think
- // it's just not worth it (performance- and complexity-wise).
-
- Shadow old(0);
-
- // It release mode we manually unroll the loop,
- // because empirically gcc generates better code this way.
- // However, we can't afford unrolling in debug mode, because the function
- // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
- // threads, which is not enough for the unrolled loop.
-#if SANITIZER_DEBUG
- for (int idx = 0; idx < 4; idx++) {
-# include "tsan_update_shadow_word.inc"
- }
-#else
- int idx = 0;
-# include "tsan_update_shadow_word.inc"
- idx = 1;
- if (stored) {
-# include "tsan_update_shadow_word.inc"
- } else {
-# include "tsan_update_shadow_word.inc"
- }
- idx = 2;
- if (stored) {
-# include "tsan_update_shadow_word.inc"
- } else {
-# include "tsan_update_shadow_word.inc"
+ALWAYS_INLINE
+bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
+ m128 access, AccessType typ) {
+ // Note: we could check if there is a larger access of the same type,
+ // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
+ // and now do smaller reads/writes, these can also be considered as "same
+ // access". However, it will make the check more expensive, so it's unclear
+ // if it's worth it. But this would conserve trace space, so it's useful
+ // besides potential speed up.
+ if (!(typ & kAccessRead)) {
+ const m128 same = _mm_cmpeq_epi32(shadow, access);
+ return _mm_movemask_epi8(same);
}
- idx = 3;
- if (stored) {
-# include "tsan_update_shadow_word.inc"
- } else {
-# include "tsan_update_shadow_word.inc"
+ // For reads we need to reset read bit in the shadow,
+ // because we need to match read with both reads and writes.
+ // Shadow::kRodata has only read bit set, so it does what we want.
+ // We also abuse it for rodata check to save few cycles
+ // since we already loaded Shadow::kRodata into a register.
+ // Reads from rodata can't race.
+ // Measurements show that they can be 10-20% of all memory accesses.
+ // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
+ // (thread epochs start from 1). So the same read bit mask
+ // serves as rodata indicator.
+ const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
+ const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
+ m128 same = _mm_cmpeq_epi32(masked_shadow, access);
+ // Range memory accesses check Shadow::kRodata before calling this,
+ // Shadow::kRodatas is not possible for free memory access
+ // and Go does not use Shadow::kRodata.
+ if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
+ const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
+ same = _mm_or_si128(ro, same);
}
-#endif
-
- // we did not find any races and had already stored
- // the current access info, so we are done
- if (LIKELY(stored))
- return;
- // choose a random candidate slot and replace it
- StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
- return;
-RACE:
- HandleRace(thr, shadow_mem, cur, old);
- return;
+ return _mm_movemask_epi8(same);
}
-void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
- AccessType typ) {
- DCHECK(!(typ & kAccessAtomic));
- const bool kAccessIsWrite = !(typ & kAccessRead);
- const bool kIsAtomic = false;
- while (size) {
- int size1 = 1;
- int kAccessSizeLog = kSizeLog1;
- if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
- size1 = 8;
- kAccessSizeLog = kSizeLog8;
- } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
- size1 = 4;
- kAccessSizeLog = kSizeLog4;
- } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
- size1 = 2;
- kAccessSizeLog = kSizeLog2;
- }
- MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
- addr += size1;
- size -= size1;
+NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
+ u32 race_mask, m128 shadow, AccessType typ) {
+ // race_mask points which of the shadow elements raced with the current
+ // access. Extract that element.
+ CHECK_NE(race_mask, 0);
+ u32 old;
+ // Note: _mm_extract_epi32 index must be a constant value.
+ switch (__builtin_ffs(race_mask) / 4) {
+ case 0:
+ old = _mm_extract_epi32(shadow, 0);
+ break;
+ case 1:
+ old = _mm_extract_epi32(shadow, 1);
+ break;
+ case 2:
+ old = _mm_extract_epi32(shadow, 2);
+ break;
+ case 3:
+ old = _mm_extract_epi32(shadow, 3);
+ break;
}
+ Shadow prev(static_cast<RawShadow>(old));
+ // For the free shadow markers the first element (that contains kFreeSid)
+ // triggers the race, but the second element contains info about the freeing
+ // thread, take it.
+ if (prev.sid() == kFreeSid)
+ prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
+ DoReportRace(thr, shadow_mem, cur, prev, typ);
}
ALWAYS_INLINE
-bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
- Shadow cur(a);
- for (uptr i = 0; i < kShadowCnt; i++) {
- Shadow old(LoadShadow(&s[i]));
- if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
- old.TidWithIgnore() == cur.TidWithIgnore() &&
- old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() &&
- old.IsRead() <= cur.IsRead())
- return true;
+bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
+ m128 shadow, m128 access, AccessType typ) {
+ // Note: empty/zero slots don't intersect with any access.
+ const m128 zero = _mm_setzero_si128();
+ const m128 mask_access = _mm_set1_epi32(0x000000ff);
+ const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
+ const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
+ const m128 access_and = _mm_and_si128(access, shadow);
+ const m128 access_xor = _mm_xor_si128(access, shadow);
+ const m128 intersect = _mm_and_si128(access_and, mask_access);
+ const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
+ const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
+ const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
+ const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
+ const m128 no_race =
+ _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
+ const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
+ if (UNLIKELY(race_mask))
+ goto SHARED;
+
+STORE : {
+ if (typ & kAccessCheckOnly)
+ return false;
+ // We could also replace different sid's if access is the same,
+ // rw weaker and happens before. However, just checking access below
+ // is not enough because we also need to check that !both_read_or_atomic
+ // (reads from different sids can be concurrent).
+ // Theoretically we could replace smaller accesses with larger accesses,
+ // but it's unclear if it's worth doing.
+ const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
+ const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
+ const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
+ const m128 access_read_atomic =
+ _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
+ const m128 rw_weaker =
+ _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
+ const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
+ const int rewrite_mask = _mm_movemask_epi8(rewrite);
+ int index = __builtin_ffs(rewrite_mask);
+ if (UNLIKELY(index == 0)) {
+ const m128 empty = _mm_cmpeq_epi32(shadow, zero);
+ const int empty_mask = _mm_movemask_epi8(empty);
+ index = __builtin_ffs(empty_mask);
+ if (UNLIKELY(index == 0))
+ index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
}
+ StoreShadow(&shadow_mem[index / 4], cur.raw());
+ // We could zero other slots determined by rewrite_mask.
+ // That would help other threads to evict better slots,
+ // but it's unclear if it's worth it.
return false;
}
-#if TSAN_VECTORIZE
-# define SHUF(v0, v1, i0, i1, i2, i3) \
- _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
- _mm_castsi128_ps(v1), \
- (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
-ALWAYS_INLINE
-bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
- // This is an optimized version of ContainsSameAccessSlow.
- // load current access into access[0:63]
- const m128 access = _mm_cvtsi64_si128(a);
- // duplicate high part of access in addr0:
- // addr0[0:31] = access[32:63]
- // addr0[32:63] = access[32:63]
- // addr0[64:95] = access[32:63]
- // addr0[96:127] = access[32:63]
- const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
- // load 4 shadow slots
- const m128 shadow0 = _mm_load_si128((__m128i *)s);
- const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
- // load high parts of 4 shadow slots into addr_vect:
- // addr_vect[0:31] = shadow0[32:63]
- // addr_vect[32:63] = shadow0[96:127]
- // addr_vect[64:95] = shadow1[32:63]
- // addr_vect[96:127] = shadow1[96:127]
- m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
- if (!is_write) {
- // set IsRead bit in addr_vect
- const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
- const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
- addr_vect = _mm_or_si128(addr_vect, rw_mask);
- }
- // addr0 == addr_vect?
- const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
- // epoch1[0:63] = sync_epoch
- const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
- // epoch[0:31] = sync_epoch[0:31]
- // epoch[32:63] = sync_epoch[0:31]
- // epoch[64:95] = sync_epoch[0:31]
- // epoch[96:127] = sync_epoch[0:31]
- const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
- // load low parts of shadow cell epochs into epoch_vect:
- // epoch_vect[0:31] = shadow0[0:31]
- // epoch_vect[32:63] = shadow0[64:95]
- // epoch_vect[64:95] = shadow1[0:31]
- // epoch_vect[96:127] = shadow1[64:95]
- const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
- // epoch_vect >= sync_epoch?
- const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
- // addr_res & epoch_res
- const m128 res = _mm_and_si128(addr_res, epoch_res);
- // mask[0] = res[7]
- // mask[1] = res[15]
- // ...
- // mask[15] = res[127]
- const int mask = _mm_movemask_epi8(res);
- return mask != 0;
+SHARED:
+ m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
+ // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
+ // indexes must be constants.
+# define LOAD_EPOCH(idx) \
+ if (LIKELY(race_mask & (1 << (idx * 4)))) { \
+ u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1); \
+ u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid))); \
+ thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
+ }
+ LOAD_EPOCH(0);
+ LOAD_EPOCH(1);
+ LOAD_EPOCH(2);
+ LOAD_EPOCH(3);
+# undef LOAD_EPOCH
+ const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
+ const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
+ const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
+ const int concurrent_mask = _mm_movemask_epi8(concurrent);
+ if (LIKELY(concurrent_mask == 0))
+ goto STORE;
+
+ DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
+ return true;
}
-#endif
-ALWAYS_INLINE
-bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-#if TSAN_VECTORIZE
- bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
- // NOTE: this check can fail if the shadow is concurrently mutated
- // by other threads. But it still can be useful if you modify
- // ContainsSameAccessFast and want to ensure that it's not completely broken.
- // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
- return res;
-#else
- return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+# define LOAD_CURRENT_SHADOW(cur, shadow_mem) \
+ const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
+ const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif
-}
-ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite,
- bool kIsAtomic) {
- RawShadow *shadow_mem = MemToShadow(addr);
- DPrintf2(
- "#%d: MemoryAccess: @%p %p size=%d"
- " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
- (int)thr->fast_state.tid(), (void *)pc, (void *)addr,
- (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
- (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
- (uptr)shadow_mem[3]);
-#if SANITIZER_DEBUG
- if (!IsAppMem(addr)) {
- Printf("Access to non app mem %zx\n", addr);
- DCHECK(IsAppMem(addr));
+char* DumpShadow(char* buf, RawShadow raw) {
+ if (raw == Shadow::kEmpty) {
+ internal_snprintf(buf, 64, "0");
+ return buf;
}
- if (!IsShadowMem(shadow_mem)) {
- Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
- DCHECK(IsShadowMem(shadow_mem));
- }
-#endif
+ Shadow s(raw);
+ AccessType typ;
+ s.GetAccess(nullptr, nullptr, &typ);
+ internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
+ static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
+ s.access(), static_cast<u32>(typ));
+ return buf;
+}
- if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
- // Access to .rodata section, no races here.
- // Measurements show that it can be 10-20% of all memory accesses.
- return;
- }
+// TryTrace* and TraceRestart* functions allow to turn memory access and func
+// entry/exit callbacks into leaf functions with all associated performance
+// benefits. These hottest callbacks do only 2 slow path calls: report a race
+// and trace part switching. Race reporting is easy to turn into a tail call, we
+// just always return from the runtime after reporting a race. But trace part
+// switching is harder because it needs to be in the middle of callbacks. To
+// turn it into a tail call we immidiately return after TraceRestart* functions,
+// but TraceRestart* functions themselves recurse into the callback after
+// switching trace part. As the result the hottest callbacks contain only tail
+// calls, which effectively makes them leaf functions (can use all registers,
+// no frame setup, etc).
+NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
+ uptr size, AccessType typ) {
+ TraceSwitchPart(thr);
+ MemoryAccess(thr, pc, addr, size, typ);
+}
+
+ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
+ uptr size, AccessType typ) {
+ RawShadow* shadow_mem = MemToShadow(addr);
+ UNUSED char memBuf[4][64];
+ DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
+ static_cast<int>(thr->fast_state.sid()),
+ static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
+ static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
+ DumpShadow(memBuf[1], shadow_mem[1]),
+ DumpShadow(memBuf[2], shadow_mem[2]),
+ DumpShadow(memBuf[3], shadow_mem[3]));
FastState fast_state = thr->fast_state;
- if (UNLIKELY(fast_state.GetIgnoreBit())) {
+ Shadow cur(fast_state, addr, size, typ);
+
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
return;
- }
+ if (UNLIKELY(fast_state.GetIgnoreBit()))
+ return;
+ if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
+ return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
+ CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+}
- Shadow cur(fast_state);
- cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
- cur.SetWrite(kAccessIsWrite);
- cur.SetAtomic(kIsAtomic);
+void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);
- if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
- kAccessIsWrite))) {
- return;
- }
+NOINLINE
+void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
+ AccessType typ) {
+ TraceSwitchPart(thr);
+ MemoryAccess16(thr, pc, addr, typ);
+}
- if (kCollectHistory) {
- fast_state.IncrementEpoch();
- thr->fast_state = fast_state;
- TraceAddEvent(thr, fast_state, EventTypeMop, pc);
- cur.IncrementEpoch();
+ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
+ AccessType typ) {
+ const uptr size = 16;
+ FastState fast_state = thr->fast_state;
+ if (UNLIKELY(fast_state.GetIgnoreBit()))
+ return;
+ Shadow cur(fast_state, 0, 8, typ);
+ RawShadow* shadow_mem = MemToShadow(addr);
+ bool traced = false;
+ {
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ goto SECOND;
+ if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+ return RestartMemoryAccess16(thr, pc, addr, typ);
+ traced = true;
+ if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
+ return;
}
+SECOND:
+ shadow_mem += kShadowCnt;
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ return;
+ if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+ return RestartMemoryAccess16(thr, pc, addr, typ);
+ CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+}
- MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
- shadow_mem, cur);
+NOINLINE
+void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
+ uptr size, AccessType typ) {
+ TraceSwitchPart(thr);
+ UnalignedMemoryAccess(thr, pc, addr, size, typ);
}
-// Called by MemoryAccessRange in tsan_rtl_thread.cpp
-ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr,
- int kAccessSizeLog,
- bool kAccessIsWrite, bool kIsAtomic,
- u64 *shadow_mem, Shadow cur) {
- if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
- kAccessIsWrite))) {
+ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
+ uptr addr, uptr size,
+ AccessType typ) {
+ DCHECK_LE(size, 8);
+ FastState fast_state = thr->fast_state;
+ if (UNLIKELY(fast_state.GetIgnoreBit()))
return;
+ RawShadow* shadow_mem = MemToShadow(addr);
+ bool traced = false;
+ uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
+ {
+ Shadow cur(fast_state, addr, size1, typ);
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ goto SECOND;
+ if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+ return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
+ traced = true;
+ if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
+ return;
}
+SECOND:
+ uptr size2 = size - size1;
+ if (LIKELY(size2 == 0))
+ return;
+ shadow_mem += kShadowCnt;
+ Shadow cur(fast_state, 0, size2, typ);
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ return;
+ if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+ return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
+ CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+}
- MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
- shadow_mem, cur);
+void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
+ DCHECK_LE(p, end);
+ DCHECK(IsShadowMem(p));
+ DCHECK(IsShadowMem(end));
+ UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
+ DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
+ DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
+#if !TSAN_VECTORIZE
+ for (; p < end; p += kShadowCnt) {
+ p[0] = v;
+ for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
+ }
+#else
+ m128 vv = _mm_setr_epi32(
+ static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
+ static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
+ m128* vp = reinterpret_cast<m128*>(p);
+ m128* vend = reinterpret_cast<m128*>(end);
+ for (; vp < vend; vp++) _mm_store_si128(vp, vv);
+#endif
}
-static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
- u64 val) {
- (void)thr;
- (void)pc;
+static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
if (size == 0)
return;
- // FIXME: fix me.
- uptr offset = addr % kShadowCell;
- if (offset) {
- offset = kShadowCell - offset;
- if (size <= offset)
- return;
- addr += offset;
- size -= offset;
- }
- DCHECK_EQ(addr % 8, 0);
+ DCHECK_EQ(addr % kShadowCell, 0);
+ DCHECK_EQ(size % kShadowCell, 0);
// If a user passes some insane arguments (memset(0)),
// let it just crash as usual.
if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
return;
+ RawShadow* begin = MemToShadow(addr);
+ RawShadow* end = begin + size / kShadowCell * kShadowCnt;
// Don't want to touch lots of shadow memory.
// If a program maps 10MB stack, there is no need reset the whole range.
- size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
// UnmapOrDie/MmapFixedNoReserve does not work on Windows.
- if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
- RawShadow *p = MemToShadow(addr);
- CHECK(IsShadowMem(p));
- CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
- // FIXME: may overwrite a part outside the region
- for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
- p[i++] = val;
- for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0;
- }
- } else {
- // The region is big, reset only beginning and end.
- const uptr kPageSize = GetPageSizeCached();
- RawShadow *begin = MemToShadow(addr);
- RawShadow *end = begin + size / kShadowCell * kShadowCnt;
- RawShadow *p = begin;
- // Set at least first kPageSize/2 to page boundary.
- while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
- *p++ = val;
- for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
- }
- // Reset middle part.
- RawShadow *p1 = p;
- p = RoundDown(end, kPageSize);
- if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
+ if (SANITIZER_WINDOWS ||
+ size <= common_flags()->clear_shadow_mmap_threshold) {
+ ShadowSet(begin, end, val);
+ return;
+ }
+ // The region is big, reset only beginning and end.
+ const uptr kPageSize = GetPageSizeCached();
+ // Set at least first kPageSize/2 to page boundary.
+ RawShadow* mid1 =
+ Min(end, reinterpret_cast<RawShadow*>(RoundUp(
+ reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
+ ShadowSet(begin, mid1, val);
+ // Reset middle part.
+ RawShadow* mid2 = RoundDown(end, kPageSize);
+ if (mid2 > mid1) {
+ if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
Die();
- // Set the ending.
- while (p < end) {
- *p++ = val;
- for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
- }
}
+ // Set the ending.
+ ShadowSet(mid2, end, val);
}
-void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- MemoryRangeSet(thr, pc, addr, size, 0);
+void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+ uptr addr1 = RoundDown(addr, kShadowCell);
+ uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
+ MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}
-void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- // Processing more than 1k (4k of shadow) is expensive,
+void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+ // Callers must lock the slot to ensure synchronization with the reset.
+ // The problem with "freed" memory is that it's not "monotonic"
+ // with respect to bug detection: freed memory is bad to access,
+ // but then if the heap block is reallocated later, it's good to access.
+ // As the result a garbage "freed" shadow can lead to a false positive
+ // if it happens to match a real free in the thread trace,
+ // but the heap block was reallocated before the current memory access,
+ // so it's still good to access. It's not the case with data races.
+ DCHECK(thr->slot_locked);
+ DCHECK_EQ(addr % kShadowCell, 0);
+ size = RoundUp(size, kShadowCell);
+ // Processing more than 1k (2k of shadow) is expensive,
// can cause excessive memory consumption (user does not necessary touch
// the whole range) and most likely unnecessary.
- if (size > 1024)
- size = 1024;
- CHECK_EQ(thr->is_freeing, false);
- thr->is_freeing = true;
- MemoryAccessRange(thr, pc, addr, size, true);
- thr->is_freeing = false;
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+ size = Min<uptr>(size, 1024);
+ const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
+ kAccessCheckOnly | kAccessNoRodata;
+ TraceMemoryAccessRange(thr, pc, addr, size, typ);
+ RawShadow* shadow_mem = MemToShadow(addr);
+ Shadow cur(thr->fast_state, 0, kShadowCell, typ);
+#if TSAN_VECTORIZE
+ const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
+ const m128 freed = _mm_setr_epi32(
+ static_cast<u32>(Shadow::FreedMarker()),
+ static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
+ for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
+ const m128 shadow = _mm_load_si128((m128*)shadow_mem);
+ if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
+ return;
+ _mm_store_si128((m128*)shadow_mem, freed);
}
- Shadow s(thr->fast_state);
- s.ClearIgnoreBit();
- s.MarkAsFreed();
- s.SetWrite(true);
- s.SetAddr0AndSizeLog(0, 3);
- MemoryRangeSet(thr, pc, addr, size, s.raw());
-}
-
-void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+#else
+ for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
+ if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
+ return;
+ StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
+ StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
+ StoreShadow(&shadow_mem[2], Shadow::kEmpty);
+ StoreShadow(&shadow_mem[3], Shadow::kEmpty);
}
- Shadow s(thr->fast_state);
- s.ClearIgnoreBit();
- s.SetWrite(true);
- s.SetAddr0AndSizeLog(0, 3);
- MemoryRangeSet(thr, pc, addr, size, s.raw());
+#endif
+}
+
+void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+ DCHECK_EQ(addr % kShadowCell, 0);
+ size = RoundUp(size, kShadowCell);
+ TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
+ Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
+ MemoryRangeSet(addr, size, cur.raw());
}
-void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
+void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
uptr size) {
if (thr->ignore_reads_and_writes == 0)
MemoryRangeImitateWrite(thr, pc, addr, size);
@@ -518,14 +655,29 @@ void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
MemoryResetRange(thr, pc, addr, size);
}
-void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
- bool is_write) {
- if (size == 0)
- return;
+ALWAYS_INLINE
+bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
+ AccessType typ) {
+ LOAD_CURRENT_SHADOW(cur, shadow_mem);
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ return false;
+ return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+}
+
+template <bool is_read>
+NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
+ uptr size) {
+ TraceSwitchPart(thr);
+ MemoryAccessRangeT<is_read>(thr, pc, addr, size);
+}
- RawShadow *shadow_mem = MemToShadow(addr);
- DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid,
- (void *)pc, (void *)addr, (int)size, is_write);
+template <bool is_read>
+void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+ const AccessType typ =
+ (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
+ RawShadow* shadow_mem = MemToShadow(addr);
+ DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
+ (void*)pc, (void*)addr, (int)size, is_read);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
@@ -537,65 +689,62 @@ void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
DCHECK(IsAppMem(addr + size - 1));
}
if (!IsShadowMem(shadow_mem)) {
- Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+ Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
DCHECK(IsShadowMem(shadow_mem));
}
- if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) {
- Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1,
+ if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
+ Printf("Bad shadow addr %p (%zx)\n",
+ static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
addr + size - 1);
- DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1));
+ DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
}
#endif
- if (*shadow_mem == kShadowRodata) {
- DCHECK(!is_write);
- // Access to .rodata section, no races here.
- // Measurements show that it can be 10-20% of all memory accesses.
+ // Access to .rodata section, no races here.
+ // Measurements show that it can be 10-20% of all memory accesses.
+ // Check here once to not check for every access separately.
+ // Note: we could (and should) do this only for the is_read case
+ // (writes shouldn't go to .rodata). But it happens in Chromium tests:
+ // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
+ // Details are unknown since it happens only on CI machines.
+ if (*shadow_mem == Shadow::kRodata)
return;
- }
FastState fast_state = thr->fast_state;
- if (fast_state.GetIgnoreBit())
+ if (UNLIKELY(fast_state.GetIgnoreBit()))
return;
- fast_state.IncrementEpoch();
- thr->fast_state = fast_state;
- TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+ if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
+ return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);
- bool unaligned = (addr % kShadowCell) != 0;
-
- // Handle unaligned beginning, if any.
- for (; addr % kShadowCell && size; addr++, size--) {
- int const kAccessSizeLog = 0;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
- cur);
- }
- if (unaligned)
+ if (UNLIKELY(addr % kShadowCell)) {
+ // Handle unaligned beginning, if any.
+ uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
+ size -= size1;
+ Shadow cur(fast_state, addr, size1, typ);
+ if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
+ return;
shadow_mem += kShadowCnt;
+ }
// Handle middle part, if any.
- for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
- int const kAccessSizeLog = 3;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
- cur);
- shadow_mem += kShadowCnt;
+ Shadow cur(fast_state, 0, kShadowCell, typ);
+ for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
+ if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
+ return;
}
// Handle ending, if any.
- for (; size; addr++, size--) {
- int const kAccessSizeLog = 0;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
- cur);
+ if (UNLIKELY(size)) {
+ Shadow cur(fast_state, 0, size, typ);
+ if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
+ return;
}
}
+template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
+ uptr size);
+template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
+ uptr size);
+
} // namespace __tsan
#if !SANITIZER_GO
diff --git a/libsanitizer/tsan/tsan_rtl_amd64.S b/libsanitizer/tsan/tsan_rtl_amd64.S
index c15b01e..f848be9 100644
--- a/libsanitizer/tsan/tsan_rtl_amd64.S
+++ b/libsanitizer/tsan/tsan_rtl_amd64.S
@@ -9,242 +9,6 @@
.section __TEXT,__text
#endif
-ASM_HIDDEN(__tsan_trace_switch)
-.globl ASM_SYMBOL(__tsan_trace_switch_thunk)
-ASM_SYMBOL(__tsan_trace_switch_thunk):
- CFI_STARTPROC
- _CET_ENDBR
- # Save scratch registers.
- push %rax
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rax, 0)
- push %rcx
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rcx, 0)
- push %rdx
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rdx, 0)
- push %rsi
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rsi, 0)
- push %rdi
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rdi, 0)
- push %r8
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r8, 0)
- push %r9
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r9, 0)
- push %r10
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r10, 0)
- push %r11
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r11, 0)
- # All XMM registers are caller-saved.
- sub $0x100, %rsp
- CFI_ADJUST_CFA_OFFSET(0x100)
- movdqu %xmm0, 0x0(%rsp)
- movdqu %xmm1, 0x10(%rsp)
- movdqu %xmm2, 0x20(%rsp)
- movdqu %xmm3, 0x30(%rsp)
- movdqu %xmm4, 0x40(%rsp)
- movdqu %xmm5, 0x50(%rsp)
- movdqu %xmm6, 0x60(%rsp)
- movdqu %xmm7, 0x70(%rsp)
- movdqu %xmm8, 0x80(%rsp)
- movdqu %xmm9, 0x90(%rsp)
- movdqu %xmm10, 0xa0(%rsp)
- movdqu %xmm11, 0xb0(%rsp)
- movdqu %xmm12, 0xc0(%rsp)
- movdqu %xmm13, 0xd0(%rsp)
- movdqu %xmm14, 0xe0(%rsp)
- movdqu %xmm15, 0xf0(%rsp)
- # Align stack frame.
- push %rbx # non-scratch
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rbx, 0)
- mov %rsp, %rbx # save current rsp
- CFI_DEF_CFA_REGISTER(%rbx)
- shr $4, %rsp # clear 4 lsb, align to 16
- shl $4, %rsp
-
- call ASM_SYMBOL(__tsan_trace_switch)
-
- # Unalign stack frame back.
- mov %rbx, %rsp # restore the original rsp
- CFI_DEF_CFA_REGISTER(%rsp)
- pop %rbx
- CFI_ADJUST_CFA_OFFSET(-8)
- # Restore scratch registers.
- movdqu 0x0(%rsp), %xmm0
- movdqu 0x10(%rsp), %xmm1
- movdqu 0x20(%rsp), %xmm2
- movdqu 0x30(%rsp), %xmm3
- movdqu 0x40(%rsp), %xmm4
- movdqu 0x50(%rsp), %xmm5
- movdqu 0x60(%rsp), %xmm6
- movdqu 0x70(%rsp), %xmm7
- movdqu 0x80(%rsp), %xmm8
- movdqu 0x90(%rsp), %xmm9
- movdqu 0xa0(%rsp), %xmm10
- movdqu 0xb0(%rsp), %xmm11
- movdqu 0xc0(%rsp), %xmm12
- movdqu 0xd0(%rsp), %xmm13
- movdqu 0xe0(%rsp), %xmm14
- movdqu 0xf0(%rsp), %xmm15
- add $0x100, %rsp
- CFI_ADJUST_CFA_OFFSET(-0x100)
- pop %r11
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r10
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r9
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r8
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rdi
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rsi
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rdx
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rcx
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rax
- CFI_ADJUST_CFA_OFFSET(-8)
- CFI_RESTORE(%rax)
- CFI_RESTORE(%rbx)
- CFI_RESTORE(%rcx)
- CFI_RESTORE(%rdx)
- CFI_RESTORE(%rsi)
- CFI_RESTORE(%rdi)
- CFI_RESTORE(%r8)
- CFI_RESTORE(%r9)
- CFI_RESTORE(%r10)
- CFI_RESTORE(%r11)
- ret
- CFI_ENDPROC
-
-ASM_HIDDEN(__tsan_report_race)
-.globl ASM_SYMBOL(__tsan_report_race_thunk)
-ASM_SYMBOL(__tsan_report_race_thunk):
- CFI_STARTPROC
- _CET_ENDBR
- # Save scratch registers.
- push %rax
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rax, 0)
- push %rcx
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rcx, 0)
- push %rdx
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rdx, 0)
- push %rsi
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rsi, 0)
- push %rdi
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rdi, 0)
- push %r8
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r8, 0)
- push %r9
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r9, 0)
- push %r10
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r10, 0)
- push %r11
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%r11, 0)
- # All XMM registers are caller-saved.
- sub $0x100, %rsp
- CFI_ADJUST_CFA_OFFSET(0x100)
- movdqu %xmm0, 0x0(%rsp)
- movdqu %xmm1, 0x10(%rsp)
- movdqu %xmm2, 0x20(%rsp)
- movdqu %xmm3, 0x30(%rsp)
- movdqu %xmm4, 0x40(%rsp)
- movdqu %xmm5, 0x50(%rsp)
- movdqu %xmm6, 0x60(%rsp)
- movdqu %xmm7, 0x70(%rsp)
- movdqu %xmm8, 0x80(%rsp)
- movdqu %xmm9, 0x90(%rsp)
- movdqu %xmm10, 0xa0(%rsp)
- movdqu %xmm11, 0xb0(%rsp)
- movdqu %xmm12, 0xc0(%rsp)
- movdqu %xmm13, 0xd0(%rsp)
- movdqu %xmm14, 0xe0(%rsp)
- movdqu %xmm15, 0xf0(%rsp)
- # Align stack frame.
- push %rbx # non-scratch
- CFI_ADJUST_CFA_OFFSET(8)
- CFI_REL_OFFSET(%rbx, 0)
- mov %rsp, %rbx # save current rsp
- CFI_DEF_CFA_REGISTER(%rbx)
- shr $4, %rsp # clear 4 lsb, align to 16
- shl $4, %rsp
-
- call ASM_SYMBOL(__tsan_report_race)
-
- # Unalign stack frame back.
- mov %rbx, %rsp # restore the original rsp
- CFI_DEF_CFA_REGISTER(%rsp)
- pop %rbx
- CFI_ADJUST_CFA_OFFSET(-8)
- # Restore scratch registers.
- movdqu 0x0(%rsp), %xmm0
- movdqu 0x10(%rsp), %xmm1
- movdqu 0x20(%rsp), %xmm2
- movdqu 0x30(%rsp), %xmm3
- movdqu 0x40(%rsp), %xmm4
- movdqu 0x50(%rsp), %xmm5
- movdqu 0x60(%rsp), %xmm6
- movdqu 0x70(%rsp), %xmm7
- movdqu 0x80(%rsp), %xmm8
- movdqu 0x90(%rsp), %xmm9
- movdqu 0xa0(%rsp), %xmm10
- movdqu 0xb0(%rsp), %xmm11
- movdqu 0xc0(%rsp), %xmm12
- movdqu 0xd0(%rsp), %xmm13
- movdqu 0xe0(%rsp), %xmm14
- movdqu 0xf0(%rsp), %xmm15
- add $0x100, %rsp
- CFI_ADJUST_CFA_OFFSET(-0x100)
- pop %r11
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r10
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r9
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %r8
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rdi
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rsi
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rdx
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rcx
- CFI_ADJUST_CFA_OFFSET(-8)
- pop %rax
- CFI_ADJUST_CFA_OFFSET(-8)
- CFI_RESTORE(%rax)
- CFI_RESTORE(%rbx)
- CFI_RESTORE(%rcx)
- CFI_RESTORE(%rdx)
- CFI_RESTORE(%rsi)
- CFI_RESTORE(%rdi)
- CFI_RESTORE(%r8)
- CFI_RESTORE(%r9)
- CFI_RESTORE(%r10)
- CFI_RESTORE(%r11)
- ret
- CFI_ENDPROC
-
ASM_HIDDEN(__tsan_setjmp)
#if defined(__NetBSD__)
.comm _ZN14__interception15real___setjmp14E,8,8
diff --git a/libsanitizer/tsan/tsan_rtl_mutex.cpp b/libsanitizer/tsan/tsan_rtl_mutex.cpp
index 7d6b411..2e97885 100644
--- a/libsanitizer/tsan/tsan_rtl_mutex.cpp
+++ b/libsanitizer/tsan/tsan_rtl_mutex.cpp
@@ -23,6 +23,8 @@
namespace __tsan {
void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r);
+void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
+ FastState last_lock, StackID creation_stack_id);
struct Callback final : public DDCallback {
ThreadState *thr;
@@ -36,17 +38,17 @@ struct Callback final : public DDCallback {
}
StackID Unwind() override { return CurrentStackId(thr, pc); }
- int UniqueTid() override { return thr->unique_id; }
+ int UniqueTid() override { return thr->tid; }
};
void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s) {
Callback cb(thr, pc);
ctx->dd->MutexInit(&cb, &s->dd);
- s->dd.ctx = s->GetId();
+ s->dd.ctx = s->addr;
}
static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
- uptr addr, u64 mid) {
+ uptr addr, StackID creation_stack_id) {
// In Go, these misuses are either impossible, or detected by std lib,
// or false positives (e.g. unlock in a different thread).
if (SANITIZER_GO)
@@ -55,7 +57,7 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
return;
ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(typ);
- rep.AddMutex(mid);
+ rep.AddMutex(addr, creation_stack_id);
VarSizeStackTrace trace;
ObtainCurrentStack(thr, pc, &trace);
rep.AddStack(trace, true);
@@ -63,95 +65,94 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
OutputReport(thr, rep);
}
+static void RecordMutexLock(ThreadState *thr, uptr pc, uptr addr,
+ StackID stack_id, bool write) {
+ auto typ = write ? EventType::kLock : EventType::kRLock;
+ // Note: it's important to trace before modifying mutex set
+ // because tracing can switch trace part and we write the current
+ // mutex set in the beginning of each part.
+ // If we do it in the opposite order, we will write already reduced
+ // mutex set in the beginning of the part and then trace unlock again.
+ TraceMutexLock(thr, typ, pc, addr, stack_id);
+ thr->mset.AddAddr(addr, stack_id, write);
+}
+
+static void RecordMutexUnlock(ThreadState *thr, uptr addr) {
+ // See the comment in RecordMutexLock re order of operations.
+ TraceMutexUnlock(thr, addr);
+ thr->mset.DelAddr(addr);
+}
+
void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz);
- if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) {
- CHECK(!thr->is_freeing);
- thr->is_freeing = true;
+ if (!(flagz & MutexFlagLinkerInit) && pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessWrite);
- thr->is_freeing = false;
- }
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
s->SetFlags(flagz & MutexCreationFlagMask);
// Save stack in the case the sync object was created before as atomic.
- if (!SANITIZER_GO && s->creation_stack_id == 0)
+ if (!SANITIZER_GO && s->creation_stack_id == kInvalidStackID)
s->creation_stack_id = CurrentStackId(thr, pc);
}
void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr);
bool unlock_locked = false;
- u64 mid = 0;
- u64 last_lock = 0;
+ StackID creation_stack_id;
+ FastState last_lock;
{
- SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
- if (s == 0)
+ auto s = ctx->metamap.GetSyncIfExists(addr);
+ if (!s)
return;
- Lock l(&s->mtx);
- if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) ||
- ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
- // Destroy is no-op for linker-initialized mutexes.
- return;
- }
- if (common_flags()->detect_deadlocks) {
- Callback cb(thr, pc);
- ctx->dd->MutexDestroy(&cb, &s->dd);
- ctx->dd->MutexInit(&cb, &s->dd);
- }
- if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid &&
- !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- unlock_locked = true;
- }
- mid = s->GetId();
- last_lock = s->last_lock;
- if (!unlock_locked)
- s->Reset(thr->proc()); // must not reset it before the report is printed
- }
- if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
- ThreadRegistryLock l(&ctx->thread_registry);
- ScopedReport rep(ReportTypeMutexDestroyLocked);
- rep.AddMutex(mid);
- VarSizeStackTrace trace;
- ObtainCurrentStack(thr, pc, &trace);
- rep.AddStack(trace, true);
- FastState last(last_lock);
- RestoreStack(last.tid(), last.epoch(), &trace, 0);
- rep.AddStack(trace, true);
- rep.AddLocation(addr, 1);
- OutputReport(thr, rep);
-
- SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
- if (s != 0) {
- Lock l(&s->mtx);
- s->Reset(thr->proc());
+ SlotLocker locker(thr);
+ {
+ Lock lock(&s->mtx);
+ creation_stack_id = s->creation_stack_id;
+ last_lock = s->last_lock;
+ if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) ||
+ ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
+ // Destroy is no-op for linker-initialized mutexes.
+ return;
+ }
+ if (common_flags()->detect_deadlocks) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexDestroy(&cb, &s->dd);
+ ctx->dd->MutexInit(&cb, &s->dd);
+ }
+ if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid &&
+ !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ unlock_locked = true;
+ }
+ s->Reset();
}
+ // Imitate a memory write to catch unlock-destroy races.
+ if (pc && IsAppMem(addr))
+ MemoryAccess(thr, pc, addr, 1,
+ kAccessWrite | kAccessFree | kAccessSlotLocked);
}
- thr->mset.Remove(mid);
- // Imitate a memory write to catch unlock-destroy races.
- // Do this outside of sync mutex, because it can report a race which locks
- // sync mutexes.
- if (IsAppMem(addr))
- MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree);
+ if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked))
+ ReportDestroyLocked(thr, pc, addr, last_lock, creation_stack_id);
+ thr->mset.DelAddr(addr, true);
// s will be destroyed and freed in MetaMap::FreeBlock.
}
void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
- if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- {
- ReadLock l(&s->mtx);
- s->UpdateFlags(flagz);
- if (s->owner_tid != thr->tid) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
- }
- }
- Callback cb(thr, pc);
- ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+ if (flagz & MutexFlagTryLock)
+ return;
+ if (!common_flags()->detect_deadlocks)
+ return;
+ Callback cb(thr, pc);
+ {
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ ReadLock lock(&s->mtx);
+ s->UpdateFlags(flagz);
+ if (s->owner_tid != thr->tid)
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
}
+ ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
}
void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
@@ -161,48 +162,51 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
CHECK_GT(rec, 0);
else
rec = 1;
- if (IsAppMem(addr))
+ if (pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
- u64 mid = 0;
+ bool report_double_lock = false;
bool pre_lock = false;
bool first = false;
- bool report_double_lock = false;
+ StackID creation_stack_id = kInvalidStackID;
{
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
- s->UpdateFlags(flagz);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
- if (s->owner_tid == kInvalidTid) {
- CHECK_EQ(s->recursion, 0);
- s->owner_tid = thr->tid;
- s->last_lock = thr->fast_state.raw();
- } else if (s->owner_tid == thr->tid) {
- CHECK_GT(s->recursion, 0);
- } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_double_lock = true;
- }
- first = s->recursion == 0;
- s->recursion += rec;
- if (first) {
- AcquireImpl(thr, pc, &s->clock);
- AcquireImpl(thr, pc, &s->read_clock);
- } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) {
- }
- thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
- if (first && common_flags()->detect_deadlocks) {
- pre_lock =
- (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
- Callback cb(thr, pc);
- if (pre_lock)
- ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
- ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ creation_stack_id = s->creation_stack_id;
+ RecordMutexLock(thr, pc, addr, creation_stack_id, true);
+ {
+ Lock lock(&s->mtx);
+ first = s->recursion == 0;
+ s->UpdateFlags(flagz);
+ if (s->owner_tid == kInvalidTid) {
+ CHECK_EQ(s->recursion, 0);
+ s->owner_tid = thr->tid;
+ s->last_lock = thr->fast_state;
+ } else if (s->owner_tid == thr->tid) {
+ CHECK_GT(s->recursion, 0);
+ } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_double_lock = true;
+ }
+ s->recursion += rec;
+ if (first) {
+ if (!thr->ignore_sync) {
+ thr->clock.Acquire(s->clock);
+ thr->clock.Acquire(s->read_clock);
+ }
+ }
+ if (first && common_flags()->detect_deadlocks) {
+ pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
+ !(flagz & MutexFlagTryLock);
+ Callback cb(thr, pc);
+ if (pre_lock)
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+ ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
+ }
}
- mid = s->GetId();
}
if (report_double_lock)
- ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr,
+ creation_stack_id);
if (first && pre_lock && common_flags()->detect_deadlocks) {
Callback cb(thr, pc);
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
@@ -211,40 +215,47 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz);
- if (IsAppMem(addr))
+ if (pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
- u64 mid = 0;
+ StackID creation_stack_id;
+ RecordMutexUnlock(thr, addr);
bool report_bad_unlock = false;
int rec = 0;
{
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
- if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
- }
- } else {
- rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
- s->recursion -= rec;
- if (s->recursion == 0) {
- s->owner_tid = kInvalidTid;
- ReleaseStoreImpl(thr, pc, &s->clock);
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ bool released = false;
+ {
+ Lock lock(&s->mtx);
+ creation_stack_id = s->creation_stack_id;
+ if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
+ }
} else {
+ rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
+ s->recursion -= rec;
+ if (s->recursion == 0) {
+ s->owner_tid = kInvalidTid;
+ if (!thr->ignore_sync) {
+ thr->clock.ReleaseStore(&s->clock);
+ released = true;
+ }
+ }
+ }
+ if (common_flags()->detect_deadlocks && s->recursion == 0 &&
+ !report_bad_unlock) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
}
}
- thr->mset.Del(s->GetId(), true);
- if (common_flags()->detect_deadlocks && s->recursion == 0 &&
- !report_bad_unlock) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
- }
- mid = s->GetId();
+ if (released)
+ IncrementEpoch(thr);
}
if (report_bad_unlock)
- ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr,
+ creation_stack_id);
if (common_flags()->detect_deadlocks && !report_bad_unlock) {
Callback cb(thr, pc);
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
@@ -254,53 +265,56 @@ int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
- if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
- {
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- ReadLock l(&s->mtx);
- s->UpdateFlags(flagz);
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
- }
- Callback cb(thr, pc);
- ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+ if ((flagz & MutexFlagTryLock) || !common_flags()->detect_deadlocks)
+ return;
+ Callback cb(thr, pc);
+ {
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ ReadLock lock(&s->mtx);
+ s->UpdateFlags(flagz);
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
}
+ ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
}
void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
- if (IsAppMem(addr))
+ if (pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
- u64 mid = 0;
bool report_bad_lock = false;
bool pre_lock = false;
+ StackID creation_stack_id = kInvalidStackID;
{
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- ReadLock l(&s->mtx);
- s->UpdateFlags(flagz);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
- if (s->owner_tid != kInvalidTid) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_lock = true;
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ creation_stack_id = s->creation_stack_id;
+ RecordMutexLock(thr, pc, addr, creation_stack_id, false);
+ {
+ ReadLock lock(&s->mtx);
+ s->UpdateFlags(flagz);
+ if (s->owner_tid != kInvalidTid) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_lock = true;
+ }
+ }
+ if (!thr->ignore_sync)
+ thr->clock.Acquire(s->clock);
+ s->last_lock = thr->fast_state;
+ if (common_flags()->detect_deadlocks) {
+ pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
+ !(flagz & MutexFlagTryLock);
+ Callback cb(thr, pc);
+ if (pre_lock)
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+ ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
}
}
- AcquireImpl(thr, pc, &s->clock);
- s->last_lock = thr->fast_state.raw();
- thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
- if (common_flags()->detect_deadlocks) {
- pre_lock =
- (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
- Callback cb(thr, pc);
- if (pre_lock)
- ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
- ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
- }
- mid = s->GetId();
}
if (report_bad_lock)
- ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr,
+ creation_stack_id);
if (pre_lock && common_flags()->detect_deadlocks) {
Callback cb(thr, pc);
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
@@ -309,31 +323,39 @@ void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr);
- if (IsAppMem(addr))
+ if (pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
- u64 mid = 0;
+ RecordMutexUnlock(thr, addr);
+ StackID creation_stack_id;
bool report_bad_unlock = false;
{
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
- if (s->owner_tid != kInvalidTid) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ bool released = false;
+ {
+ Lock lock(&s->mtx);
+ creation_stack_id = s->creation_stack_id;
+ if (s->owner_tid != kInvalidTid) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
+ }
+ }
+ if (!thr->ignore_sync) {
+ thr->clock.Release(&s->read_clock);
+ released = true;
+ }
+ if (common_flags()->detect_deadlocks && s->recursion == 0) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false);
}
}
- ReleaseImpl(thr, pc, &s->read_clock);
- if (common_flags()->detect_deadlocks && s->recursion == 0) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false);
- }
- mid = s->GetId();
+ if (released)
+ IncrementEpoch(thr);
}
- thr->mset.Del(mid, false);
if (report_bad_unlock)
- ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr,
+ creation_stack_id);
if (common_flags()->detect_deadlocks) {
Callback cb(thr, pc);
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
@@ -342,44 +364,52 @@ void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) {
void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
- if (IsAppMem(addr))
+ if (pc && IsAppMem(addr))
MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
- u64 mid = 0;
+ RecordMutexUnlock(thr, addr);
+ StackID creation_stack_id;
bool report_bad_unlock = false;
+ bool write = true;
{
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
- bool write = true;
- if (s->owner_tid == kInvalidTid) {
- // Seems to be read unlock.
- write = false;
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
- ReleaseImpl(thr, pc, &s->read_clock);
- } else if (s->owner_tid == thr->tid) {
- // Seems to be write unlock.
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
- CHECK_GT(s->recursion, 0);
- s->recursion--;
- if (s->recursion == 0) {
- s->owner_tid = kInvalidTid;
- ReleaseStoreImpl(thr, pc, &s->clock);
- } else {
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ bool released = false;
+ {
+ Lock lock(&s->mtx);
+ creation_stack_id = s->creation_stack_id;
+ if (s->owner_tid == kInvalidTid) {
+ // Seems to be read unlock.
+ write = false;
+ if (!thr->ignore_sync) {
+ thr->clock.Release(&s->read_clock);
+ released = true;
+ }
+ } else if (s->owner_tid == thr->tid) {
+ // Seems to be write unlock.
+ CHECK_GT(s->recursion, 0);
+ s->recursion--;
+ if (s->recursion == 0) {
+ s->owner_tid = kInvalidTid;
+ if (!thr->ignore_sync) {
+ thr->clock.ReleaseStore(&s->clock);
+ released = true;
+ }
+ }
+ } else if (!s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
+ }
+ if (common_flags()->detect_deadlocks && s->recursion == 0) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write);
}
- } else if (!s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
- }
- thr->mset.Del(s->GetId(), write);
- if (common_flags()->detect_deadlocks && s->recursion == 0) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write);
}
- mid = s->GetId();
+ if (released)
+ IncrementEpoch(thr);
}
if (report_bad_unlock)
- ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr,
+ creation_stack_id);
if (common_flags()->detect_deadlocks) {
Callback cb(thr, pc);
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
@@ -388,143 +418,112 @@ void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
void MutexRepair(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr);
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- Lock l(&s->mtx);
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock lock(&s->mtx);
s->owner_tid = kInvalidTid;
s->recursion = 0;
}
void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr);
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
- ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId());
+ StackID creation_stack_id = kInvalidStackID;
+ {
+ SlotLocker locker(thr);
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ if (s)
+ creation_stack_id = s->creation_stack_id;
+ }
+ ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr,
+ creation_stack_id);
}
void Acquire(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+ auto s = ctx->metamap.GetSyncIfExists(addr);
if (!s)
return;
- ReadLock l(&s->mtx);
- AcquireImpl(thr, pc, &s->clock);
-}
-
-static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) {
- ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
- ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
- u64 epoch = tctx->epoch1;
- if (tctx->status == ThreadStatusRunning) {
- epoch = tctx->thr->fast_state.epoch();
- tctx->thr->clock.NoteGlobalAcquire(epoch);
- }
- thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
+ SlotLocker locker(thr);
+ if (!s->clock)
+ return;
+ ReadLock lock(&s->mtx);
+ thr->clock.Acquire(s->clock);
}
void AcquireGlobal(ThreadState *thr) {
DPrintf("#%d: AcquireGlobal\n", thr->tid);
if (thr->ignore_sync)
return;
- ThreadRegistryLock l(&ctx->thread_registry);
- ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr);
-}
-
-void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) {
- DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr);
- if (thr->ignore_sync)
- return;
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseStoreAcquireImpl(thr, pc, &s->clock);
+ SlotLocker locker(thr);
+ for (auto &slot : ctx->slots) thr->clock.Set(slot.sid, slot.epoch());
}
void Release(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: Release %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseImpl(thr, pc, &s->clock);
+ SlotLocker locker(thr);
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock lock(&s->mtx);
+ thr->clock.Release(&s->clock);
+ }
+ IncrementEpoch(thr);
}
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
- Lock l(&s->mtx);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseStoreImpl(thr, pc, &s->clock);
-}
-
-#if !SANITIZER_GO
-static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) {
- ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
- ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
- u64 epoch = tctx->epoch1;
- if (tctx->status == ThreadStatusRunning)
- epoch = tctx->thr->fast_state.epoch();
- thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
-}
-
-void AfterSleep(ThreadState *thr, uptr pc) {
- DPrintf("#%d: AfterSleep\n", thr->tid);
- if (thr->ignore_sync)
- return;
- thr->last_sleep_stack_id = CurrentStackId(thr, pc);
- ThreadRegistryLock l(&ctx->thread_registry);
- ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback,
- thr);
-}
-#endif
-
-void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
- if (thr->ignore_sync)
- return;
- thr->clock.set(thr->fast_state.epoch());
- thr->clock.acquire(&thr->proc()->clock_cache, c);
-}
-
-void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
- if (thr->ignore_sync)
- return;
- thr->clock.set(thr->fast_state.epoch());
- thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.releaseStoreAcquire(&thr->proc()->clock_cache, c);
+ SlotLocker locker(thr);
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock lock(&s->mtx);
+ thr->clock.ReleaseStore(&s->clock);
+ }
+ IncrementEpoch(thr);
}
-void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) {
+ DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- thr->clock.set(thr->fast_state.epoch());
- thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.release(&thr->proc()->clock_cache, c);
+ SlotLocker locker(thr);
+ {
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock lock(&s->mtx);
+ thr->clock.ReleaseStoreAcquire(&s->clock);
+ }
+ IncrementEpoch(thr);
}
-void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
- if (thr->ignore_sync)
- return;
- thr->clock.set(thr->fast_state.epoch());
- thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.ReleaseStore(&thr->proc()->clock_cache, c);
+void IncrementEpoch(ThreadState *thr) {
+ DCHECK(!thr->ignore_sync);
+ DCHECK(thr->slot_locked);
+ Epoch epoch = EpochInc(thr->fast_state.epoch());
+ if (!EpochOverflow(epoch)) {
+ Sid sid = thr->fast_state.sid();
+ thr->clock.Set(sid, epoch);
+ thr->fast_state.SetEpoch(epoch);
+ thr->slot->SetEpoch(epoch);
+ TraceTime(thr);
+ }
}
-void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+#if !SANITIZER_GO
+void AfterSleep(ThreadState *thr, uptr pc) {
+ DPrintf("#%d: AfterSleep\n", thr->tid);
if (thr->ignore_sync)
return;
- thr->clock.set(thr->fast_state.epoch());
- thr->fast_synch_epoch = thr->fast_state.epoch();
- thr->clock.acq_rel(&thr->proc()->clock_cache, c);
+ thr->last_sleep_stack_id = CurrentStackId(thr, pc);
+ thr->last_sleep_clock.Reset();
+ SlotLocker locker(thr);
+ for (auto &slot : ctx->slots)
+ thr->last_sleep_clock.Set(slot.sid, slot.epoch());
}
+#endif
void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
@@ -532,7 +531,7 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeDeadlock);
for (int i = 0; i < r->n; i++) {
- rep.AddMutex(r->loop[i].mtx_ctx0);
+ rep.AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]);
rep.AddUniqueTid((int)r->loop[i].thr_ctx);
rep.AddThread((int)r->loop[i].thr_ctx);
}
@@ -540,7 +539,7 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
for (int i = 0; i < r->n; i++) {
for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
u32 stk = r->loop[i].stk[j];
- if (stk && stk != 0xffffffff) {
+ if (stk && stk != kInvalidStackID) {
rep.AddStack(StackDepotGet(stk), true);
} else {
// Sometimes we fail to extract the stack trace (FIXME: investigate),
@@ -552,4 +551,28 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
OutputReport(thr, rep);
}
+void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
+ FastState last_lock, StackID creation_stack_id) {
+ // We need to lock the slot during RestoreStack because it protects
+ // the slot journal.
+ Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx);
+ ThreadRegistryLock l0(&ctx->thread_registry);
+ Lock slots_lock(&ctx->slot_mtx);
+ ScopedReport rep(ReportTypeMutexDestroyLocked);
+ rep.AddMutex(addr, creation_stack_id);
+ VarSizeStackTrace trace;
+ ObtainCurrentStack(thr, pc, &trace);
+ rep.AddStack(trace, true);
+
+ Tid tid;
+ DynamicMutexSet mset;
+ uptr tag;
+ if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(), addr,
+ 0, kAccessWrite, &tid, &trace, mset, &tag))
+ return;
+ rep.AddStack(trace, true);
+ rep.AddLocation(addr, 1);
+ OutputReport(thr, rep);
+}
+
} // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_rtl_ppc64.S b/libsanitizer/tsan/tsan_rtl_ppc64.S
index 9e533a7..8285e21 100644
--- a/libsanitizer/tsan/tsan_rtl_ppc64.S
+++ b/libsanitizer/tsan/tsan_rtl_ppc64.S
@@ -1,6 +1,5 @@
#include "tsan_ppc_regs.h"
- .machine altivec
.section .text
.hidden __tsan_setjmp
.globl _setjmp
diff --git a/libsanitizer/tsan/tsan_rtl_proc.cpp b/libsanitizer/tsan/tsan_rtl_proc.cpp
index def61cc..5acc396 100644
--- a/libsanitizer/tsan/tsan_rtl_proc.cpp
+++ b/libsanitizer/tsan/tsan_rtl_proc.cpp
@@ -35,7 +35,6 @@ void ProcDestroy(Processor *proc) {
#if !SANITIZER_GO
AllocatorProcFinish(proc);
#endif
- ctx->clock_alloc.FlushCache(&proc->clock_cache);
ctx->metamap.OnProcIdle(proc);
if (common_flags()->detect_deadlocks)
ctx->dd->DestroyPhysicalThread(proc->dd_pt);
diff --git a/libsanitizer/tsan/tsan_rtl_report.cpp b/libsanitizer/tsan/tsan_rtl_report.cpp
index 811695d..4cf8816 100644
--- a/libsanitizer/tsan/tsan_rtl_report.cpp
+++ b/libsanitizer/tsan/tsan_rtl_report.cpp
@@ -175,22 +175,26 @@ void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) {
}
void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
- StackTrace stack, const MutexSet *mset) {
+ Tid tid, StackTrace stack,
+ const MutexSet *mset) {
+ uptr addr0, size;
+ AccessType typ;
+ s.GetAccess(&addr0, &size, &typ);
auto *mop = New<ReportMop>();
rep_->mops.PushBack(mop);
- mop->tid = s.tid();
- mop->addr = addr + s.addr0();
- mop->size = s.size();
- mop->write = s.IsWrite();
- mop->atomic = s.IsAtomic();
+ mop->tid = tid;
+ mop->addr = addr + addr0;
+ mop->size = size;
+ mop->write = !(typ & kAccessRead);
+ mop->atomic = typ & kAccessAtomic;
mop->stack = SymbolizeStack(stack);
mop->external_tag = external_tag;
if (mop->stack)
mop->stack->suppressable = true;
for (uptr i = 0; i < mset->Size(); i++) {
MutexSet::Desc d = mset->Get(i);
- u64 mid = this->AddMutex(d.id);
- ReportMopMutex mtx = {mid, d.write};
+ int id = this->AddMutex(d.addr, d.stack_id);
+ ReportMopMutex mtx = {id, d.write};
mop->mset.PushBack(mtx);
}
}
@@ -219,18 +223,6 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) {
}
#if !SANITIZER_GO
-static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) {
- int unique_id = *(int *)arg;
- return tctx->unique_id == (u32)unique_id;
-}
-
-static ThreadContext *FindThreadByUidLocked(Tid unique_id) {
- ctx->thread_registry.CheckLocked();
- return static_cast<ThreadContext *>(
- ctx->thread_registry.FindThreadContextLocked(
- FindThreadByUidLockedCallback, &unique_id));
-}
-
static ThreadContext *FindThreadByTidLocked(Tid tid) {
ctx->thread_registry.CheckLocked();
return static_cast<ThreadContext *>(
@@ -262,55 +254,24 @@ ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) {
}
#endif
-void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) {
+void ScopedReportBase::AddThread(Tid tid, bool suppressable) {
#if !SANITIZER_GO
- if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid))
+ if (const ThreadContext *tctx = FindThreadByTidLocked(tid))
AddThread(tctx, suppressable);
#endif
}
-void ScopedReportBase::AddMutex(const SyncVar *s) {
- for (uptr i = 0; i < rep_->mutexes.Size(); i++) {
- if (rep_->mutexes[i]->id == s->uid)
- return;
- }
- auto *rm = New<ReportMutex>();
- rep_->mutexes.PushBack(rm);
- rm->id = s->uid;
- rm->addr = s->addr;
- rm->destroyed = false;
- rm->stack = SymbolizeStackId(s->creation_stack_id);
-}
-
-u64 ScopedReportBase::AddMutex(u64 id) {
- u64 uid = 0;
- u64 mid = id;
- uptr addr = SyncVar::SplitId(id, &uid);
- SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
- // Check that the mutex is still alive.
- // Another mutex can be created at the same address,
- // so check uid as well.
- if (s && s->CheckId(uid)) {
- Lock l(&s->mtx);
- mid = s->uid;
- AddMutex(s);
- } else {
- AddDeadMutex(id);
- }
- return mid;
-}
-
-void ScopedReportBase::AddDeadMutex(u64 id) {
+int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) {
for (uptr i = 0; i < rep_->mutexes.Size(); i++) {
- if (rep_->mutexes[i]->id == id)
- return;
+ if (rep_->mutexes[i]->addr == addr)
+ return rep_->mutexes[i]->id;
}
auto *rm = New<ReportMutex>();
rep_->mutexes.PushBack(rm);
- rm->id = id;
- rm->addr = 0;
- rm->destroyed = true;
- rm->stack = 0;
+ rm->id = rep_->mutexes.Size() - 1;
+ rm->addr = addr;
+ rm->stack = SymbolizeStackId(creation_stack_id);
+ return rm->id;
}
void ScopedReportBase::AddLocation(uptr addr, uptr size) {
@@ -327,7 +288,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
loc->tid = creat_tid;
loc->stack = SymbolizeStackId(creat_stack);
rep_->locs.PushBack(loc);
- ThreadContext *tctx = FindThreadByUidLocked(creat_tid);
+ ThreadContext *tctx = FindThreadByTidLocked(creat_tid);
if (tctx)
AddThread(tctx);
return;
@@ -343,16 +304,15 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
if (!b)
b = JavaHeapBlock(addr, &block_begin);
if (b != 0) {
- ThreadContext *tctx = FindThreadByTidLocked(b->tid);
auto *loc = New<ReportLocation>();
loc->type = ReportLocationHeap;
- loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr);
+ loc->heap_chunk_start = block_begin;
loc->heap_chunk_size = b->siz;
loc->external_tag = b->tag;
- loc->tid = tctx ? tctx->tid : b->tid;
+ loc->tid = b->tid;
loc->stack = SymbolizeStackId(b->stk);
rep_->locs.PushBack(loc);
- if (tctx)
+ if (ThreadContext *tctx = FindThreadByTidLocked(b->tid))
AddThread(tctx);
return;
}
@@ -380,6 +340,8 @@ void ScopedReportBase::AddSleep(StackID stack_id) {
void ScopedReportBase::SetCount(int count) { rep_->count = count; }
+void ScopedReportBase::SetSigNum(int sig) { rep_->signum = sig; }
+
const ReportDesc *ScopedReportBase::GetReport() const { return rep_; }
ScopedReport::ScopedReport(ReportType typ, uptr tag)
@@ -387,71 +349,6 @@ ScopedReport::ScopedReport(ReportType typ, uptr tag)
ScopedReport::~ScopedReport() {}
-void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
- MutexSet *mset, uptr *tag) {
- // This function restores stack trace and mutex set for the thread/epoch.
- // It does so by getting stack trace and mutex set at the beginning of
- // trace part, and then replaying the trace till the given epoch.
- Trace* trace = ThreadTrace(tid);
- ReadLock l(&trace->mtx);
- const int partidx = (epoch / kTracePartSize) % TraceParts();
- TraceHeader* hdr = &trace->headers[partidx];
- if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize)
- return;
- CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0);
- const u64 epoch0 = RoundDown(epoch, TraceSize());
- const u64 eend = epoch % TraceSize();
- const u64 ebegin = RoundDown(eend, kTracePartSize);
- DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n",
- tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx);
- Vector<uptr> stack;
- stack.Resize(hdr->stack0.size + 64);
- for (uptr i = 0; i < hdr->stack0.size; i++) {
- stack[i] = hdr->stack0.trace[i];
- DPrintf2(" #%02zu: pc=%zx\n", i, stack[i]);
- }
- if (mset)
- *mset = hdr->mset0;
- uptr pos = hdr->stack0.size;
- Event *events = (Event*)GetThreadTrace(tid);
- for (uptr i = ebegin; i <= eend; i++) {
- Event ev = events[i];
- EventType typ = (EventType)(ev >> kEventPCBits);
- uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1));
- DPrintf2(" %zu typ=%d pc=%zx\n", i, typ, pc);
- if (typ == EventTypeMop) {
- stack[pos] = pc;
- } else if (typ == EventTypeFuncEnter) {
- if (stack.Size() < pos + 2)
- stack.Resize(pos + 2);
- stack[pos++] = pc;
- } else if (typ == EventTypeFuncExit) {
- if (pos > 0)
- pos--;
- }
- if (mset) {
- if (typ == EventTypeLock) {
- mset->Add(pc, true, epoch0 + i);
- } else if (typ == EventTypeUnlock) {
- mset->Del(pc, true);
- } else if (typ == EventTypeRLock) {
- mset->Add(pc, false, epoch0 + i);
- } else if (typ == EventTypeRUnlock) {
- mset->Del(pc, false);
- }
- }
- for (uptr j = 0; j <= pos; j++)
- DPrintf2(" #%zu: %zx\n", j, stack[j]);
- }
- if (pos == 0 && stack[0] == 0)
- return;
- pos++;
- stk->Init(&stack[0], pos);
- ExtractTagFromStack(stk, tag);
-}
-
-namespace v3 {
-
// Replays the trace up to last_pos position in the last part
// or up to the provided epoch/sid (whichever is earlier)
// and calls the provided function f for each event.
@@ -469,6 +366,7 @@ void TraceReplay(Trace *trace, TracePart *last, Event *last_pos, Sid sid,
Event *end = &part->events[TracePart::kSize - 1];
if (part == last)
end = last_pos;
+ f(kFreeSid, kEpochOver, nullptr); // notify about part start
for (Event *evp = &part->events[0]; evp < end; evp++) {
Event *evp0 = evp;
if (!evp->is_access && !evp->is_func) {
@@ -528,21 +426,36 @@ static constexpr bool IsWithinAccess(uptr addr1, uptr size1, uptr addr2,
return addr1 >= addr2 && addr1 + size1 <= addr2 + size2;
}
-// Replays the trace of thread tid up to the target event identified
-// by sid/epoch/addr/size/typ and restores and returns stack, mutex set
+// Replays the trace of slot sid up to the target event identified
+// by epoch/addr/size/typ and restores and returns tid, stack, mutex set
// and tag for that event. If there are multiple such events, it returns
// the last one. Returns false if the event is not present in the trace.
-bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
- uptr size, AccessType typ, VarSizeStackTrace *pstk,
+bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
+ AccessType typ, Tid *ptid, VarSizeStackTrace *pstk,
MutexSet *pmset, uptr *ptag) {
// This function restores stack trace and mutex set for the thread/epoch.
// It does so by getting stack trace and mutex set at the beginning of
// trace part, and then replaying the trace till the given epoch.
- DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid,
+ DPrintf2("RestoreStack: sid=%u@%u addr=0x%zx/%zu typ=%x\n",
static_cast<int>(sid), static_cast<int>(epoch), addr, size,
static_cast<int>(typ));
ctx->slot_mtx.CheckLocked(); // needed to prevent trace part recycling
ctx->thread_registry.CheckLocked();
+ TidSlot *slot = &ctx->slots[static_cast<uptr>(sid)];
+ Tid tid = kInvalidTid;
+ // Need to lock the slot mutex as it protects slot->journal.
+ slot->mtx.CheckLocked();
+ for (uptr i = 0; i < slot->journal.Size(); i++) {
+ DPrintf2(" journal: epoch=%d tid=%d\n",
+ static_cast<int>(slot->journal[i].epoch), slot->journal[i].tid);
+ if (i == slot->journal.Size() - 1 || slot->journal[i + 1].epoch > epoch) {
+ tid = slot->journal[i].tid;
+ break;
+ }
+ }
+ if (tid == kInvalidTid)
+ return false;
+ *ptid = tid;
ThreadContext *tctx =
static_cast<ThreadContext *>(ctx->thread_registry.GetThreadLocked(tid));
Trace *trace = &tctx->trace;
@@ -553,8 +466,10 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
{
Lock lock(&trace->mtx);
first_part = trace->parts.Front();
- if (!first_part)
+ if (!first_part) {
+ DPrintf2("RestoreStack: tid=%d trace=%p no trace parts\n", tid, trace);
return false;
+ }
last_part = trace->parts.Back();
last_pos = trace->final_pos;
if (tctx->thr)
@@ -567,9 +482,18 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
bool is_read = typ & kAccessRead;
bool is_atomic = typ & kAccessAtomic;
bool is_free = typ & kAccessFree;
+ DPrintf2("RestoreStack: tid=%d parts=[%p-%p] last_pos=%p\n", tid,
+ trace->parts.Front(), last_part, last_pos);
TraceReplay(
trace, last_part, last_pos, sid, epoch,
[&](Sid ev_sid, Epoch ev_epoch, Event *evp) {
+ if (evp == nullptr) {
+ // Each trace part is self-consistent, so we reset state.
+ stack.Resize(0);
+ mset->Reset();
+ prev_pc = 0;
+ return;
+ }
bool match = ev_sid == sid && ev_epoch == epoch;
if (evp->is_access) {
if (evp->is_func == 0 && evp->type == EventType::kAccessExt &&
@@ -592,12 +516,15 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
if (evp->is_func) {
auto *ev = reinterpret_cast<EventFunc *>(evp);
if (ev->pc) {
- DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc);
+ DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc);
stack.PushBack(ev->pc);
} else {
- DPrintf2(" FuncExit\n");
- CHECK(stack.Size());
- stack.PopBack();
+ DPrintf2(" FuncExit\n");
+ // We don't log pathologically large stacks in each part,
+ // if the stack was truncated we can have more func exits than
+ // entries.
+ if (stack.Size())
+ stack.PopBack();
}
return;
}
@@ -666,8 +593,6 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
return found;
}
-} // namespace v3
-
bool RacyStacks::operator==(const RacyStacks &other) const {
if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
return true;
@@ -758,10 +683,7 @@ bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
ctx->fired_suppressions.push_back(s);
}
{
- bool old_is_freeing = thr->is_freeing;
- thr->is_freeing = false;
bool suppressed = OnReport(rep, pc_or_addr != 0);
- thr->is_freeing = old_is_freeing;
if (suppressed) {
thr->current_report = nullptr;
return false;
@@ -808,97 +730,72 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
return false;
}
-static bool RaceBetweenAtomicAndFree(ThreadState *thr) {
- Shadow s0(thr->racy_state[0]);
- Shadow s1(thr->racy_state[1]);
- CHECK(!(s0.IsAtomic() && s1.IsAtomic()));
- if (!s0.IsAtomic() && !s1.IsAtomic())
- return true;
- if (s0.IsAtomic() && s1.IsFreed())
- return true;
- if (s1.IsAtomic() && thr->is_freeing)
- return true;
- return false;
-}
-
-void ReportRace(ThreadState *thr) {
+void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
+ AccessType typ0) {
CheckedMutex::CheckNoLocks();
// Symbolizer makes lots of intercepted calls. If we try to process them,
// at best it will cause deadlocks on internal mutexes.
ScopedIgnoreInterceptors ignore;
+ uptr addr = ShadowToMem(shadow_mem);
+ DPrintf("#%d: ReportRace %p\n", thr->tid, (void *)addr);
if (!ShouldReport(thr, ReportTypeRace))
return;
- if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
+ uptr addr_off0, size0;
+ cur.GetAccess(&addr_off0, &size0, nullptr);
+ uptr addr_off1, size1, typ1;
+ old.GetAccess(&addr_off1, &size1, &typ1);
+ if (!flags()->report_atomic_races &&
+ ((typ0 & kAccessAtomic) || (typ1 & kAccessAtomic)) &&
+ !(typ0 & kAccessFree) && !(typ1 & kAccessFree))
return;
- bool freed = false;
- {
- Shadow s(thr->racy_state[1]);
- freed = s.GetFreedAndReset();
- thr->racy_state[1] = s.raw();
- }
-
- uptr addr = ShadowToMem(thr->racy_shadow_addr);
- uptr addr_min = 0;
- uptr addr_max = 0;
- {
- uptr a0 = addr + Shadow(thr->racy_state[0]).addr0();
- uptr a1 = addr + Shadow(thr->racy_state[1]).addr0();
- uptr e0 = a0 + Shadow(thr->racy_state[0]).size();
- uptr e1 = a1 + Shadow(thr->racy_state[1]).size();
- addr_min = min(a0, a1);
- addr_max = max(e0, e1);
- if (IsExpectedReport(addr_min, addr_max - addr_min))
- return;
- }
+ const uptr kMop = 2;
+ Shadow s[kMop] = {cur, old};
+ uptr addr0 = addr + addr_off0;
+ uptr addr1 = addr + addr_off1;
+ uptr end0 = addr0 + size0;
+ uptr end1 = addr1 + size1;
+ uptr addr_min = min(addr0, addr1);
+ uptr addr_max = max(end0, end1);
+ if (IsExpectedReport(addr_min, addr_max - addr_min))
+ return;
if (HandleRacyAddress(thr, addr_min, addr_max))
return;
- ReportType typ = ReportTypeRace;
- if (thr->is_vptr_access && freed)
- typ = ReportTypeVptrUseAfterFree;
- else if (thr->is_vptr_access)
- typ = ReportTypeVptrRace;
- else if (freed)
- typ = ReportTypeUseAfterFree;
+ ReportType rep_typ = ReportTypeRace;
+ if ((typ0 & kAccessVptr) && (typ1 & kAccessFree))
+ rep_typ = ReportTypeVptrUseAfterFree;
+ else if (typ0 & kAccessVptr)
+ rep_typ = ReportTypeVptrRace;
+ else if (typ1 & kAccessFree)
+ rep_typ = ReportTypeUseAfterFree;
- if (IsFiredSuppression(ctx, typ, addr))
+ if (IsFiredSuppression(ctx, rep_typ, addr))
return;
- const uptr kMop = 2;
VarSizeStackTrace traces[kMop];
- uptr tags[kMop] = {kExternalTagNone};
- uptr toppc = TraceTopPC(thr);
- if (toppc >> kEventPCBits) {
- // This is a work-around for a known issue.
- // The scenario where this happens is rather elaborate and requires
- // an instrumented __sanitizer_report_error_summary callback and
- // a __tsan_symbolize_external callback and a race during a range memory
- // access larger than 8 bytes. MemoryAccessRange adds the current PC to
- // the trace and starts processing memory accesses. A first memory access
- // triggers a race, we report it and call the instrumented
- // __sanitizer_report_error_summary, which adds more stuff to the trace
- // since it is intrumented. Then a second memory access in MemoryAccessRange
- // also triggers a race and we get here and call TraceTopPC to get the
- // current PC, however now it contains some unrelated events from the
- // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit
- // event. Later we subtract -1 from it (in GetPreviousInstructionPc)
- // and the resulting PC has kExternalPCBit set, so we pass it to
- // __tsan_symbolize_external_ex. __tsan_symbolize_external_ex is within its
- // rights to crash since the PC is completely bogus.
- // test/tsan/double_race.cpp contains a test case for this.
- toppc = 0;
- }
- ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]);
- if (IsFiredSuppression(ctx, typ, traces[0]))
+ Tid tids[kMop] = {thr->tid, kInvalidTid};
+ uptr tags[kMop] = {kExternalTagNone, kExternalTagNone};
+
+ ObtainCurrentStack(thr, thr->trace_prev_pc, &traces[0], &tags[0]);
+ if (IsFiredSuppression(ctx, rep_typ, traces[0]))
return;
- DynamicMutexSet mset2;
- Shadow s2(thr->racy_state[1]);
- RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]);
- if (IsFiredSuppression(ctx, typ, traces[1]))
+ DynamicMutexSet mset1;
+ MutexSet *mset[kMop] = {&thr->mset, mset1};
+
+ // We need to lock the slot during RestoreStack because it protects
+ // the slot journal.
+ Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
+ ThreadRegistryLock l0(&ctx->thread_registry);
+ Lock slots_lock(&ctx->slot_mtx);
+ if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,
+ size1, typ1, &tids[1], &traces[1], mset[1], &tags[1]))
+ return;
+
+ if (IsFiredSuppression(ctx, rep_typ, traces[1]))
return;
if (HandleRacyStacks(thr, traces))
@@ -908,39 +805,41 @@ void ReportRace(ThreadState *thr) {
uptr tag = kExternalTagNone;
for (uptr i = 0; i < kMop; i++) {
if (tags[i] != kExternalTagNone) {
- typ = ReportTypeExternalRace;
+ rep_typ = ReportTypeExternalRace;
tag = tags[i];
break;
}
}
- ThreadRegistryLock l0(&ctx->thread_registry);
- ScopedReport rep(typ, tag);
- for (uptr i = 0; i < kMop; i++) {
- Shadow s(thr->racy_state[i]);
- rep.AddMemoryAccess(addr, tags[i], s, traces[i],
- i == 0 ? &thr->mset : mset2);
- }
+ ScopedReport rep(rep_typ, tag);
+ for (uptr i = 0; i < kMop; i++)
+ rep.AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]);
for (uptr i = 0; i < kMop; i++) {
- FastState s(thr->racy_state[i]);
ThreadContext *tctx = static_cast<ThreadContext *>(
- ctx->thread_registry.GetThreadLocked(s.tid()));
- if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1)
- continue;
+ ctx->thread_registry.GetThreadLocked(tids[i]));
rep.AddThread(tctx);
}
rep.AddLocation(addr_min, addr_max - addr_min);
-#if !SANITIZER_GO
- {
- Shadow s(thr->racy_state[1]);
- if (s.epoch() <= thr->last_sleep_clock.get(s.tid()))
- rep.AddSleep(thr->last_sleep_stack_id);
+ if (flags()->print_full_thread_history) {
+ const ReportDesc *rep_desc = rep.GetReport();
+ for (uptr i = 0; i < rep_desc->threads.Size(); i++) {
+ Tid parent_tid = rep_desc->threads[i]->parent_tid;
+ if (parent_tid == kMainTid || parent_tid == kInvalidTid)
+ continue;
+ ThreadContext *parent_tctx = static_cast<ThreadContext *>(
+ ctx->thread_registry.GetThreadLocked(parent_tid));
+ rep.AddThread(parent_tctx);
+ }
}
-#endif
+#if !SANITIZER_GO
+ if (!((typ0 | typ1) & kAccessFree) &&
+ s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid()))
+ rep.AddSleep(thr->last_sleep_stack_id);
+#endif
OutputReport(thr, rep);
}
diff --git a/libsanitizer/tsan/tsan_rtl_thread.cpp b/libsanitizer/tsan/tsan_rtl_thread.cpp
index 6e652ee..86c8b37 100644
--- a/libsanitizer/tsan/tsan_rtl_thread.cpp
+++ b/libsanitizer/tsan/tsan_rtl_thread.cpp
@@ -21,20 +21,14 @@ namespace __tsan {
// ThreadContext implementation.
-ThreadContext::ThreadContext(Tid tid)
- : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {}
+ThreadContext::ThreadContext(Tid tid) : ThreadContextBase(tid), thr(), sync() {}
#if !SANITIZER_GO
ThreadContext::~ThreadContext() {
}
#endif
-void ThreadContext::OnReset() {
- CHECK_EQ(sync.size(), 0);
- uptr trace_p = GetThreadTrace(tid);
- ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event));
- //!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace));
-}
+void ThreadContext::OnReset() { CHECK(!sync); }
#if !SANITIZER_GO
struct ThreadLeak {
@@ -57,7 +51,9 @@ static void CollectThreadLeaks(ThreadContextBase *tctx_base, void *arg) {
}
#endif
-#if !SANITIZER_GO
+// Disabled on Mac because lldb test TestTsanBasic fails:
+// https://reviews.llvm.org/D112603#3163158
+#if !SANITIZER_GO && !SANITIZER_MAC
static void ReportIgnoresEnabled(ThreadContext *tctx, IgnoreSet *set) {
if (tctx->tid == kMainTid) {
Printf("ThreadSanitizer: main thread finished with ignores enabled\n");
@@ -112,30 +108,35 @@ int ThreadCount(ThreadState *thr) {
}
struct OnCreatedArgs {
- ThreadState *thr;
- uptr pc;
+ VectorClock *sync;
+ uptr sync_epoch;
+ StackID stack;
};
Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
- OnCreatedArgs args = { thr, pc };
- u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers.
- Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args);
- DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid);
+ // The main thread and GCD workers don't have a parent thread.
+ Tid parent = kInvalidTid;
+ OnCreatedArgs arg = {nullptr, 0, kInvalidStackID};
+ if (thr) {
+ parent = thr->tid;
+ arg.stack = CurrentStackId(thr, pc);
+ if (!thr->ignore_sync) {
+ SlotLocker locker(thr);
+ thr->clock.ReleaseStore(&arg.sync);
+ arg.sync_epoch = ctx->global_epoch;
+ IncrementEpoch(thr);
+ }
+ }
+ Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent, &arg);
+ DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent, tid, uid);
return tid;
}
void ThreadContext::OnCreated(void *arg) {
- thr = 0;
- if (tid == kMainTid)
- return;
OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
- if (!args->thr) // GCD workers don't have a parent thread.
- return;
- args->thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
- ReleaseImpl(args->thr, 0, &sync);
- creation_stack_id = CurrentStackId(args->thr, args->pc);
+ sync = args->sync;
+ sync_epoch = args->sync_epoch;
+ creation_stack_id = args->stack;
}
extern "C" void __tsan_stack_initialization() {}
@@ -150,6 +151,15 @@ struct OnStartedArgs {
void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type) {
+ ctx->thread_registry.StartThread(tid, os_id, thread_type, thr);
+ if (!thr->ignore_sync) {
+ SlotAttachAndLock(thr);
+ if (thr->tctx->sync_epoch == ctx->global_epoch)
+ thr->clock.Acquire(thr->tctx->sync);
+ SlotUnlock(thr);
+ }
+ Free(thr->tctx->sync);
+
uptr stk_addr = 0;
uptr stk_size = 0;
uptr tls_addr = 0;
@@ -159,12 +169,10 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
&tls_size);
#endif
-
- ThreadRegistry *tr = &ctx->thread_registry;
- OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size };
- tr->StartThread(tid, os_id, thread_type, &args);
-
- while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack();
+ thr->stk_addr = stk_addr;
+ thr->stk_size = stk_size;
+ thr->tls_addr = tls_addr;
+ thr->tls_size = tls_size;
#if !SANITIZER_GO
if (ctx->after_multithreaded_fork) {
@@ -192,69 +200,80 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
}
void ThreadContext::OnStarted(void *arg) {
- OnStartedArgs *args = static_cast<OnStartedArgs *>(arg);
- thr = args->thr;
- // RoundUp so that one trace part does not contain events
- // from different threads.
- epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
- epoch1 = (u64)-1;
- new (thr)
- ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr,
- args->stk_size, args->tls_addr, args->tls_size);
+ thr = static_cast<ThreadState *>(arg);
+ DPrintf("#%d: ThreadStart\n", tid);
+ new (thr) ThreadState(tid);
if (common_flags()->detect_deadlocks)
- thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
- thr->fast_state.SetHistorySize(flags()->history_size);
- // Commit switch to the new part of the trace.
- // TraceAddEvent will reset stack0/mset0 in the new part for us.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
-
- thr->fast_synch_epoch = epoch0;
- AcquireImpl(thr, 0, &sync);
- sync.Reset(&thr->proc()->clock_cache);
+ thr->dd_lt = ctx->dd->CreateLogicalThread(tid);
thr->tctx = this;
+#if !SANITIZER_GO
thr->is_inited = true;
- DPrintf(
- "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
- "tls_addr=%zx tls_size=%zx\n",
- tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr,
- args->tls_size);
+#endif
}
void ThreadFinish(ThreadState *thr) {
+ DPrintf("#%d: ThreadFinish\n", thr->tid);
ThreadCheckIgnore(thr);
if (thr->stk_addr && thr->stk_size)
DontNeedShadowFor(thr->stk_addr, thr->stk_size);
if (thr->tls_addr && thr->tls_size)
DontNeedShadowFor(thr->tls_addr, thr->tls_size);
thr->is_dead = true;
- ctx->thread_registry.FinishThread(thr->tid);
-}
-
-void ThreadContext::OnFinished() {
-#if SANITIZER_GO
+#if !SANITIZER_GO
+ thr->is_inited = false;
+ thr->ignore_interceptors++;
+ PlatformCleanUpThreadState(thr);
+#endif
+ if (!thr->ignore_sync) {
+ SlotLocker locker(thr);
+ ThreadRegistryLock lock(&ctx->thread_registry);
+ // Note: detached is protected by the thread registry mutex,
+ // the thread may be detaching concurrently in another thread.
+ if (!thr->tctx->detached) {
+ thr->clock.ReleaseStore(&thr->tctx->sync);
+ thr->tctx->sync_epoch = ctx->global_epoch;
+ IncrementEpoch(thr);
+ }
+ }
+#if !SANITIZER_GO
+ UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr));
+#else
Free(thr->shadow_stack);
+#endif
+ thr->shadow_stack = nullptr;
thr->shadow_stack_pos = nullptr;
thr->shadow_stack_end = nullptr;
-#endif
- if (!detached) {
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseImpl(thr, 0, &sync);
- }
- epoch1 = thr->fast_state.epoch();
-
if (common_flags()->detect_deadlocks)
ctx->dd->DestroyLogicalThread(thr->dd_lt);
- thr->clock.ResetCached(&thr->proc()->clock_cache);
-#if !SANITIZER_GO
- thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
-#endif
-#if !SANITIZER_GO
- PlatformCleanUpThreadState(thr);
-#endif
+ SlotDetach(thr);
+ ctx->thread_registry.FinishThread(thr->tid);
thr->~ThreadState();
- thr = 0;
+}
+
+void ThreadContext::OnFinished() {
+ Lock lock(&ctx->slot_mtx);
+ Lock lock1(&trace.mtx);
+ // Queue all trace parts into the global recycle queue.
+ auto parts = &trace.parts;
+ while (trace.local_head) {
+ CHECK(parts->Queued(trace.local_head));
+ ctx->trace_part_recycle.PushBack(trace.local_head);
+ trace.local_head = parts->Next(trace.local_head);
+ }
+ ctx->trace_part_recycle_finished += parts->Size();
+ if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadHi) {
+ ctx->trace_part_finished_excess += parts->Size();
+ trace.parts_allocated = 0;
+ } else if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadLo &&
+ parts->Size() > 1) {
+ ctx->trace_part_finished_excess += parts->Size() - 1;
+ trace.parts_allocated = 1;
+ }
+ // From now on replay will use trace->final_pos.
+ trace.final_pos = (Event *)atomic_load_relaxed(&thr->trace_pos);
+ atomic_store_relaxed(&thr->trace_pos, 0);
+ thr->tctx = nullptr;
+ thr = nullptr;
}
struct ConsumeThreadContext {
@@ -262,60 +281,47 @@ struct ConsumeThreadContext {
ThreadContextBase *tctx;
};
-static bool ConsumeThreadByUid(ThreadContextBase *tctx, void *arg) {
- ConsumeThreadContext *findCtx = (ConsumeThreadContext *)arg;
- if (tctx->user_id == findCtx->uid && tctx->status != ThreadStatusInvalid) {
- if (findCtx->tctx) {
- // Ensure that user_id is unique. If it's not the case we are screwed.
- // Something went wrong before, but now there is no way to recover.
- // Returning a wrong thread is not an option, it may lead to very hard
- // to debug false positives (e.g. if we join a wrong thread).
- Report("ThreadSanitizer: dup thread with used id 0x%zx\n", findCtx->uid);
- Die();
- }
- findCtx->tctx = tctx;
- tctx->user_id = 0;
- }
- return false;
-}
-
Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) {
- ConsumeThreadContext findCtx = {uid, nullptr};
- ctx->thread_registry.FindThread(ConsumeThreadByUid, &findCtx);
- Tid tid = findCtx.tctx ? findCtx.tctx->tid : kInvalidTid;
- DPrintf("#%d: ThreadTid uid=%zu tid=%d\n", thr->tid, uid, tid);
- return tid;
+ return ctx->thread_registry.ConsumeThreadUserId(uid);
}
+struct JoinArg {
+ VectorClock *sync;
+ uptr sync_epoch;
+};
+
void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
- CHECK_LT(tid, kMaxTid);
DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid);
- ctx->thread_registry.JoinThread(tid, thr);
+ JoinArg arg = {};
+ ctx->thread_registry.JoinThread(tid, &arg);
+ if (!thr->ignore_sync) {
+ SlotLocker locker(thr);
+ if (arg.sync_epoch == ctx->global_epoch)
+ thr->clock.Acquire(arg.sync);
+ }
+ Free(arg.sync);
}
-void ThreadContext::OnJoined(void *arg) {
- ThreadState *caller_thr = static_cast<ThreadState *>(arg);
- AcquireImpl(caller_thr, 0, &sync);
- sync.Reset(&caller_thr->proc()->clock_cache);
+void ThreadContext::OnJoined(void *ptr) {
+ auto arg = static_cast<JoinArg *>(ptr);
+ arg->sync = sync;
+ arg->sync_epoch = sync_epoch;
+ sync = nullptr;
+ sync_epoch = 0;
}
-void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); }
+void ThreadContext::OnDead() { CHECK_EQ(sync, nullptr); }
void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
- CHECK_LT(tid, kMaxTid);
ctx->thread_registry.DetachThread(tid, thr);
}
-void ThreadContext::OnDetached(void *arg) {
- ThreadState *thr1 = static_cast<ThreadState *>(arg);
- sync.Reset(&thr1->proc()->clock_cache);
-}
+void ThreadContext::OnDetached(void *arg) { Free(sync); }
void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) {
CHECK_GT(tid, 0);
- CHECK_LT(tid, kMaxTid);
ctx->thread_registry.SetThreadUserId(tid, uid);
}
diff --git a/libsanitizer/tsan/tsan_shadow.h b/libsanitizer/tsan/tsan_shadow.h
index 8b7bc34..843573e 100644
--- a/libsanitizer/tsan/tsan_shadow.h
+++ b/libsanitizer/tsan/tsan_shadow.h
@@ -10,223 +10,170 @@
#define TSAN_SHADOW_H
#include "tsan_defs.h"
-#include "tsan_trace.h"
namespace __tsan {
-// FastState (from most significant bit):
-// ignore : 1
-// tid : kTidBits
-// unused : -
-// history_size : 3
-// epoch : kClkBits
class FastState {
public:
- FastState(u64 tid, u64 epoch) {
- x_ = tid << kTidShift;
- x_ |= epoch;
- DCHECK_EQ(tid, this->tid());
- DCHECK_EQ(epoch, this->epoch());
- DCHECK_EQ(GetIgnoreBit(), false);
- }
-
- explicit FastState(u64 x) : x_(x) {}
-
- u64 raw() const { return x_; }
-
- u64 tid() const {
- u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
- return res;
- }
-
- u64 TidWithIgnore() const {
- u64 res = x_ >> kTidShift;
- return res;
- }
-
- u64 epoch() const {
- u64 res = x_ & ((1ull << kClkBits) - 1);
- return res;
- }
+ FastState() { Reset(); }
- void IncrementEpoch() {
- u64 old_epoch = epoch();
- x_ += 1;
- DCHECK_EQ(old_epoch + 1, epoch());
- (void)old_epoch;
+ void Reset() {
+ part_.unused0_ = 0;
+ part_.sid_ = static_cast<u8>(kFreeSid);
+ part_.epoch_ = static_cast<u16>(kEpochLast);
+ part_.unused1_ = 0;
+ part_.ignore_accesses_ = false;
}
- void SetIgnoreBit() { x_ |= kIgnoreBit; }
- void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
- bool GetIgnoreBit() const { return (s64)x_ < 0; }
+ void SetSid(Sid sid) { part_.sid_ = static_cast<u8>(sid); }
- void SetHistorySize(int hs) {
- CHECK_GE(hs, 0);
- CHECK_LE(hs, 7);
- x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
- }
+ Sid sid() const { return static_cast<Sid>(part_.sid_); }
- ALWAYS_INLINE
- int GetHistorySize() const {
- return (int)((x_ >> kHistoryShift) & kHistoryMask);
- }
+ Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); }
- void ClearHistorySize() { SetHistorySize(0); }
+ void SetEpoch(Epoch epoch) { part_.epoch_ = static_cast<u16>(epoch); }
- ALWAYS_INLINE
- u64 GetTracePos() const {
- const int hs = GetHistorySize();
- // When hs == 0, the trace consists of 2 parts.
- const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
- return epoch() & mask;
- }
+ void SetIgnoreBit() { part_.ignore_accesses_ = 1; }
+ void ClearIgnoreBit() { part_.ignore_accesses_ = 0; }
+ bool GetIgnoreBit() const { return part_.ignore_accesses_; }
private:
friend class Shadow;
- static const int kTidShift = 64 - kTidBits - 1;
- static const u64 kIgnoreBit = 1ull << 63;
- static const u64 kFreedBit = 1ull << 63;
- static const u64 kHistoryShift = kClkBits;
- static const u64 kHistoryMask = 7;
- u64 x_;
+ struct Parts {
+ u32 unused0_ : 8;
+ u32 sid_ : 8;
+ u32 epoch_ : kEpochBits;
+ u32 unused1_ : 1;
+ u32 ignore_accesses_ : 1;
+ };
+ union {
+ Parts part_;
+ u32 raw_;
+ };
};
-// Shadow (from most significant bit):
-// freed : 1
-// tid : kTidBits
-// is_atomic : 1
-// is_read : 1
-// size_log : 2
-// addr0 : 3
-// epoch : kClkBits
-class Shadow : public FastState {
- public:
- explicit Shadow(u64 x) : FastState(x) {}
+static_assert(sizeof(FastState) == kShadowSize, "bad FastState size");
- explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); }
-
- void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
- DCHECK_EQ((x_ >> kClkBits) & 31, 0);
- DCHECK_LE(addr0, 7);
- DCHECK_LE(kAccessSizeLog, 3);
- x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
- DCHECK_EQ(kAccessSizeLog, size_log());
- DCHECK_EQ(addr0, this->addr0());
- }
-
- void SetWrite(unsigned kAccessIsWrite) {
- DCHECK_EQ(x_ & kReadBit, 0);
- if (!kAccessIsWrite)
- x_ |= kReadBit;
- DCHECK_EQ(kAccessIsWrite, IsWrite());
- }
-
- void SetAtomic(bool kIsAtomic) {
- DCHECK(!IsAtomic());
- if (kIsAtomic)
- x_ |= kAtomicBit;
- DCHECK_EQ(IsAtomic(), kIsAtomic);
- }
-
- bool IsAtomic() const { return x_ & kAtomicBit; }
-
- bool IsZero() const { return x_ == 0; }
-
- static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
- u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
- DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
- return shifted_xor == 0;
- }
-
- static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1,
- const Shadow s2) {
- u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
- return masked_xor == 0;
+class Shadow {
+ public:
+ static constexpr RawShadow kEmpty = static_cast<RawShadow>(0);
+
+ Shadow(FastState state, u32 addr, u32 size, AccessType typ) {
+ raw_ = state.raw_;
+ DCHECK_GT(size, 0);
+ DCHECK_LE(size, 8);
+ UNUSED Sid sid0 = part_.sid_;
+ UNUSED u16 epoch0 = part_.epoch_;
+ raw_ |= (!!(typ & kAccessAtomic) << kIsAtomicShift) |
+ (!!(typ & kAccessRead) << kIsReadShift) |
+ (((((1u << size) - 1) << (addr & 0x7)) & 0xff) << kAccessShift);
+ // Note: we don't check kAccessAtomic because it overlaps with
+ // FastState::ignore_accesses_ and it may be set spuriously.
+ DCHECK_EQ(part_.is_read_, !!(typ & kAccessRead));
+ DCHECK_EQ(sid(), sid0);
+ DCHECK_EQ(epoch(), epoch0);
+ }
+
+ explicit Shadow(RawShadow x = Shadow::kEmpty) { raw_ = static_cast<u32>(x); }
+
+ RawShadow raw() const { return static_cast<RawShadow>(raw_); }
+ Sid sid() const { return part_.sid_; }
+ Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); }
+ u8 access() const { return part_.access_; }
+
+ void GetAccess(uptr *addr, uptr *size, AccessType *typ) const {
+ DCHECK(part_.access_ != 0 || raw_ == static_cast<u32>(Shadow::kRodata));
+ if (addr)
+ *addr = part_.access_ ? __builtin_ffs(part_.access_) - 1 : 0;
+ if (size)
+ *size = part_.access_ == kFreeAccess ? kShadowCell
+ : __builtin_popcount(part_.access_);
+ if (typ)
+ *typ = (part_.is_read_ ? kAccessRead : kAccessWrite) |
+ (part_.is_atomic_ ? kAccessAtomic : 0) |
+ (part_.access_ == kFreeAccess ? kAccessFree : 0);
}
- static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
- unsigned kS2AccessSize) {
- bool res = false;
- u64 diff = s1.addr0() - s2.addr0();
- if ((s64)diff < 0) { // s1.addr0 < s2.addr0
- // if (s1.addr0() + size1) > s2.addr0()) return true;
- if (s1.size() > -diff)
- res = true;
- } else {
- // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
- if (kS2AccessSize > diff)
- res = true;
- }
- DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
- DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
+ ALWAYS_INLINE
+ bool IsBothReadsOrAtomic(AccessType typ) const {
+ u32 is_read = !!(typ & kAccessRead);
+ u32 is_atomic = !!(typ & kAccessAtomic);
+ bool res =
+ raw_ & ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift));
+ DCHECK_EQ(res,
+ (part_.is_read_ && is_read) || (part_.is_atomic_ && is_atomic));
return res;
}
- u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
- u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
- bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
- bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
-
- // The idea behind the freed bit is as follows.
- // When the memory is freed (or otherwise unaccessible) we write to the shadow
- // values with tid/epoch related to the free and the freed bit set.
- // During memory accesses processing the freed bit is considered
- // as msb of tid. So any access races with shadow with freed bit set
- // (it is as if write from a thread with which we never synchronized before).
- // This allows us to detect accesses to freed memory w/o additional
- // overheads in memory access processing and at the same time restore
- // tid/epoch of free.
- void MarkAsFreed() { x_ |= kFreedBit; }
-
- bool IsFreed() const { return x_ & kFreedBit; }
-
- bool GetFreedAndReset() {
- bool res = x_ & kFreedBit;
- x_ &= ~kFreedBit;
+ ALWAYS_INLINE
+ bool IsRWWeakerOrEqual(AccessType typ) const {
+ u32 is_read = !!(typ & kAccessRead);
+ u32 is_atomic = !!(typ & kAccessAtomic);
+ UNUSED u32 res0 =
+ (part_.is_atomic_ > is_atomic) ||
+ (part_.is_atomic_ == is_atomic && part_.is_read_ >= is_read);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ const u32 kAtomicReadMask = (1 << kIsAtomicShift) | (1 << kIsReadShift);
+ bool res = (raw_ & kAtomicReadMask) >=
+ ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift));
+
+ DCHECK_EQ(res, res0);
return res;
+#else
+ return res0;
+#endif
}
- bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
- bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) |
- (u64(kIsAtomic) << kAtomicShift));
- DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
- return v;
- }
-
- bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
- bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
- DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
- (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
- return v;
+ // The FreedMarker must not pass "the same access check" so that we don't
+ // return from the race detection algorithm early.
+ static RawShadow FreedMarker() {
+ FastState fs;
+ fs.SetSid(kFreeSid);
+ fs.SetEpoch(kEpochLast);
+ Shadow s(fs, 0, 8, kAccessWrite);
+ return s.raw();
}
- bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
- bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
- DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
- (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
- return v;
+ static RawShadow FreedInfo(Sid sid, Epoch epoch) {
+ Shadow s;
+ s.part_.sid_ = sid;
+ s.part_.epoch_ = static_cast<u16>(epoch);
+ s.part_.access_ = kFreeAccess;
+ return s.raw();
}
private:
- static const u64 kReadShift = 5 + kClkBits;
- static const u64 kReadBit = 1ull << kReadShift;
- static const u64 kAtomicShift = 6 + kClkBits;
- static const u64 kAtomicBit = 1ull << kAtomicShift;
-
- u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
-
- static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
- if (s1.addr0() == s2.addr0())
- return true;
- if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
- return true;
- if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
- return true;
- return false;
- }
+ struct Parts {
+ u8 access_;
+ Sid sid_;
+ u16 epoch_ : kEpochBits;
+ u16 is_read_ : 1;
+ u16 is_atomic_ : 1;
+ };
+ union {
+ Parts part_;
+ u32 raw_;
+ };
+
+ static constexpr u8 kFreeAccess = 0x81;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ static constexpr uptr kAccessShift = 0;
+ static constexpr uptr kIsReadShift = 30;
+ static constexpr uptr kIsAtomicShift = 31;
+#else
+ static constexpr uptr kAccessShift = 24;
+ static constexpr uptr kIsReadShift = 1;
+ static constexpr uptr kIsAtomicShift = 0;
+#endif
+
+ public:
+ // .rodata shadow marker, see MapRodata and ContainsSameAccessFast.
+ static constexpr RawShadow kRodata =
+ static_cast<RawShadow>(1 << kIsReadShift);
};
-const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker
+static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size");
} // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_sync.cpp b/libsanitizer/tsan/tsan_sync.cpp
index f042aba..09d4178 100644
--- a/libsanitizer/tsan/tsan_sync.cpp
+++ b/libsanitizer/tsan/tsan_sync.cpp
@@ -18,43 +18,31 @@ namespace __tsan {
void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);
-SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); }
+SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(); }
-void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid,
- bool save_stack) {
+void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack) {
+ Reset();
this->addr = addr;
- this->uid = uid;
- this->next = 0;
-
- creation_stack_id = kInvalidStackID;
+ next = 0;
if (save_stack && !SANITIZER_GO) // Go does not use them
creation_stack_id = CurrentStackId(thr, pc);
if (common_flags()->detect_deadlocks)
DDMutexInit(thr, pc, this);
}
-void SyncVar::Reset(Processor *proc) {
- uid = 0;
+void SyncVar::Reset() {
+ CHECK(!ctx->resetting);
creation_stack_id = kInvalidStackID;
owner_tid = kInvalidTid;
- last_lock = 0;
+ last_lock.Reset();
recursion = 0;
atomic_store_relaxed(&flags, 0);
-
- if (proc == 0) {
- CHECK_EQ(clock.size(), 0);
- CHECK_EQ(read_clock.size(), 0);
- } else {
- clock.Reset(&proc->clock_cache);
- read_clock.Reset(&proc->clock_cache);
- }
+ Free(clock);
+ Free(read_clock);
}
MetaMap::MetaMap()
- : block_alloc_(LINKER_INITIALIZED, "heap block allocator"),
- sync_alloc_(LINKER_INITIALIZED, "sync allocator") {
- atomic_store(&uid_gen_, 0, memory_order_relaxed);
-}
+ : block_alloc_("heap block allocator"), sync_alloc_("sync allocator") {}
void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
@@ -68,16 +56,16 @@ void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
*meta = idx | kFlagBlock;
}
-uptr MetaMap::FreeBlock(Processor *proc, uptr p) {
+uptr MetaMap::FreeBlock(Processor *proc, uptr p, bool reset) {
MBlock* b = GetBlock(p);
if (b == 0)
return 0;
uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
- FreeRange(proc, p, sz);
+ FreeRange(proc, p, sz, reset);
return sz;
}
-bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
+bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) {
bool has_something = false;
u32 *meta = MemToMeta(p);
u32 *end = MemToMeta(p + sz);
@@ -99,7 +87,8 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
DCHECK(idx & kFlagSync);
SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
u32 next = s->next;
- s->Reset(proc);
+ if (reset)
+ s->Reset();
sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask);
idx = next;
} else {
@@ -116,30 +105,30 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
// which can be huge. The function probes pages one-by-one until it finds a page
// without meta objects, at this point it stops freeing meta objects. Because
// thread stacks grow top-down, we do the same starting from end as well.
-void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
+void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) {
if (SANITIZER_GO) {
// UnmapOrDie/MmapFixedNoReserve does not work on Windows,
// so we do the optimization only for C/C++.
- FreeRange(proc, p, sz);
+ FreeRange(proc, p, sz, reset);
return;
}
const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
if (sz <= 4 * kPageSize) {
// If the range is small, just do the normal free procedure.
- FreeRange(proc, p, sz);
+ FreeRange(proc, p, sz, reset);
return;
}
// First, round both ends of the range to page size.
uptr diff = RoundUp(p, kPageSize) - p;
if (diff != 0) {
- FreeRange(proc, p, diff);
+ FreeRange(proc, p, diff, reset);
p += diff;
sz -= diff;
}
diff = p + sz - RoundDown(p + sz, kPageSize);
if (diff != 0) {
- FreeRange(proc, p + sz - diff, diff);
+ FreeRange(proc, p + sz - diff, diff, reset);
sz -= diff;
}
// Now we must have a non-empty page-aligned range.
@@ -150,7 +139,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
const uptr sz0 = sz;
// Probe start of the range.
for (uptr checked = 0; sz > 0; checked += kPageSize) {
- bool has_something = FreeRange(proc, p, kPageSize);
+ bool has_something = FreeRange(proc, p, kPageSize, reset);
p += kPageSize;
sz -= kPageSize;
if (!has_something && checked > (128 << 10))
@@ -158,7 +147,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
}
// Probe end of the range.
for (uptr checked = 0; sz > 0; checked += kPageSize) {
- bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
+ bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize, reset);
sz -= kPageSize;
// Stacks grow down, so sync object are most likely at the end of the region
// (if it is a stack). The very end of the stack is TLS and tsan increases
@@ -177,6 +166,27 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
Die();
}
+void MetaMap::ResetClocks() {
+ // This can be called from the background thread
+ // which does not have proc/cache.
+ // The cache is too large for stack.
+ static InternalAllocatorCache cache;
+ internal_memset(&cache, 0, sizeof(cache));
+ internal_allocator()->InitCache(&cache);
+ sync_alloc_.ForEach([&](SyncVar *s) {
+ if (s->clock) {
+ InternalFree(s->clock, &cache);
+ s->clock = nullptr;
+ }
+ if (s->read_clock) {
+ InternalFree(s->read_clock, &cache);
+ s->read_clock = nullptr;
+ }
+ s->last_lock.Reset();
+ });
+ internal_allocator()->DestroyCache(&cache);
+}
+
MBlock* MetaMap::GetBlock(uptr p) {
u32 *meta = MemToMeta(p);
u32 idx = *meta;
@@ -193,6 +203,7 @@ MBlock* MetaMap::GetBlock(uptr p) {
SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
bool save_stack) {
+ DCHECK(!create || thr->slot_locked);
u32 *meta = MemToMeta(addr);
u32 idx0 = *meta;
u32 myidx = 0;
@@ -203,7 +214,7 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
if (LIKELY(s->addr == addr)) {
if (UNLIKELY(myidx != 0)) {
- mys->Reset(thr->proc());
+ mys->Reset();
sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
}
return s;
@@ -218,10 +229,9 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
}
if (LIKELY(myidx == 0)) {
- const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
mys = sync_alloc_.Map(myidx);
- mys->Init(thr, pc, addr, uid, save_stack);
+ mys->Init(thr, pc, addr, save_stack);
}
mys->next = idx0;
if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
diff --git a/libsanitizer/tsan/tsan_sync.h b/libsanitizer/tsan/tsan_sync.h
index fc8fa28..67d3c0b 100644
--- a/libsanitizer/tsan/tsan_sync.h
+++ b/libsanitizer/tsan/tsan_sync.h
@@ -16,8 +16,9 @@
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
#include "tsan_defs.h"
-#include "tsan_clock.h"
#include "tsan_dense_alloc.h"
+#include "tsan_shadow.h"
+#include "tsan_vector_clock.h"
namespace __tsan {
@@ -53,34 +54,18 @@ struct SyncVar {
uptr addr; // overwritten by DenseSlabAlloc freelist
Mutex mtx;
- u64 uid; // Globally unique id.
StackID creation_stack_id;
Tid owner_tid; // Set only by exclusive owners.
- u64 last_lock;
+ FastState last_lock;
int recursion;
atomic_uint32_t flags;
u32 next; // in MetaMap
DDMutex dd;
- SyncClock read_clock; // Used for rw mutexes only.
- // The clock is placed last, so that it is situated on a different cache line
- // with the mtx. This reduces contention for hot sync objects.
- SyncClock clock;
+ VectorClock *read_clock; // Used for rw mutexes only.
+ VectorClock *clock;
- void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack);
- void Reset(Processor *proc);
-
- u64 GetId() const {
- // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits.
- return GetLsb((u64)addr | (uid << 48), 60);
- }
- bool CheckId(u64 uid) const {
- CHECK_EQ(uid, GetLsb(uid, 14));
- return GetLsb(this->uid, 14) == uid;
- }
- static uptr SplitId(u64 id, u64 *uid) {
- *uid = id >> 48;
- return (uptr)GetLsb(id, 48);
- }
+ void Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack);
+ void Reset();
bool IsFlagSet(u32 f) const {
return atomic_load_relaxed(&flags) & f;
@@ -110,9 +95,20 @@ class MetaMap {
MetaMap();
void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);
- uptr FreeBlock(Processor *proc, uptr p);
- bool FreeRange(Processor *proc, uptr p, uptr sz);
- void ResetRange(Processor *proc, uptr p, uptr sz);
+
+ // FreeBlock resets all sync objects in the range if reset=true and must not
+ // run concurrently with ResetClocks which resets all sync objects
+ // w/o any synchronization (as part of DoReset).
+ // If we don't have a thread slot (very early/late in thread lifetime or
+ // Go/Java callbacks) or the slot is not locked, then reset must be set to
+ // false. In such case sync object clocks will be reset later (when it's
+ // reused or during the next ResetClocks).
+ uptr FreeBlock(Processor *proc, uptr p, bool reset);
+ bool FreeRange(Processor *proc, uptr p, uptr sz, bool reset);
+ void ResetRange(Processor *proc, uptr p, uptr sz, bool reset);
+ // Reset vector clocks of all sync objects.
+ // Must be called when no other threads access sync objects.
+ void ResetClocks();
MBlock* GetBlock(uptr p);
SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr,
@@ -142,7 +138,6 @@ class MetaMap {
typedef DenseSlabAlloc<SyncVar, 1 << 20, 1 << 10, kFlagMask> SyncAlloc;
BlockAlloc block_alloc_;
SyncAlloc sync_alloc_;
- atomic_uint64_t uid_gen_;
SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
bool save_stack);
diff --git a/libsanitizer/tsan/tsan_trace.h b/libsanitizer/tsan/tsan_trace.h
index a771ad9..01bb7b3 100644
--- a/libsanitizer/tsan/tsan_trace.h
+++ b/libsanitizer/tsan/tsan_trace.h
@@ -19,57 +19,6 @@
namespace __tsan {
-const int kTracePartSizeBits = 13;
-const int kTracePartSize = 1 << kTracePartSizeBits;
-const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize;
-const int kTraceSize = kTracePartSize * kTraceParts;
-
-// Must fit into 3 bits.
-enum EventType {
- EventTypeMop,
- EventTypeFuncEnter,
- EventTypeFuncExit,
- EventTypeLock,
- EventTypeUnlock,
- EventTypeRLock,
- EventTypeRUnlock
-};
-
-// Represents a thread event (from most significant bit):
-// u64 typ : 3; // EventType.
-// u64 addr : 61; // Associated pc.
-typedef u64 Event;
-
-const uptr kEventPCBits = 61;
-
-struct TraceHeader {
-#if !SANITIZER_GO
- BufferedStackTrace stack0; // Start stack for the trace.
-#else
- VarSizeStackTrace stack0;
-#endif
- u64 epoch0; // Start epoch for the trace.
- MutexSet mset0;
-
- TraceHeader() : stack0(), epoch0() {}
-};
-
-struct Trace {
- Mutex mtx;
-#if !SANITIZER_GO
- // Must be last to catch overflow as paging fault.
- // Go shadow stack is dynamically allocated.
- uptr shadow_stack[kShadowStackSize];
-#endif
- // Must be the last field, because we unmap the unused part in
- // CreateThreadContext.
- TraceHeader headers[kTraceParts];
-
- Trace() : mtx(MutexTypeTrace) {}
-};
-
-namespace v3 {
-
enum class EventType : u64 {
kAccessExt,
kAccessRange,
@@ -99,6 +48,8 @@ static constexpr Event NopEvent = {1, 0, EventType::kAccessExt, 0};
// close enough to each other. Otherwise we fall back to EventAccessExt.
struct EventAccess {
static constexpr uptr kPCBits = 15;
+ static_assert(kPCBits + kCompressedAddrBits + 5 == 64,
+ "unused bits in EventAccess");
u64 is_access : 1; // = 1
u64 is_read : 1;
@@ -119,13 +70,23 @@ static_assert(sizeof(EventFunc) == 8, "bad EventFunc size");
// Extended memory access with full PC.
struct EventAccessExt {
+ // Note: precisely specifying the unused parts of the bitfield is critical for
+ // performance. If we don't specify them, compiler will generate code to load
+ // the old value and shuffle it to extract the unused bits to apply to the new
+ // value. If we specify the unused part and store 0 in there, all that
+ // unnecessary code goes away (store of the 0 const is combined with other
+ // constant parts).
+ static constexpr uptr kUnusedBits = 11;
+ static_assert(kCompressedAddrBits + kUnusedBits + 9 == 64,
+ "unused bits in EventAccessExt");
+
u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kAccessExt
u64 is_read : 1;
u64 is_atomic : 1;
u64 size_log : 2;
- u64 _ : 11;
+ u64 _ : kUnusedBits;
u64 addr : kCompressedAddrBits;
u64 pc;
};
@@ -134,6 +95,8 @@ static_assert(sizeof(EventAccessExt) == 16, "bad EventAccessExt size");
// Access to a memory range.
struct EventAccessRange {
static constexpr uptr kSizeLoBits = 13;
+ static_assert(kCompressedAddrBits + kSizeLoBits + 7 == 64,
+ "unused bits in EventAccessRange");
u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
@@ -150,6 +113,13 @@ static_assert(sizeof(EventAccessRange) == 16, "bad EventAccessRange size");
// Mutex lock.
struct EventLock {
static constexpr uptr kStackIDLoBits = 15;
+ static constexpr uptr kStackIDHiBits =
+ sizeof(StackID) * kByteBits - kStackIDLoBits;
+ static constexpr uptr kUnusedBits = 3;
+ static_assert(kCompressedAddrBits + kStackIDLoBits + 5 == 64,
+ "unused bits in EventLock");
+ static_assert(kCompressedAddrBits + kStackIDHiBits + kUnusedBits == 64,
+ "unused bits in EventLock");
u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
@@ -157,29 +127,37 @@ struct EventLock {
u64 pc : kCompressedAddrBits;
u64 stack_lo : kStackIDLoBits;
u64 stack_hi : sizeof(StackID) * kByteBits - kStackIDLoBits;
- u64 _ : 3;
+ u64 _ : kUnusedBits;
u64 addr : kCompressedAddrBits;
};
static_assert(sizeof(EventLock) == 16, "bad EventLock size");
// Mutex unlock.
struct EventUnlock {
+ static constexpr uptr kUnusedBits = 15;
+ static_assert(kCompressedAddrBits + kUnusedBits + 5 == 64,
+ "unused bits in EventUnlock");
+
u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kUnlock
- u64 _ : 15;
+ u64 _ : kUnusedBits;
u64 addr : kCompressedAddrBits;
};
static_assert(sizeof(EventUnlock) == 8, "bad EventUnlock size");
// Time change event.
struct EventTime {
+ static constexpr uptr kUnusedBits = 37;
+ static_assert(kUnusedBits + sizeof(Sid) * kByteBits + kEpochBits + 5 == 64,
+ "unused bits in EventTime");
+
u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kTime
u64 sid : sizeof(Sid) * kByteBits;
u64 epoch : kEpochBits;
- u64 _ : 64 - 5 - sizeof(Sid) * kByteBits - kEpochBits;
+ u64 _ : kUnusedBits;
};
static_assert(sizeof(EventTime) == 8, "bad EventTime size");
@@ -188,10 +166,12 @@ struct Trace;
struct TraceHeader {
Trace* trace = nullptr; // back-pointer to Trace containing this part
INode trace_parts; // in Trace::parts
+ INode global; // in Contex::trace_part_recycle
};
struct TracePart : TraceHeader {
- static constexpr uptr kByteSize = 256 << 10;
+ // There are a lot of goroutines in Go, so we use smaller parts.
+ static constexpr uptr kByteSize = (SANITIZER_GO ? 128 : 256) << 10;
static constexpr uptr kSize =
(kByteSize - sizeof(TraceHeader)) / sizeof(Event);
// TraceAcquire does a fast event pointer overflow check by comparing
@@ -209,13 +189,26 @@ static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size");
struct Trace {
Mutex mtx;
IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
- Event* final_pos =
- nullptr; // final position in the last part for finished threads
+ // First node non-queued into ctx->trace_part_recycle.
+ TracePart* local_head;
+ // Final position in the last part for finished threads.
+ Event* final_pos = nullptr;
+ // Number of trace parts allocated on behalf of this trace specifically.
+ // Total number of parts in this trace can be larger if we retake some
+ // parts from other traces.
+ uptr parts_allocated = 0;
Trace() : mtx(MutexTypeTrace) {}
-};
-} // namespace v3
+ // We need at least 3 parts per thread, because we want to keep at last
+ // 2 parts per thread that are not queued into ctx->trace_part_recycle
+ // (the current one being filled and one full part that ensures that
+ // we always have at least one part worth of previous memory accesses).
+ static constexpr uptr kMinParts = 3;
+
+ static constexpr uptr kFinishedThreadLo = 16;
+ static constexpr uptr kFinishedThreadHi = 64;
+};
} // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_update_shadow_word.inc b/libsanitizer/tsan/tsan_update_shadow_word.inc
deleted file mode 100644
index a58ef0f..0000000
--- a/libsanitizer/tsan/tsan_update_shadow_word.inc
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-// Body of the hottest inner loop.
-// If we wrap this body into a function, compilers (both gcc and clang)
-// produce sligtly less efficient code.
-//===----------------------------------------------------------------------===//
-do {
- const unsigned kAccessSize = 1 << kAccessSizeLog;
- u64 *sp = &shadow_mem[idx];
- old = LoadShadow(sp);
- if (LIKELY(old.IsZero())) {
- if (!stored) {
- StoreIfNotYetStored(sp, &store_word);
- stored = true;
- }
- break;
- }
- // is the memory access equal to the previous?
- if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) {
- // same thread?
- if (LIKELY(Shadow::TidsAreEqual(old, cur))) {
- if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) {
- StoreIfNotYetStored(sp, &store_word);
- stored = true;
- }
- break;
- }
- if (HappensBefore(old, thr)) {
- if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) {
- StoreIfNotYetStored(sp, &store_word);
- stored = true;
- }
- break;
- }
- if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)))
- break;
- goto RACE;
- }
- // Do the memory access intersect?
- if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
- if (Shadow::TidsAreEqual(old, cur))
- break;
- if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
- break;
- if (LIKELY(HappensBefore(old, thr)))
- break;
- goto RACE;
- }
- // The accesses do not intersect.
- break;
-} while (0);