author     Martin Liska <mliska@suse.cz>    2022-05-03 12:56:26 +0200
committer  Martin Liska <mliska@suse.cz>    2022-05-04 11:00:48 +0200
commit     f732bf6a603721f61102a08ad2d023c7c2670870
tree       d0d8dafedac59ab6d55b678e53afe19fdd113fba /libsanitizer/tsan
parent     e2285af309000b74da0f7dc756a0b55e5f0b1b56
libsanitizer: merge from upstream (0a1bcab9f3bf75c4c5d3e53bafb3eeb80320af46).
Diffstat (limited to 'libsanitizer/tsan')
40 files changed, 2619 insertions, 3561 deletions
diff --git a/libsanitizer/tsan/tsan_clock.cpp b/libsanitizer/tsan/tsan_clock.cpp deleted file mode 100644 index d122b67..0000000 --- a/libsanitizer/tsan/tsan_clock.cpp +++ /dev/null @@ -1,625 +0,0 @@ -//===-- tsan_clock.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -//===----------------------------------------------------------------------===// -#include "tsan_clock.h" -#include "tsan_rtl.h" -#include "sanitizer_common/sanitizer_placement_new.h" - -// SyncClock and ThreadClock implement vector clocks for sync variables -// (mutexes, atomic variables, file descriptors, etc) and threads, respectively. -// ThreadClock contains fixed-size vector clock for maximum number of threads. -// SyncClock contains growable vector clock for currently necessary number of -// threads. -// Together they implement very simple model of operations, namely: -// -// void ThreadClock::acquire(const SyncClock *src) { -// for (int i = 0; i < kMaxThreads; i++) -// clock[i] = max(clock[i], src->clock[i]); -// } -// -// void ThreadClock::release(SyncClock *dst) const { -// for (int i = 0; i < kMaxThreads; i++) -// dst->clock[i] = max(dst->clock[i], clock[i]); -// } -// -// void ThreadClock::releaseStoreAcquire(SyncClock *sc) const { -// for (int i = 0; i < kMaxThreads; i++) { -// tmp = clock[i]; -// clock[i] = max(clock[i], sc->clock[i]); -// sc->clock[i] = tmp; -// } -// } -// -// void ThreadClock::ReleaseStore(SyncClock *dst) const { -// for (int i = 0; i < kMaxThreads; i++) -// dst->clock[i] = clock[i]; -// } -// -// void ThreadClock::acq_rel(SyncClock *dst) { -// acquire(dst); -// release(dst); -// } -// -// Conformance to this model is extensively verified in tsan_clock_test.cpp. -// However, the implementation is significantly more complex. The complexity -// allows to implement important classes of use cases in O(1) instead of O(N). -// -// The use cases are: -// 1. Singleton/once atomic that has a single release-store operation followed -// by zillions of acquire-loads (the acquire-load is O(1)). -// 2. Thread-local mutex (both lock and unlock can be O(1)). -// 3. Leaf mutex (unlock is O(1)). -// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)). -// 5. An atomic with a single writer (writes can be O(1)). -// The implementation dynamically adopts to workload. So if an atomic is in -// read-only phase, these reads will be O(1); if it later switches to read/write -// phase, the implementation will correctly handle that by switching to O(N). -// -// Thread-safety note: all const operations on SyncClock's are conducted under -// a shared lock; all non-const operations on SyncClock's are conducted under -// an exclusive lock; ThreadClock's are private to respective threads and so -// do not need any protection. -// -// Description of SyncClock state: -// clk_ - variable size vector clock, low kClkBits hold timestamp, -// the remaining bits hold "acquired" flag (the actual value is thread's -// reused counter); -// if acquired == thr->reused_, then the respective thread has already -// acquired this clock (except possibly for dirty elements). 
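The pseudo-code in the deleted header comment above compiles essentially as written. A minimal standalone rendering of that model (hypothetical kMaxThreads and flat arrays standing in for the real fixed-size ThreadClock and growable SyncClock):

    #include <algorithm>
    #include <cstdint>

    // Naive O(N) vector-clock model from the comment above; the real
    // implementation keeps these semantics but adds dirty entries and
    // reuse counters so the common cases run in O(1).
    constexpr int kMaxThreads = 8;  // illustrative; the real code sizes dynamically

    struct SyncClockModel { uint64_t clock[kMaxThreads] = {}; };

    struct ThreadClockModel {
      uint64_t clock[kMaxThreads] = {};

      // acquire: pull every thread's time from the sync object into ours.
      void Acquire(const SyncClockModel *src) {
        for (int i = 0; i < kMaxThreads; i++)
          clock[i] = std::max(clock[i], src->clock[i]);
      }

      // release: push our times into the sync object, merging with max.
      void Release(SyncClockModel *dst) const {
        for (int i = 0; i < kMaxThreads; i++)
          dst->clock[i] = std::max(dst->clock[i], clock[i]);
      }

      // release-store: overwrite the sync object's clock with ours.
      void ReleaseStore(SyncClockModel *dst) const {
        for (int i = 0; i < kMaxThreads; i++)
          dst->clock[i] = clock[i];
      }
    };

The deleted file's description of the SyncClock state continues below.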
-// dirty_ - holds up to two indices in the vector clock that other threads -// need to acquire regardless of "acquired" flag value; -// release_store_tid_ - denotes that the clock state is a result of -// release-store operation by the thread with release_store_tid_ index. -// release_store_reused_ - reuse count of release_store_tid_. - -namespace __tsan { - -static atomic_uint32_t *ref_ptr(ClockBlock *cb) { - return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]); -} - -// Drop reference to the first level block idx. -static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) { - ClockBlock *cb = ctx->clock_alloc.Map(idx); - atomic_uint32_t *ref = ref_ptr(cb); - u32 v = atomic_load(ref, memory_order_acquire); - for (;;) { - CHECK_GT(v, 0); - if (v == 1) - break; - if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel)) - return; - } - // First level block owns second level blocks, so them as well. - for (uptr i = 0; i < blocks; i++) - ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]); - ctx->clock_alloc.Free(c, idx); -} - -ThreadClock::ThreadClock(unsigned tid, unsigned reused) - : tid_(tid) - , reused_(reused + 1) // 0 has special meaning - , last_acquire_() - , global_acquire_() - , cached_idx_() - , cached_size_() - , cached_blocks_() { - CHECK_LT(tid, kMaxTidInClock); - CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits); - nclk_ = tid_ + 1; - internal_memset(clk_, 0, sizeof(clk_)); -} - -void ThreadClock::ResetCached(ClockCache *c) { - if (cached_idx_) { - UnrefClockBlock(c, cached_idx_, cached_blocks_); - cached_idx_ = 0; - cached_size_ = 0; - cached_blocks_ = 0; - } -} - -void ThreadClock::acquire(ClockCache *c, SyncClock *src) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(src->size_, kMaxTid); - - // Check if it's empty -> no need to do anything. - const uptr nclk = src->size_; - if (nclk == 0) - return; - - bool acquired = false; - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty dirty = src->dirty_[i]; - unsigned tid = dirty.tid(); - if (tid != kInvalidTid) { - if (clk_[tid] < dirty.epoch) { - clk_[tid] = dirty.epoch; - acquired = true; - } - } - } - - // Check if we've already acquired src after the last release operation on src - if (tid_ >= nclk || src->elem(tid_).reused != reused_) { - // O(N) acquire. - nclk_ = max(nclk_, nclk); - u64 *dst_pos = &clk_[0]; - for (ClockElem &src_elem : *src) { - u64 epoch = src_elem.epoch; - if (*dst_pos < epoch) { - *dst_pos = epoch; - acquired = true; - } - dst_pos++; - } - - // Remember that this thread has acquired this clock. - if (nclk > tid_) - src->elem(tid_).reused = reused_; - } - - if (acquired) { - last_acquire_ = clk_[tid_]; - ResetCached(c); - } -} - -void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(sc->size_, kMaxTid); - - if (sc->size_ == 0) { - // ReleaseStore will correctly set release_store_tid_, - // which can be important for future operations. - ReleaseStore(c, sc); - return; - } - - nclk_ = max(nclk_, (uptr) sc->size_); - - // Check if we need to resize sc. - if (sc->size_ < nclk_) - sc->Resize(c, nclk_); - - bool acquired = false; - - sc->Unshare(c); - // Update sc->clk_. 
- sc->FlushDirty(); - uptr i = 0; - for (ClockElem &ce : *sc) { - u64 tmp = clk_[i]; - if (clk_[i] < ce.epoch) { - clk_[i] = ce.epoch; - acquired = true; - } - ce.epoch = tmp; - ce.reused = 0; - i++; - } - sc->release_store_tid_ = kInvalidTid; - sc->release_store_reused_ = 0; - - if (acquired) { - last_acquire_ = clk_[tid_]; - ResetCached(c); - } -} - -void ThreadClock::release(ClockCache *c, SyncClock *dst) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(dst->size_, kMaxTid); - - if (dst->size_ == 0) { - // ReleaseStore will correctly set release_store_tid_, - // which can be important for future operations. - ReleaseStore(c, dst); - return; - } - - // Check if we need to resize dst. - if (dst->size_ < nclk_) - dst->Resize(c, nclk_); - - // Check if we had not acquired anything from other threads - // since the last release on dst. If so, we need to update - // only dst->elem(tid_). - if (!HasAcquiredAfterRelease(dst)) { - UpdateCurrentThread(c, dst); - if (dst->release_store_tid_ != tid_ || - dst->release_store_reused_ != reused_) - dst->release_store_tid_ = kInvalidTid; - return; - } - - // O(N) release. - dst->Unshare(c); - // First, remember whether we've acquired dst. - bool acquired = IsAlreadyAcquired(dst); - // Update dst->clk_. - dst->FlushDirty(); - uptr i = 0; - for (ClockElem &ce : *dst) { - ce.epoch = max(ce.epoch, clk_[i]); - ce.reused = 0; - i++; - } - // Clear 'acquired' flag in the remaining elements. - dst->release_store_tid_ = kInvalidTid; - dst->release_store_reused_ = 0; - // If we've acquired dst, remember this fact, - // so that we don't need to acquire it on next acquire. - if (acquired) - dst->elem(tid_).reused = reused_; -} - -void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(dst->size_, kMaxTid); - - if (dst->size_ == 0 && cached_idx_ != 0) { - // Reuse the cached clock. - // Note: we could reuse/cache the cached clock in more cases: - // we could update the existing clock and cache it, or replace it with the - // currently cached clock and release the old one. And for a shared - // existing clock, we could replace it with the currently cached; - // or unshare, update and cache. But, for simplicity, we currently reuse - // cached clock only when the target clock is empty. - dst->tab_ = ctx->clock_alloc.Map(cached_idx_); - dst->tab_idx_ = cached_idx_; - dst->size_ = cached_size_; - dst->blocks_ = cached_blocks_; - CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid); - // The cached clock is shared (immutable), - // so this is where we store the current clock. - dst->dirty_[0].set_tid(tid_); - dst->dirty_[0].epoch = clk_[tid_]; - dst->release_store_tid_ = tid_; - dst->release_store_reused_ = reused_; - // Remember that we don't need to acquire it in future. - dst->elem(tid_).reused = reused_; - // Grab a reference. - atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); - return; - } - - // Check if we need to resize dst. - if (dst->size_ < nclk_) - dst->Resize(c, nclk_); - - if (dst->release_store_tid_ == tid_ && - dst->release_store_reused_ == reused_ && - !HasAcquiredAfterRelease(dst)) { - UpdateCurrentThread(c, dst); - return; - } - - // O(N) release-store. - dst->Unshare(c); - // Note: dst can be larger than this ThreadClock. - // This is fine since clk_ beyond size is all zeros. 
- uptr i = 0; - for (ClockElem &ce : *dst) { - ce.epoch = clk_[i]; - ce.reused = 0; - i++; - } - for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid); - dst->release_store_tid_ = tid_; - dst->release_store_reused_ = reused_; - // Remember that we don't need to acquire it in future. - dst->elem(tid_).reused = reused_; - - // If the resulting clock is cachable, cache it for future release operations. - // The clock is always cachable if we released to an empty sync object. - if (cached_idx_ == 0 && dst->Cachable()) { - // Grab a reference to the ClockBlock. - atomic_uint32_t *ref = ref_ptr(dst->tab_); - if (atomic_load(ref, memory_order_acquire) == 1) - atomic_store_relaxed(ref, 2); - else - atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); - cached_idx_ = dst->tab_idx_; - cached_size_ = dst->size_; - cached_blocks_ = dst->blocks_; - } -} - -void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) { - acquire(c, dst); - ReleaseStore(c, dst); -} - -// Updates only single element related to the current thread in dst->clk_. -void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const { - // Update the threads time, but preserve 'acquired' flag. - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty *dirty = &dst->dirty_[i]; - const unsigned tid = dirty->tid(); - if (tid == tid_ || tid == kInvalidTid) { - dirty->set_tid(tid_); - dirty->epoch = clk_[tid_]; - return; - } - } - // Reset all 'acquired' flags, O(N). - // We are going to touch dst elements, so we need to unshare it. - dst->Unshare(c); - dst->elem(tid_).epoch = clk_[tid_]; - for (uptr i = 0; i < dst->size_; i++) - dst->elem(i).reused = 0; - dst->FlushDirty(); -} - -// Checks whether the current thread has already acquired src. -bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const { - if (src->elem(tid_).reused != reused_) - return false; - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty dirty = src->dirty_[i]; - if (dirty.tid() != kInvalidTid) { - if (clk_[dirty.tid()] < dirty.epoch) - return false; - } - } - return true; -} - -// Checks whether the current thread has acquired anything -// from other clocks after releasing to dst (directly or indirectly). -bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const { - const u64 my_epoch = dst->elem(tid_).epoch; - return my_epoch <= last_acquire_ || - my_epoch <= atomic_load_relaxed(&global_acquire_); -} - -// Sets a single element in the vector clock. -// This function is called only from weird places like AcquireGlobal. -void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) { - DCHECK_LT(tid, kMaxTid); - DCHECK_GE(v, clk_[tid]); - clk_[tid] = v; - if (nclk_ <= tid) - nclk_ = tid + 1; - last_acquire_ = clk_[tid_]; - ResetCached(c); -} - -void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) { - printf("clock=["); - for (uptr i = 0; i < nclk_; i++) - printf("%s%llu", i == 0 ? "" : ",", clk_[i]); - printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_); -} - -SyncClock::SyncClock() { - ResetImpl(); -} - -SyncClock::~SyncClock() { - // Reset must be called before dtor. 
- CHECK_EQ(size_, 0); - CHECK_EQ(blocks_, 0); - CHECK_EQ(tab_, 0); - CHECK_EQ(tab_idx_, 0); -} - -void SyncClock::Reset(ClockCache *c) { - if (size_) - UnrefClockBlock(c, tab_idx_, blocks_); - ResetImpl(); -} - -void SyncClock::ResetImpl() { - tab_ = 0; - tab_idx_ = 0; - size_ = 0; - blocks_ = 0; - release_store_tid_ = kInvalidTid; - release_store_reused_ = 0; - for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid); -} - -void SyncClock::Resize(ClockCache *c, uptr nclk) { - Unshare(c); - if (nclk <= capacity()) { - // Memory is already allocated, just increase the size. - size_ = nclk; - return; - } - if (size_ == 0) { - // Grow from 0 to one-level table. - CHECK_EQ(size_, 0); - CHECK_EQ(blocks_, 0); - CHECK_EQ(tab_, 0); - CHECK_EQ(tab_idx_, 0); - tab_idx_ = ctx->clock_alloc.Alloc(c); - tab_ = ctx->clock_alloc.Map(tab_idx_); - internal_memset(tab_, 0, sizeof(*tab_)); - atomic_store_relaxed(ref_ptr(tab_), 1); - size_ = 1; - } else if (size_ > blocks_ * ClockBlock::kClockCount) { - u32 idx = ctx->clock_alloc.Alloc(c); - ClockBlock *new_cb = ctx->clock_alloc.Map(idx); - uptr top = size_ - blocks_ * ClockBlock::kClockCount; - CHECK_LT(top, ClockBlock::kClockCount); - const uptr move = top * sizeof(tab_->clock[0]); - internal_memcpy(&new_cb->clock[0], tab_->clock, move); - internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move); - internal_memset(tab_->clock, 0, move); - append_block(idx); - } - // At this point we have first level table allocated and all clock elements - // are evacuated from it to a second level block. - // Add second level tables as necessary. - while (nclk > capacity()) { - u32 idx = ctx->clock_alloc.Alloc(c); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - internal_memset(cb, 0, sizeof(*cb)); - append_block(idx); - } - size_ = nclk; -} - -// Flushes all dirty elements into the main clock array. -void SyncClock::FlushDirty() { - for (unsigned i = 0; i < kDirtyTids; i++) { - Dirty *dirty = &dirty_[i]; - if (dirty->tid() != kInvalidTid) { - CHECK_LT(dirty->tid(), size_); - elem(dirty->tid()).epoch = dirty->epoch; - dirty->set_tid(kInvalidTid); - } - } -} - -bool SyncClock::IsShared() const { - if (size_ == 0) - return false; - atomic_uint32_t *ref = ref_ptr(tab_); - u32 v = atomic_load(ref, memory_order_acquire); - CHECK_GT(v, 0); - return v > 1; -} - -// Unshares the current clock if it's shared. -// Shared clocks are immutable, so they need to be unshared before any updates. -// Note: this does not apply to dirty entries as they are not shared. -void SyncClock::Unshare(ClockCache *c) { - if (!IsShared()) - return; - // First, copy current state into old. - SyncClock old; - old.tab_ = tab_; - old.tab_idx_ = tab_idx_; - old.size_ = size_; - old.blocks_ = blocks_; - old.release_store_tid_ = release_store_tid_; - old.release_store_reused_ = release_store_reused_; - for (unsigned i = 0; i < kDirtyTids; i++) - old.dirty_[i] = dirty_[i]; - // Then, clear current object. - ResetImpl(); - // Allocate brand new clock in the current object. - Resize(c, old.size_); - // Now copy state back into this object. - Iter old_iter(&old); - for (ClockElem &ce : *this) { - ce = *old_iter; - ++old_iter; - } - release_store_tid_ = old.release_store_tid_; - release_store_reused_ = old.release_store_reused_; - for (unsigned i = 0; i < kDirtyTids; i++) - dirty_[i] = old.dirty_[i]; - // Drop reference to old and delete if necessary. - old.Reset(c); -} - -// Can we cache this clock for future release operations? 
-ALWAYS_INLINE bool SyncClock::Cachable() const { - if (size_ == 0) - return false; - for (unsigned i = 0; i < kDirtyTids; i++) { - if (dirty_[i].tid() != kInvalidTid) - return false; - } - return atomic_load_relaxed(ref_ptr(tab_)) == 1; -} - -// elem linearizes the two-level structure into linear array. -// Note: this is used only for one time accesses, vector operations use -// the iterator as it is much faster. -ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const { - DCHECK_LT(tid, size_); - const uptr block = tid / ClockBlock::kClockCount; - DCHECK_LE(block, blocks_); - tid %= ClockBlock::kClockCount; - if (block == blocks_) - return tab_->clock[tid]; - u32 idx = get_block(block); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - return cb->clock[tid]; -} - -ALWAYS_INLINE uptr SyncClock::capacity() const { - if (size_ == 0) - return 0; - uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]); - // How many clock elements we can fit into the first level block. - // +1 for ref counter. - uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio; - return blocks_ * ClockBlock::kClockCount + top; -} - -ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const { - DCHECK(size_); - DCHECK_LT(bi, blocks_); - return tab_->table[ClockBlock::kBlockIdx - bi]; -} - -ALWAYS_INLINE void SyncClock::append_block(u32 idx) { - uptr bi = blocks_++; - CHECK_EQ(get_block(bi), 0); - tab_->table[ClockBlock::kBlockIdx - bi] = idx; -} - -// Used only by tests. -u64 SyncClock::get(unsigned tid) const { - for (unsigned i = 0; i < kDirtyTids; i++) { - Dirty dirty = dirty_[i]; - if (dirty.tid() == tid) - return dirty.epoch; - } - return elem(tid).epoch; -} - -// Used only by Iter test. -u64 SyncClock::get_clean(unsigned tid) const { - return elem(tid).epoch; -} - -void SyncClock::DebugDump(int(*printf)(const char *s, ...)) { - printf("clock=["); - for (uptr i = 0; i < size_; i++) - printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch); - printf("] reused=["); - for (uptr i = 0; i < size_; i++) - printf("%s%llu", i == 0 ? "" : ",", elem(i).reused); - printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]", - release_store_tid_, release_store_reused_, dirty_[0].tid(), - dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch); -} - -void SyncClock::Iter::Next() { - // Finished with the current block, move on to the next one. - block_++; - if (block_ < parent_->blocks_) { - // Iterate over the next second level block. - u32 idx = parent_->get_block(block_); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - pos_ = &cb->clock[0]; - end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, - ClockBlock::kClockCount); - return; - } - if (block_ == parent_->blocks_ && - parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) { - // Iterate over elements in the first level block. - pos_ = &parent_->tab_->clock[0]; - end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, - ClockBlock::kClockCount); - return; - } - parent_ = nullptr; // denotes end -} -} // namespace __tsan diff --git a/libsanitizer/tsan/tsan_clock.h b/libsanitizer/tsan/tsan_clock.h deleted file mode 100644 index 11cbc0c..0000000 --- a/libsanitizer/tsan/tsan_clock.h +++ /dev/null @@ -1,293 +0,0 @@ -//===-- tsan_clock.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -//===----------------------------------------------------------------------===// -#ifndef TSAN_CLOCK_H -#define TSAN_CLOCK_H - -#include "tsan_defs.h" -#include "tsan_dense_alloc.h" - -namespace __tsan { - -typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc; -typedef DenseSlabAllocCache ClockCache; - -// The clock that lives in sync variables (mutexes, atomics, etc). -class SyncClock { - public: - SyncClock(); - ~SyncClock(); - - uptr size() const; - - // These are used only in tests. - u64 get(unsigned tid) const; - u64 get_clean(unsigned tid) const; - - void Resize(ClockCache *c, uptr nclk); - void Reset(ClockCache *c); - - void DebugDump(int(*printf)(const char *s, ...)); - - // Clock element iterator. - // Note: it iterates only over the table without regard to dirty entries. - class Iter { - public: - explicit Iter(SyncClock* parent); - Iter& operator++(); - bool operator!=(const Iter& other); - ClockElem &operator*(); - - private: - SyncClock *parent_; - // [pos_, end_) is the current continuous range of clock elements. - ClockElem *pos_; - ClockElem *end_; - int block_; // Current number of second level block. - - NOINLINE void Next(); - }; - - Iter begin(); - Iter end(); - - private: - friend class ThreadClock; - friend class Iter; - static const uptr kDirtyTids = 2; - - struct Dirty { - u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; } - void set_tid(u32 tid) { - tid_ = tid == kInvalidTid ? kShortInvalidTid : tid; - } - u64 epoch : kClkBits; - - private: - // Full kInvalidTid won't fit into Dirty::tid. - static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1; - u64 tid_ : 64 - kClkBits; // kInvalidId if not active - }; - - static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit"); - - unsigned release_store_tid_; - unsigned release_store_reused_; - Dirty dirty_[kDirtyTids]; - // If size_ is 0, tab_ is nullptr. - // If size <= 64 (kClockCount), tab_ contains pointer to an array with - // 64 ClockElem's (ClockBlock::clock). - // Otherwise, tab_ points to an array with up to 127 u32 elements, - // each pointing to the second-level 512b block with 64 ClockElem's. - // Unused space in the first level ClockBlock is used to store additional - // clock elements. - // The last u32 element in the first level ClockBlock is always used as - // reference counter. - // - // See the following scheme for details. - // All memory blocks are 512 bytes (allocated from ClockAlloc). - // Clock (clk) elements are 64 bits. - // Idx and ref are 32 bits. - // - // tab_ - // | - // \/ - // +----------------------------------------------------+ - // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref | - // +----------------------------------------------------+ - // | | - // | \/ - // | +----------------+ - // | | clk0 ... clk63 | - // | +----------------+ - // \/ - // +------------------+ - // | clk64 ... clk127 | - // +------------------+ - // - // Note: dirty entries, if active, always override what's stored in the clock. - ClockBlock *tab_; - u32 tab_idx_; - u16 size_; - u16 blocks_; // Number of second level blocks. 
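The capacity implied by the two-level layout diagrammed above can be checked in isolation. A small sketch of the arithmetic in the deleted SyncClock::capacity(), with the ClockBlock constants inlined (names otherwise illustrative):

    #include <cstdint>
    #include <cstdio>

    // 512-byte blocks, 64-bit clock elements, 32-bit table entries.
    constexpr uintptr_t kClockCount = 512 / sizeof(uint64_t);  // 64 elems/block

    static uintptr_t RoundUpTo(uintptr_t v, uintptr_t m) {
      return (v + m - 1) / m * m;
    }

    // The first-level block stores `blocks` second-level indices plus one
    // reference counter (u32 each); the remaining space holds clock
    // elements directly.
    static uintptr_t Capacity(uintptr_t blocks) {
      uintptr_t ratio = sizeof(uint64_t) / sizeof(uint32_t);  // 2 indices/elem
      uintptr_t top = kClockCount - RoundUpTo(blocks + 1, ratio) / ratio;
      return blocks * kClockCount + top;
    }

    int main() {
      printf("one-level: %zu\n", (size_t)Capacity(0));  // 63 (ref takes an elem slot)
      printf("2 blocks:  %zu\n", (size_t)Capacity(2));  // 190 = 2*64 + 62
    }

Two second-level indices pack into the space of one clock element, so top drops by one element for every two blocks appended.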
- - void Unshare(ClockCache *c); - bool IsShared() const; - bool Cachable() const; - void ResetImpl(); - void FlushDirty(); - uptr capacity() const; - u32 get_block(uptr bi) const; - void append_block(u32 idx); - ClockElem &elem(unsigned tid) const; -}; - -// The clock that lives in threads. -class ThreadClock { - public: - typedef DenseSlabAllocCache Cache; - - explicit ThreadClock(unsigned tid, unsigned reused = 0); - - u64 get(unsigned tid) const; - void set(ClockCache *c, unsigned tid, u64 v); - void set(u64 v); - void tick(); - uptr size() const; - - void acquire(ClockCache *c, SyncClock *src); - void releaseStoreAcquire(ClockCache *c, SyncClock *src); - void release(ClockCache *c, SyncClock *dst); - void acq_rel(ClockCache *c, SyncClock *dst); - void ReleaseStore(ClockCache *c, SyncClock *dst); - void ResetCached(ClockCache *c); - void NoteGlobalAcquire(u64 v); - - void DebugReset(); - void DebugDump(int(*printf)(const char *s, ...)); - - private: - static const uptr kDirtyTids = SyncClock::kDirtyTids; - // Index of the thread associated with he clock ("current thread"). - const unsigned tid_; - const unsigned reused_; // tid_ reuse count. - // Current thread time when it acquired something from other threads. - u64 last_acquire_; - - // Last time another thread has done a global acquire of this thread's clock. - // It helps to avoid problem described in: - // https://github.com/golang/go/issues/39186 - // See test/tsan/java_finalizer2.cpp for a regression test. - // Note the failuire is _extremely_ hard to hit, so if you are trying - // to reproduce it, you may want to run something like: - // $ go get golang.org/x/tools/cmd/stress - // $ stress -p=64 ./a.out - // - // The crux of the problem is roughly as follows. - // A number of O(1) optimizations in the clocks algorithm assume proper - // transitive cumulative propagation of clock values. The AcquireGlobal - // operation may produce an inconsistent non-linearazable view of - // thread clocks. Namely, it may acquire a later value from a thread - // with a higher ID, but fail to acquire an earlier value from a thread - // with a lower ID. If a thread that executed AcquireGlobal then releases - // to a sync clock, it will spoil the sync clock with the inconsistent - // values. If another thread later releases to the sync clock, the optimized - // algorithm may break. - // - // The exact sequence of events that leads to the failure. - // - thread 1 executes AcquireGlobal - // - thread 1 acquires value 1 for thread 2 - // - thread 2 increments clock to 2 - // - thread 2 releases to sync object 1 - // - thread 3 at time 1 - // - thread 3 acquires from sync object 1 - // - thread 3 increments clock to 2 - // - thread 1 acquires value 2 for thread 3 - // - thread 1 releases to sync object 2 - // - sync object 2 clock has 1 for thread 2 and 2 for thread 3 - // - thread 3 releases to sync object 2 - // - thread 3 sees value 2 in the clock for itself - // and decides that it has already released to the clock - // and did not acquire anything from other threads after that - // (the last_acquire_ check in release operation) - // - thread 3 does not update the value for thread 2 in the clock from 1 to 2 - // - thread 4 acquires from sync object 2 - // - thread 4 detects a false race with thread 2 - // as it should have been synchronized with thread 2 up to time 2, - // but because of the broken clock it is now synchronized only up to time 1 - // - // The global_acquire_ value helps to prevent this scenario. 
- // Namely, thread 3 will not trust any own clock values up to global_acquire_ - // for the purposes of the last_acquire_ optimization. - atomic_uint64_t global_acquire_; - - // Cached SyncClock (without dirty entries and release_store_tid_). - // We reuse it for subsequent store-release operations without intervening - // acquire operations. Since it is shared (and thus constant), clock value - // for the current thread is then stored in dirty entries in the SyncClock. - // We host a reference to the table while it is cached here. - u32 cached_idx_; - u16 cached_size_; - u16 cached_blocks_; - - // Number of active elements in the clk_ table (the rest is zeros). - uptr nclk_; - u64 clk_[kMaxTidInClock]; // Fixed size vector clock. - - bool IsAlreadyAcquired(const SyncClock *src) const; - bool HasAcquiredAfterRelease(const SyncClock *dst) const; - void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const; -}; - -ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const { - DCHECK_LT(tid, kMaxTidInClock); - return clk_[tid]; -} - -ALWAYS_INLINE void ThreadClock::set(u64 v) { - DCHECK_GE(v, clk_[tid_]); - clk_[tid_] = v; -} - -ALWAYS_INLINE void ThreadClock::tick() { - clk_[tid_]++; -} - -ALWAYS_INLINE uptr ThreadClock::size() const { - return nclk_; -} - -ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) { - // Here we rely on the fact that AcquireGlobal is protected by - // ThreadRegistryLock, thus only one thread at a time executes it - // and values passed to this function should not go backwards. - CHECK_LE(atomic_load_relaxed(&global_acquire_), v); - atomic_store_relaxed(&global_acquire_, v); -} - -ALWAYS_INLINE SyncClock::Iter SyncClock::begin() { - return Iter(this); -} - -ALWAYS_INLINE SyncClock::Iter SyncClock::end() { - return Iter(nullptr); -} - -ALWAYS_INLINE uptr SyncClock::size() const { - return size_; -} - -ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent) - : parent_(parent) - , pos_(nullptr) - , end_(nullptr) - , block_(-1) { - if (parent) - Next(); -} - -ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() { - pos_++; - if (UNLIKELY(pos_ >= end_)) - Next(); - return *this; -} - -ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) { - return parent_ != other.parent_; -} - -ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() { - return *pos_; -} -} // namespace __tsan - -#endif // TSAN_CLOCK_H diff --git a/libsanitizer/tsan/tsan_debugging.cpp b/libsanitizer/tsan/tsan_debugging.cpp index 1d3c384..1e61c31 100644 --- a/libsanitizer/tsan/tsan_debugging.cpp +++ b/libsanitizer/tsan/tsan_debugging.cpp @@ -157,7 +157,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr, ReportMutex *mutex = rep->mutexes[idx]; *mutex_id = mutex->id; *addr = (void *)mutex->addr; - *destroyed = mutex->destroyed; + *destroyed = false; if (mutex->stack) CopyTrace(mutex->stack->frames, trace, trace_size); return 1; } diff --git a/libsanitizer/tsan/tsan_defs.h b/libsanitizer/tsan/tsan_defs.h index fe0c1da..1ffa3d6 100644 --- a/libsanitizer/tsan/tsan_defs.h +++ b/libsanitizer/tsan/tsan_defs.h @@ -63,41 +63,14 @@ enum class Epoch : u16 {}; constexpr uptr kEpochBits = 14; constexpr Epoch kEpochZero = static_cast<Epoch>(0); constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits); +constexpr Epoch kEpochLast = static_cast<Epoch>((1 << kEpochBits) - 1); -const int kClkBits = 42; -const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; - -struct ClockElem { - u64 epoch : kClkBits; - u64 reused : 64 - kClkBits; // tid reuse 
count -}; - -struct ClockBlock { - static const uptr kSize = 512; - static const uptr kTableSize = kSize / sizeof(u32); - static const uptr kClockCount = kSize / sizeof(ClockElem); - static const uptr kRefIdx = kTableSize - 1; - static const uptr kBlockIdx = kTableSize - 2; - - union { - u32 table[kTableSize]; - ClockElem clock[kClockCount]; - }; +inline Epoch EpochInc(Epoch epoch) { + return static_cast<Epoch>(static_cast<u16>(epoch) + 1); +} - ClockBlock() { - } -}; +inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; } -const int kTidBits = 13; -// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is -// occupied by reference counter, so total number of elements we can store -// in SyncClock is kClockCount * (kTableSize - 1). -const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount; -#if !SANITIZER_GO -const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit. -#else -const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory. -#endif const uptr kShadowStackSize = 64 * 1024; // Count of shadow values in a shadow cell. @@ -107,7 +80,7 @@ const uptr kShadowCnt = 4; const uptr kShadowCell = 8; // Single shadow value. -typedef u64 RawShadow; +enum class RawShadow : u32 {}; const uptr kShadowSize = sizeof(RawShadow); // Shadow memory is kShadowMultiplier times larger than user memory. @@ -184,10 +157,13 @@ MD5Hash md5_hash(const void *data, uptr size); struct Processor; struct ThreadState; class ThreadContext; +struct TidSlot; struct Context; struct ReportStack; class ReportDesc; class RegionAlloc; +struct Trace; +struct TracePart; typedef uptr AccessType; @@ -198,6 +174,9 @@ enum : AccessType { kAccessVptr = 1 << 2, // read or write of an object virtual table pointer kAccessFree = 1 << 3, // synthetic memory access during memory freeing kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set + kAccessCheckOnly = 1 << 5, // check for races, but don't store + kAccessNoRodata = 1 << 6, // don't check for .rodata marker + kAccessSlotLocked = 1 << 7, // memory access with TidSlot locked }; // Descriptor of user's memory block. @@ -219,15 +198,18 @@ enum ExternalTag : uptr { // as 16-bit values, see tsan_defs.h. }; -enum MutexType { - MutexTypeTrace = MutexLastCommon, - MutexTypeReport, +enum { + MutexTypeReport = MutexLastCommon, MutexTypeSyncVar, MutexTypeAnnotations, MutexTypeAtExit, MutexTypeFired, MutexTypeRacy, MutexTypeGlobalProc, + MutexTypeInternalAlloc, + MutexTypeTrace, + MutexTypeSlot, + MutexTypeSlots, }; } // namespace __tsan diff --git a/libsanitizer/tsan/tsan_dense_alloc.h b/libsanitizer/tsan/tsan_dense_alloc.h index 9e15f74..7a39a39 100644 --- a/libsanitizer/tsan/tsan_dense_alloc.h +++ b/libsanitizer/tsan/tsan_dense_alloc.h @@ -104,6 +104,15 @@ class DenseSlabAlloc { return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T); } + template <typename Func> + void ForEach(Func func) { + SpinMutexLock lock(&mtx_); + uptr fillpos = atomic_load_relaxed(&fillpos_); + for (uptr l1 = 0; l1 < fillpos; l1++) { + for (IndexT l2 = l1 == 0 ? 
1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]); + } + } + private: T *map_[kL1Size]; SpinMutex mtx_; diff --git a/libsanitizer/tsan/tsan_fd.cpp b/libsanitizer/tsan/tsan_fd.cpp index 255ffa8..cf8f491 100644 --- a/libsanitizer/tsan/tsan_fd.cpp +++ b/libsanitizer/tsan/tsan_fd.cpp @@ -11,9 +11,12 @@ //===----------------------------------------------------------------------===// #include "tsan_fd.h" -#include "tsan_rtl.h" + #include <sanitizer_common/sanitizer_atomic.h> +#include "tsan_interceptors.h" +#include "tsan_rtl.h" + namespace __tsan { const int kTableSizeL1 = 1024; @@ -26,6 +29,9 @@ struct FdSync { struct FdDesc { FdSync *sync; + // This is used to establish write -> epoll_wait synchronization + // where epoll_wait receives notification about the write. + atomic_uintptr_t aux_sync; // FdSync* Tid creation_tid; StackID creation_stack; }; @@ -100,6 +106,10 @@ static void init(ThreadState *thr, uptr pc, int fd, FdSync *s, unref(thr, pc, d->sync); d->sync = 0; } + unref(thr, pc, + reinterpret_cast<FdSync *>( + atomic_load(&d->aux_sync, memory_order_relaxed))); + atomic_store(&d->aux_sync, 0, memory_order_relaxed); if (flags()->io_sync == 0) { unref(thr, pc, s); } else if (flags()->io_sync == 1) { @@ -110,12 +120,17 @@ static void init(ThreadState *thr, uptr pc, int fd, FdSync *s, } d->creation_tid = thr->tid; d->creation_stack = CurrentStackId(thr, pc); + // This prevents false positives on fd_close_norace3.cpp test. + // The mechanics of the false positive are not completely clear, + // but it happens only if global reset is enabled (flush_memory_ms=1) + // and may be related to lost writes during asynchronous MADV_DONTNEED. + SlotLocker locker(thr); if (write) { // To catch races between fd usage and open. MemoryRangeImitateWrite(thr, pc, (uptr)d, 8); } else { // See the dup-related comment in FdClose. - MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead); + MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead | kAccessSlotLocked); } } @@ -177,6 +192,8 @@ void FdRelease(ThreadState *thr, uptr pc, int fd) { MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead); if (s) Release(thr, pc, (uptr)s); + if (uptr aux_sync = atomic_load(&d->aux_sync, memory_order_acquire)) + Release(thr, pc, aux_sync); } void FdAccess(ThreadState *thr, uptr pc, int fd) { @@ -192,25 +209,39 @@ void FdClose(ThreadState *thr, uptr pc, int fd, bool write) { if (bogusfd(fd)) return; FdDesc *d = fddesc(thr, pc, fd); - if (write) { - // To catch races between fd usage and close. - MemoryAccess(thr, pc, (uptr)d, 8, kAccessWrite); - } else { - // This path is used only by dup2/dup3 calls. - // We do read instead of write because there is a number of legitimate - // cases where write would lead to false positives: - // 1. Some software dups a closed pipe in place of a socket before closing - // the socket (to prevent races actually). - // 2. Some daemons dup /dev/null in place of stdin/stdout. - // On the other hand we have not seen cases when write here catches real - // bugs. - MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead); + { + // Need to lock the slot to make MemoryAccess and MemoryResetRange atomic + // with respect to global reset. See the comment in MemoryRangeFreed. + SlotLocker locker(thr); + if (!MustIgnoreInterceptor(thr)) { + if (write) { + // To catch races between fd usage and close. + MemoryAccess(thr, pc, (uptr)d, 8, + kAccessWrite | kAccessCheckOnly | kAccessSlotLocked); + } else { + // This path is used only by dup2/dup3 calls. 
+ // We do read instead of write because there is a number of legitimate + // cases where write would lead to false positives: + // 1. Some software dups a closed pipe in place of a socket before + // closing + // the socket (to prevent races actually). + // 2. Some daemons dup /dev/null in place of stdin/stdout. + // On the other hand we have not seen cases when write here catches real + // bugs. + MemoryAccess(thr, pc, (uptr)d, 8, + kAccessRead | kAccessCheckOnly | kAccessSlotLocked); + } + } + // We need to clear it, because if we do not intercept any call out there + // that creates fd, we will hit false postives. + MemoryResetRange(thr, pc, (uptr)d, 8); } - // We need to clear it, because if we do not intercept any call out there - // that creates fd, we will hit false postives. - MemoryResetRange(thr, pc, (uptr)d, 8); unref(thr, pc, d->sync); d->sync = 0; + unref(thr, pc, + reinterpret_cast<FdSync *>( + atomic_load(&d->aux_sync, memory_order_relaxed))); + atomic_store(&d->aux_sync, 0, memory_order_relaxed); d->creation_tid = kInvalidTid; d->creation_stack = kInvalidStackID; } @@ -269,6 +300,30 @@ void FdPollCreate(ThreadState *thr, uptr pc, int fd) { init(thr, pc, fd, allocsync(thr, pc)); } +void FdPollAdd(ThreadState *thr, uptr pc, int epfd, int fd) { + DPrintf("#%d: FdPollAdd(%d, %d)\n", thr->tid, epfd, fd); + if (bogusfd(epfd) || bogusfd(fd)) + return; + FdDesc *d = fddesc(thr, pc, fd); + // Associate fd with epoll fd only once. + // While an fd can be associated with multiple epolls at the same time, + // or with different epolls during different phases of lifetime, + // synchronization semantics (and examples) of this are unclear. + // So we don't support this for now. + // If we change the association, it will also create lifetime management + // problem for FdRelease which accesses the aux_sync. + if (atomic_load(&d->aux_sync, memory_order_relaxed)) + return; + FdDesc *epd = fddesc(thr, pc, epfd); + FdSync *s = epd->sync; + if (!s) + return; + uptr cmp = 0; + if (atomic_compare_exchange_strong( + &d->aux_sync, &cmp, reinterpret_cast<uptr>(s), memory_order_release)) + ref(s); +} + void FdSocketCreate(ThreadState *thr, uptr pc, int fd) { DPrintf("#%d: FdSocketCreate(%d)\n", thr->tid, fd); if (bogusfd(fd)) diff --git a/libsanitizer/tsan/tsan_fd.h b/libsanitizer/tsan/tsan_fd.h index d964817..92625dc 100644 --- a/libsanitizer/tsan/tsan_fd.h +++ b/libsanitizer/tsan/tsan_fd.h @@ -49,6 +49,7 @@ void FdEventCreate(ThreadState *thr, uptr pc, int fd); void FdSignalCreate(ThreadState *thr, uptr pc, int fd); void FdInotifyCreate(ThreadState *thr, uptr pc, int fd); void FdPollCreate(ThreadState *thr, uptr pc, int fd); +void FdPollAdd(ThreadState *thr, uptr pc, int epfd, int fd); void FdSocketCreate(ThreadState *thr, uptr pc, int fd); void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd); void FdSocketConnecting(ThreadState *thr, uptr pc, int fd); diff --git a/libsanitizer/tsan/tsan_flags.cpp b/libsanitizer/tsan/tsan_flags.cpp index ee89862..ee78f25 100644 --- a/libsanitizer/tsan/tsan_flags.cpp +++ b/libsanitizer/tsan/tsan_flags.cpp @@ -97,7 +97,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS"); #endif - // Sanity check. + // Check flags. 
if (!f->report_bugs) { f->report_thread_leaks = false; f->report_destroy_locked = false; @@ -110,12 +110,6 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { if (common_flags()->help) parser.PrintFlagDescriptions(); - if (f->history_size < 0 || f->history_size > 7) { - Printf("ThreadSanitizer: incorrect value for history_size" - " (must be [0..7])\n"); - Die(); - } - if (f->io_sync < 0 || f->io_sync > 2) { Printf("ThreadSanitizer: incorrect value for io_sync" " (must be [0..2])\n"); diff --git a/libsanitizer/tsan/tsan_flags.inc b/libsanitizer/tsan/tsan_flags.inc index 7954a430..32cf3bb 100644 --- a/libsanitizer/tsan/tsan_flags.inc +++ b/libsanitizer/tsan/tsan_flags.inc @@ -43,6 +43,9 @@ TSAN_FLAG( bool, force_seq_cst_atomics, false, "If set, all atomics are effectively sequentially consistent (seq_cst), " "regardless of what user actually specified.") +TSAN_FLAG(bool, force_background_thread, false, + "If set, eagerly launch a background thread for memory reclamation " + "instead of waiting for a user call to pthread_create.") TSAN_FLAG(bool, halt_on_error, false, "Exit after first reported error.") TSAN_FLAG(int, atexit_sleep_ms, 1000, "Sleep in main thread before exiting for that many ms " @@ -59,14 +62,10 @@ TSAN_FLAG(bool, stop_on_start, false, "Stops on start until __tsan_resume() is called (for debugging).") TSAN_FLAG(bool, running_on_valgrind, false, "Controls whether RunningOnValgrind() returns true or false.") -// There are a lot of goroutines in Go, so we use smaller history. TSAN_FLAG( - int, history_size, SANITIZER_GO ? 1 : 3, - "Per-thread history size, controls how many previous memory accesses " - "are remembered per thread. Possible values are [0..7]. " - "history_size=0 amounts to 32K memory accesses. Each next value doubles " - "the amount of memory accesses, up to history_size=7 that amounts to " - "4M memory accesses. The default value is 2 (128K memory accesses).") + uptr, history_size, 0, + "Per-thread history size," + " controls how many extra previous memory accesses are remembered per thread.") TSAN_FLAG(int, io_sync, 1, "Controls level of synchronization implied by IO operations. " "0 - no synchronization " @@ -82,3 +81,6 @@ TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false, "modules.") TSAN_FLAG(bool, shared_ptr_interceptor, true, "Track atomic reference counting in libc++ shared_ptr and weak_ptr.") +TSAN_FLAG(bool, print_full_thread_history, false, + "If set, prints thread creation stacks for the threads involved in " + "the report and their ancestors up to the main thread.") diff --git a/libsanitizer/tsan/tsan_interceptors.h b/libsanitizer/tsan/tsan_interceptors.h index 61dbb81..3091ad8 100644 --- a/libsanitizer/tsan/tsan_interceptors.h +++ b/libsanitizer/tsan/tsan_interceptors.h @@ -36,6 +36,10 @@ inline bool in_symbolizer() { } #endif +inline bool MustIgnoreInterceptor(ThreadState *thr) { + return !thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib; +} + } // namespace __tsan #define SCOPED_INTERCEPTOR_RAW(func, ...) \ @@ -60,10 +64,10 @@ inline bool in_symbolizer() { # define CHECK_REAL_FUNC(func) DCHECK(REAL(func)) #endif -#define SCOPED_TSAN_INTERCEPTOR(func, ...) \ - SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \ - CHECK_REAL_FUNC(func); \ - if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \ +#define SCOPED_TSAN_INTERCEPTOR(func, ...) 
\ + SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \ + CHECK_REAL_FUNC(func); \ + if (MustIgnoreInterceptor(thr)) \ return REAL(func)(__VA_ARGS__); #define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \ @@ -74,6 +78,14 @@ inline bool in_symbolizer() { #define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__) +#if SANITIZER_FREEBSD +# define TSAN_INTERCEPTOR_FREEBSD_ALIAS(ret, func, ...) \ + TSAN_INTERCEPTOR(ret, _pthread_##func, __VA_ARGS__) \ + ALIAS(WRAPPER_NAME(pthread_##func)); +#else +# define TSAN_INTERCEPTOR_FREEBSD_ALIAS(ret, func, ...) +#endif + #if SANITIZER_NETBSD # define TSAN_INTERCEPTOR_NETBSD_ALIAS(ret, func, ...) \ TSAN_INTERCEPTOR(ret, __libc_##func, __VA_ARGS__) \ diff --git a/libsanitizer/tsan/tsan_interceptors_posix.cpp b/libsanitizer/tsan/tsan_interceptors_posix.cpp index 9a85ee0..60ca963 100644 --- a/libsanitizer/tsan/tsan_interceptors_posix.cpp +++ b/libsanitizer/tsan/tsan_interceptors_posix.cpp @@ -90,6 +90,7 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *) DECLARE_REAL(int, fflush, __sanitizer_FILE *fp) DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size) DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr) +extern "C" int pthread_equal(void *t1, void *t2); extern "C" void *pthread_self(); extern "C" void _exit(int status); #if !SANITIZER_NETBSD @@ -176,6 +177,7 @@ struct ThreadSignalContext { struct AtExitCtx { void (*f)(); void *arg; + uptr pc; }; // InterceptorContext holds all global data required for interceptors. @@ -287,20 +289,25 @@ void ScopedInterceptor::DisableIgnoresImpl() { } #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func) +#if SANITIZER_FREEBSD || SANITIZER_NETBSD +# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func) +#else +# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver) +#endif #if SANITIZER_FREEBSD -# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) -#elif SANITIZER_NETBSD -# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) \ - INTERCEPT_FUNCTION(__libc_##func) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) \ - INTERCEPT_FUNCTION(__libc_thr_##func) +# define TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(func) \ + INTERCEPT_FUNCTION(_pthread_##func) #else -# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) -# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) +# define TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(func) +#endif +#if SANITIZER_NETBSD +# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) \ + INTERCEPT_FUNCTION(__libc_##func) +# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) \ + INTERCEPT_FUNCTION(__libc_thr_##func) +#else +# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) +# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) #endif #define READ_STRING_OF_LEN(thr, pc, s, len, n) \ @@ -366,7 +373,10 @@ TSAN_INTERCEPTOR(int, pause, int fake) { return BLOCK_REAL(pause)(fake); } -static void at_exit_wrapper() { +// Note: we specifically call the function in such strange way +// with "installed_at" because in reports it will appear between +// callback frames and the frame that installed the callback. +static void at_exit_callback_installed_at() { AtExitCtx *ctx; { // Ensure thread-safety. 
@@ -378,15 +388,21 @@ static void at_exit_wrapper() { interceptor_ctx()->AtExitStack.PopBack(); } - Acquire(cur_thread(), (uptr)0, (uptr)ctx); + ThreadState *thr = cur_thread(); + Acquire(thr, ctx->pc, (uptr)ctx); + FuncEntry(thr, ctx->pc); ((void(*)())ctx->f)(); + FuncExit(thr); Free(ctx); } -static void cxa_at_exit_wrapper(void *arg) { - Acquire(cur_thread(), 0, (uptr)arg); +static void cxa_at_exit_callback_installed_at(void *arg) { + ThreadState *thr = cur_thread(); AtExitCtx *ctx = (AtExitCtx*)arg; + Acquire(thr, ctx->pc, (uptr)arg); + FuncEntry(thr, ctx->pc); ((void(*)(void *arg))ctx->f)(ctx->arg); + FuncExit(thr); Free(ctx); } @@ -400,7 +416,7 @@ TSAN_INTERCEPTOR(int, atexit, void (*f)()) { // We want to setup the atexit callback even if we are in ignored lib // or after fork. SCOPED_INTERCEPTOR_RAW(atexit, f); - return setup_at_exit_wrapper(thr, pc, (void(*)())f, 0, 0); + return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, 0, 0); } #endif @@ -408,7 +424,7 @@ TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) { if (in_symbolizer()) return 0; SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso); - return setup_at_exit_wrapper(thr, pc, (void(*)())f, arg, dso); + return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, arg, dso); } static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), @@ -416,6 +432,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), auto *ctx = New<AtExitCtx>(); ctx->f = f; ctx->arg = arg; + ctx->pc = pc; Release(thr, pc, (uptr)ctx); // Memory allocation in __cxa_atexit will race with free during exit, // because we do not see synchronization around atexit callback list. @@ -431,25 +448,27 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), // due to atexit_mu held on exit from the calloc interceptor. ScopedIgnoreInterceptors ignore; - res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_wrapper, 0, 0); + res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_callback_installed_at, + 0, 0); // Push AtExitCtx on the top of the stack of callback functions if (!res) { interceptor_ctx()->AtExitStack.PushBack(ctx); } } else { - res = REAL(__cxa_atexit)(cxa_at_exit_wrapper, ctx, dso); + res = REAL(__cxa_atexit)(cxa_at_exit_callback_installed_at, ctx, dso); } ThreadIgnoreEnd(thr); return res; } #if !SANITIZER_MAC && !SANITIZER_NETBSD -static void on_exit_wrapper(int status, void *arg) { +static void on_exit_callback_installed_at(int status, void *arg) { ThreadState *thr = cur_thread(); - uptr pc = 0; - Acquire(thr, pc, (uptr)arg); AtExitCtx *ctx = (AtExitCtx*)arg; + Acquire(thr, ctx->pc, (uptr)arg); + FuncEntry(thr, ctx->pc); ((void(*)(int status, void *arg))ctx->f)(status, ctx->arg); + FuncExit(thr); Free(ctx); } @@ -460,11 +479,12 @@ TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) { auto *ctx = New<AtExitCtx>(); ctx->f = (void(*)())f; ctx->arg = arg; + ctx->pc = GET_CALLER_PC(); Release(thr, pc, (uptr)ctx); // Memory allocation in __cxa_atexit will race with free during exit, // because we do not see synchronization around atexit callback list. 
ThreadIgnoreBegin(thr, pc); - int res = REAL(on_exit)(on_exit_wrapper, ctx); + int res = REAL(on_exit)(on_exit_callback_installed_at, ctx); ThreadIgnoreEnd(thr); return res; } @@ -880,10 +900,11 @@ static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g, } } -static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g) { +static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g, + u32 v) { if (!thr->in_ignored_lib) Release(thr, pc, (uptr)g); - u32 old = atomic_exchange(g, kGuardDone, memory_order_release); + u32 old = atomic_exchange(g, v, memory_order_release); if (old & kGuardWaiter) FutexWake(g, 1 << 30); } @@ -913,12 +934,12 @@ STDCXX_INTERCEPTOR(int, __cxa_guard_acquire, atomic_uint32_t *g) { STDCXX_INTERCEPTOR(void, __cxa_guard_release, atomic_uint32_t *g) { SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g); - guard_release(thr, pc, g); + guard_release(thr, pc, g, kGuardDone); } STDCXX_INTERCEPTOR(void, __cxa_guard_abort, atomic_uint32_t *g) { SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g); - atomic_store(g, kGuardInit, memory_order_relaxed); + guard_release(thr, pc, g, kGuardInit); } namespace __tsan { @@ -1515,12 +1536,12 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) { // result in crashes due to too little stack space. if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) { (*f)(); - guard_release(thr, pc, a); + guard_release(thr, pc, a, kGuardDone); } return 0; } -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf); if (fd > 0) @@ -1533,20 +1554,20 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) { #endif TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) { -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD - SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf); +#if SANITIZER_GLIBC + SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf); if (fd > 0) FdAccess(thr, pc, fd); - return REAL(fstat)(fd, buf); + return REAL(__fxstat)(0, fd, buf); #else - SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf); + SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf); if (fd > 0) FdAccess(thr, pc, fd); - return REAL(__fxstat)(0, fd, buf); + return REAL(fstat)(fd, buf); #endif } -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf); if (fd > 0) @@ -1558,7 +1579,7 @@ TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) { #define TSAN_MAYBE_INTERCEPT___FXSTAT64 #endif -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_GLIBC TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) { SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf); if (fd > 0) @@ -1665,11 +1686,10 @@ TSAN_INTERCEPTOR(int, eventfd, unsigned initval, int flags) { #if SANITIZER_LINUX TSAN_INTERCEPTOR(int, signalfd, int fd, void *mask, int flags) { - SCOPED_TSAN_INTERCEPTOR(signalfd, fd, mask, flags); - if (fd >= 0) - FdClose(thr, pc, fd); + SCOPED_INTERCEPTOR_RAW(signalfd, fd, mask, flags); + FdClose(thr, pc, fd); fd = REAL(signalfd)(fd, mask, flags); - if (fd >= 0) + if (!MustIgnoreInterceptor(thr)) FdSignalCreate(thr, pc, fd); return fd; } @@ -1746,17 +1766,16 @@ TSAN_INTERCEPTOR(int, listen, int fd, int backlog) { } TSAN_INTERCEPTOR(int, close, int fd) { - SCOPED_TSAN_INTERCEPTOR(close, fd); - if (fd >= 0) + SCOPED_INTERCEPTOR_RAW(close, fd); + if (!in_symbolizer()) FdClose(thr, pc, fd); return REAL(close)(fd); } 
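For context on the aux_sync/FdPollAdd machinery added earlier in this diff: it models the notify-through-epoll idiom, where a plain store is published by writing to a watched fd and consumed after epoll_wait returns. A minimal example of the user code this is meant to keep race-report-free (hypothetical program; error handling omitted):

    #include <pthread.h>
    #include <sys/epoll.h>
    #include <sys/eventfd.h>
    #include <unistd.h>
    #include <cstdint>

    static int efd;
    static long shared_value;  // no lock; relies on fd-based happens-before

    static void *producer(void *) {
      shared_value = 42;             // plain store...
      uint64_t one = 1;
      write(efd, &one, sizeof one);  // ...published by the fd write (FdRelease)
      return nullptr;
    }

    int main() {
      efd = eventfd(0, 0);
      int epfd = epoll_create1(0);
      epoll_event ev = {};
      ev.events = EPOLLIN;
      ev.data.fd = efd;
      // EPOLL_CTL_ADD: FdPollAdd links efd's aux_sync to the epoll fd's sync.
      epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);
      pthread_t t;
      pthread_create(&t, nullptr, producer, nullptr);
      epoll_event out;
      epoll_wait(epfd, &out, 1, -1);  // acquires the producer's release
      long v = shared_value;          // synchronized read, not a race
      pthread_join(t, nullptr);
      return (int)v;
    }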
#if SANITIZER_LINUX TSAN_INTERCEPTOR(int, __close, int fd) { - SCOPED_TSAN_INTERCEPTOR(__close, fd); - if (fd >= 0) - FdClose(thr, pc, fd); + SCOPED_INTERCEPTOR_RAW(__close, fd); + FdClose(thr, pc, fd); return REAL(__close)(fd); } #define TSAN_MAYBE_INTERCEPT___CLOSE TSAN_INTERCEPT(__close) @@ -1767,13 +1786,10 @@ TSAN_INTERCEPTOR(int, __close, int fd) { // glibc guts #if SANITIZER_LINUX && !SANITIZER_ANDROID TSAN_INTERCEPTOR(void, __res_iclose, void *state, bool free_addr) { - SCOPED_TSAN_INTERCEPTOR(__res_iclose, state, free_addr); + SCOPED_INTERCEPTOR_RAW(__res_iclose, state, free_addr); int fds[64]; int cnt = ExtractResolvFDs(state, fds, ARRAY_SIZE(fds)); - for (int i = 0; i < cnt; i++) { - if (fds[i] > 0) - FdClose(thr, pc, fds[i]); - } + for (int i = 0; i < cnt; i++) FdClose(thr, pc, fds[i]); REAL(__res_iclose)(state, free_addr); } #define TSAN_MAYBE_INTERCEPT___RES_ICLOSE TSAN_INTERCEPT(__res_iclose) @@ -1854,7 +1870,7 @@ TSAN_INTERCEPTOR(int, rmdir, char *path) { } TSAN_INTERCEPTOR(int, closedir, void *dirp) { - SCOPED_TSAN_INTERCEPTOR(closedir, dirp); + SCOPED_INTERCEPTOR_RAW(closedir, dirp); if (dirp) { int fd = dirfd(dirp); FdClose(thr, pc, fd); @@ -1885,8 +1901,10 @@ TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) { FdAccess(thr, pc, epfd); if (epfd >= 0 && fd >= 0) FdAccess(thr, pc, fd); - if (op == EPOLL_CTL_ADD && epfd >= 0) + if (op == EPOLL_CTL_ADD && epfd >= 0) { + FdPollAdd(thr, pc, epfd, fd); FdRelease(thr, pc, epfd); + } int res = REAL(epoll_ctl)(epfd, op, fd, ev); return res; } @@ -1949,13 +1967,14 @@ TSAN_INTERCEPTOR(int, pthread_sigmask, int how, const __sanitizer_sigset_t *set, namespace __tsan { -static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) { +static void ReportErrnoSpoiling(ThreadState *thr, uptr pc, int sig) { VarSizeStackTrace stack; // StackTrace::GetNestInstructionPc(pc) is used because return address is // expected, OutputReport() will undo this. ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack); ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(ReportTypeErrnoInSignal); + rep.SetSigNum(sig); if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) { rep.AddStack(stack, true); OutputReport(thr, rep); @@ -1965,6 +1984,7 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) { static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire, int sig, __sanitizer_siginfo *info, void *uctx) { + CHECK(thr->slot); __sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions; if (acquire) Acquire(thr, 0, (uptr)&sigactions[sig]); @@ -2021,7 +2041,7 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire, // signal; and it looks too fragile to intercept all ways to reraise a signal. 
if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM && errno != 99) - ReportErrnoSpoiling(thr, pc); + ReportErrnoSpoiling(thr, pc, sig); errno = saved_errno; } @@ -2132,11 +2152,11 @@ TSAN_INTERCEPTOR(int, pthread_kill, void *tid, int sig) { ThreadSignalContext *sctx = SigCtx(thr); CHECK_NE(sctx, 0); int prev = sctx->int_signal_send; - if (tid == pthread_self()) { + bool self = pthread_equal(tid, pthread_self()); + if (self) sctx->int_signal_send = sig; - } int res = REAL(pthread_kill)(tid, sig); - if (tid == pthread_self()) { + if (self) { CHECK_EQ(sctx->int_signal_send, sig); sctx->int_signal_send = prev; } @@ -2193,6 +2213,7 @@ void atfork_child() { FdOnFork(thr, pc); } +#if !SANITIZER_IOS TSAN_INTERCEPTOR(int, vfork, int fake) { // Some programs (e.g. openjdk) call close for all file descriptors // in the child process. Under tsan it leads to false positives, because @@ -2209,6 +2230,7 @@ TSAN_INTERCEPTOR(int, vfork, int fake) { // Instead we simply turn vfork into fork. return WRAP(fork)(fake); } +#endif #if SANITIZER_LINUX TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags, @@ -2252,7 +2274,7 @@ struct dl_iterate_phdr_data { }; static bool IsAppNotRodata(uptr addr) { - return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata; + return IsAppMem(addr) && *MemToShadow(addr) != Shadow::kRodata; } static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size, @@ -2358,9 +2380,18 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, #define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) \ if (file) { \ int fd = fileno_unlocked(file); \ - if (fd >= 0) FdClose(thr, pc, fd); \ + FdClose(thr, pc, fd); \ } +#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \ + ({ \ + CheckNoDeepBind(filename, flag); \ + ThreadIgnoreBegin(thr, 0); \ + void *res = REAL(dlopen)(filename, flag); \ + ThreadIgnoreEnd(thr); \ + res; \ + }) + #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \ libignore()->OnLibraryLoaded(filename) @@ -2391,8 +2422,11 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, #define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \ ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name) -#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \ - __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name) +#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \ + if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \ + COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name); \ + else \ + __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name) #define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name) @@ -2553,7 +2587,7 @@ static USED void syscall_release(uptr pc, uptr addr) { } static void syscall_fd_close(uptr pc, int fd) { - TSAN_SYSCALL(); + auto *thr = cur_thread(); FdClose(thr, pc, fd); } @@ -2688,6 +2722,26 @@ TSAN_INTERCEPTOR(void, thr_exit, tid_t *state) { #define TSAN_MAYBE_INTERCEPT_THR_EXIT #endif +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_init, void *c, void *a) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_destroy, void *c) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_signal, void *c) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_broadcast, void *c) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, cond_wait, void *c, void *m) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_init, void *m, void *a) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_destroy, void *m) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_lock, void *m) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_trylock, void *m) 
+TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, mutex_unlock, void *m) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_init, void *l, void *a) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_destroy, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_rdlock, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_tryrdlock, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_wrlock, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_trywrlock, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, rwlock_unlock, void *l) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, once, void *o, void (*i)()) +TSAN_INTERCEPTOR_FREEBSD_ALIAS(int, sigmask, int f, void *n, void *o) + TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_init, void *c, void *a) TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_signal, void *c) TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_broadcast, void *c) @@ -2916,6 +2970,26 @@ void InitializeInterceptors() { } #endif + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_init); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_destroy); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_signal); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_broadcast); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(cond_wait); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_init); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_destroy); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_lock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_trylock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(mutex_unlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_init); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_destroy); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_rdlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_tryrdlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_wrlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_trywrlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(rwlock_unlock); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(once); + TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(sigmask); + TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_init); TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_signal); TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_broadcast); diff --git a/libsanitizer/tsan/tsan_interface.cpp b/libsanitizer/tsan/tsan_interface.cpp index 0487151..e6c4bf2 100644 --- a/libsanitizer/tsan/tsan_interface.cpp +++ b/libsanitizer/tsan/tsan_interface.cpp @@ -26,20 +26,6 @@ void __tsan_flush_memory() { FlushShadowMemory(); } -void __tsan_read16(void *addr) { - uptr pc = CALLERPC; - ThreadState *thr = cur_thread(); - MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead); - MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead); -} - -void __tsan_write16(void *addr) { - uptr pc = CALLERPC; - ThreadState *thr = cur_thread(); - MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite); - MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite); -} - void __tsan_read16_pc(void *addr, void *pc) { uptr pc_no_pac = STRIP_PAC_PC(pc); ThreadState *thr = cur_thread(); diff --git a/libsanitizer/tsan/tsan_interface.inc b/libsanitizer/tsan/tsan_interface.inc index 0031800..b0a424f 100644 --- a/libsanitizer/tsan/tsan_interface.inc +++ b/libsanitizer/tsan/tsan_interface.inc @@ -34,6 +34,10 @@ void __tsan_read8(void *addr) { MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead); } +void __tsan_read16(void *addr) { + MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead); +} + void __tsan_write1(void *addr) { MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite); } @@ -50,6 +54,10 @@ void __tsan_write8(void *addr) { MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite); } +void __tsan_write16(void *addr) { + MemoryAccess16(cur_thread(), CALLERPC, 
(uptr)addr, kAccessWrite); +} + void __tsan_read1_pc(void *addr, void *pc) { MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC); } diff --git a/libsanitizer/tsan/tsan_interface_atomic.cpp b/libsanitizer/tsan/tsan_interface_atomic.cpp index 24ba3bb..f794a2f 100644 --- a/libsanitizer/tsan/tsan_interface_atomic.cpp +++ b/libsanitizer/tsan/tsan_interface_atomic.cpp @@ -235,8 +235,9 @@ static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) { T v = NoTsanAtomicLoad(a, mo); SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a); if (s) { - ReadLock l(&s->mtx); - AcquireImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + ReadLock lock(&s->mtx); + thr->clock.Acquire(s->clock); // Re-read under sync mutex because we need a consistent snapshot // of the value and the clock we acquire. v = NoTsanAtomicLoad(a, mo); @@ -270,14 +271,14 @@ static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v, NoTsanAtomicStore(a, v, mo); return; } - __sync_synchronize(); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreImpl(thr, pc, &s->clock); - NoTsanAtomicStore(a, v, mo); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStore(&s->clock); + NoTsanAtomicStore(a, v, mo); + } + IncrementEpoch(thr); } template <typename T, T (*F)(volatile T *v, T op)> @@ -285,18 +286,21 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) { MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic); if (LIKELY(mo == mo_relaxed)) return F(a, v); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. 
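// For reference while reading this hunk's order-based dispatch
// (ReleaseAcquire vs Release vs Acquire) and the CAS fallback to the
// failure order fmo further down: the predicates have this shape,
// restated here over std::memory_order (mo_consume is treated as an
// acquire). A failed seq_cst compare-exchange may therefore still
// acquire, but it never releases.
#include <atomic>

bool IsAcquireOrder(std::memory_order mo) {
  return mo == std::memory_order_consume ||
         mo == std::memory_order_acquire ||
         mo == std::memory_order_acq_rel || mo == std::memory_order_seq_cst;
}
bool IsReleaseOrder(std::memory_order mo) {
  return mo == std::memory_order_release ||
         mo == std::memory_order_acq_rel || mo == std::memory_order_seq_cst;
}
bool IsAcqRelOrder(std::memory_order mo) {
  return mo == std::memory_order_acq_rel || mo == std::memory_order_seq_cst;
}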
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - if (IsAcqRelOrder(mo)) - AcquireReleaseImpl(thr, pc, &s->clock); - else if (IsReleaseOrder(mo)) - ReleaseImpl(thr, pc, &s->clock); - else if (IsAcquireOrder(mo)) - AcquireImpl(thr, pc, &s->clock); - return F(a, v); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + RWLock lock(&s->mtx, IsReleaseOrder(mo)); + if (IsAcqRelOrder(mo)) + thr->clock.ReleaseAcquire(&s->clock); + else if (IsReleaseOrder(mo)) + thr->clock.Release(&s->clock); + else if (IsAcquireOrder(mo)) + thr->clock.Acquire(s->clock); + v = F(a, v); + } + if (IsReleaseOrder(mo)) + IncrementEpoch(thr); + return v; } template<typename T> @@ -416,27 +420,28 @@ static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v, *c = pr; return false; } - + SlotLocker locker(thr); bool release = IsReleaseOrder(mo); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - RWLock l(&s->mtx, release); - T cc = *c; - T pr = func_cas(a, cc, v); - bool success = pr == cc; - if (!success) { - *c = pr; - mo = fmo; + bool success; + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + RWLock lock(&s->mtx, release); + T cc = *c; + T pr = func_cas(a, cc, v); + success = pr == cc; + if (!success) { + *c = pr; + mo = fmo; + } + if (success && IsAcqRelOrder(mo)) + thr->clock.ReleaseAcquire(&s->clock); + else if (success && IsReleaseOrder(mo)) + thr->clock.Release(&s->clock); + else if (IsAcquireOrder(mo)) + thr->clock.Acquire(s->clock); } - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - - if (success && IsAcqRelOrder(mo)) - AcquireReleaseImpl(thr, pc, &s->clock); - else if (success && IsReleaseOrder(mo)) - ReleaseImpl(thr, pc, &s->clock); - else if (IsAcquireOrder(mo)) - AcquireImpl(thr, pc, &s->clock); + if (success && release) + IncrementEpoch(thr); return success; } diff --git a/libsanitizer/tsan/tsan_interface_java.cpp b/libsanitizer/tsan/tsan_interface_java.cpp index c090c1f..7c15a16 100644 --- a/libsanitizer/tsan/tsan_interface_java.cpp +++ b/libsanitizer/tsan/tsan_interface_java.cpp @@ -106,7 +106,7 @@ void __tsan_java_free(jptr ptr, jptr size) { DCHECK_GE(ptr, jctx->heap_begin); DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size); - ctx->metamap.FreeRange(thr->proc(), ptr, size); + ctx->metamap.FreeRange(thr->proc(), ptr, size, false); } void __tsan_java_move(jptr src, jptr dst, jptr size) { @@ -133,7 +133,7 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) { // support that anymore as it contains addresses of accesses. RawShadow *d = MemToShadow(dst); RawShadow *dend = MemToShadow(dst + size); - internal_memset(d, 0, (dend - d) * sizeof(*d)); + ShadowSet(d, dend, Shadow::kEmpty); } jptr __tsan_java_find(jptr *from_ptr, jptr to) { diff --git a/libsanitizer/tsan/tsan_mman.cpp b/libsanitizer/tsan/tsan_mman.cpp index f1b6768..0937e52 100644 --- a/libsanitizer/tsan/tsan_mman.cpp +++ b/libsanitizer/tsan/tsan_mman.cpp @@ -20,18 +20,6 @@ #include "tsan_report.h" #include "tsan_flags.h" -// May be overriden by front-end. 
-SANITIZER_WEAK_DEFAULT_IMPL -void __sanitizer_malloc_hook(void *ptr, uptr size) { - (void)ptr; - (void)size; -} - -SANITIZER_WEAK_DEFAULT_IMPL -void __sanitizer_free_hook(void *ptr) { - (void)ptr; -} - namespace __tsan { struct MapUnmapCallback { @@ -69,8 +57,17 @@ Allocator *allocator() { struct GlobalProc { Mutex mtx; Processor *proc; - - GlobalProc() : mtx(MutexTypeGlobalProc), proc(ProcCreate()) {} + // This mutex represents the internal allocator combined for + // the purposes of deadlock detection. The internal allocator + // uses multiple mutexes, moreover they are locked only occasionally + // and they are spin mutexes which don't support deadlock detection. + // So we use this fake mutex to serve as a substitute for these mutexes. + CheckedMutex internal_alloc_mtx; + + GlobalProc() + : mtx(MutexTypeGlobalProc), + proc(ProcCreate()), + internal_alloc_mtx(MutexTypeInternalAlloc) {} }; static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64); @@ -78,6 +75,11 @@ GlobalProc *global_proc() { return reinterpret_cast<GlobalProc*>(&global_proc_placeholder); } +static void InternalAllocAccess() { + global_proc()->internal_alloc_mtx.Lock(); + global_proc()->internal_alloc_mtx.Unlock(); +} + ScopedGlobalProcessor::ScopedGlobalProcessor() { GlobalProc *gp = global_proc(); ThreadState *thr = cur_thread(); @@ -110,6 +112,24 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() { gp->mtx.Unlock(); } +void AllocatorLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + global_proc()->internal_alloc_mtx.Lock(); + InternalAllocatorLock(); +} + +void AllocatorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + InternalAllocatorUnlock(); + global_proc()->internal_alloc_mtx.Unlock(); +} + +void GlobalProcessorLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + global_proc()->mtx.Lock(); +} + +void GlobalProcessorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + global_proc()->mtx.Unlock(); +} + static constexpr uptr kMaxAllowedMallocSize = 1ull << 40; static uptr max_user_defined_malloc_size; @@ -166,6 +186,12 @@ void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, uptr align, GET_STACK_TRACE_FATAL(thr, pc); ReportAllocationSizeTooBig(sz, malloc_limit, &stack); } + if (UNLIKELY(IsRssLimitExceeded())) { + if (AllocatorMayReturnNull()) + return nullptr; + GET_STACK_TRACE_FATAL(thr, pc); + ReportRssLimitExceeded(&stack); + } void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align); if (UNLIKELY(!p)) { SetAllocatorOutOfMemory(); @@ -219,8 +245,17 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) { void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p); + // Note: this can run before thread initialization/after finalization. + // As a result this is not necessarily synchronized with DoReset, + // which iterates over and resets all sync objects, + // but it is fine to create new MBlocks in this context. ctx->metamap.AllocBlock(thr, pc, p, sz); - if (write && thr->ignore_reads_and_writes == 0) + // If this runs before thread initialization/after finalization + // and we don't have trace initialized, we can't imitate writes. + // In such case just reset the shadow range, it is fine since + // it affects only a small fraction of special objects. 
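// An aside on the internal_alloc_mtx introduced in GlobalProc above: the
// lock/unlock pair with an empty critical section exists only so the
// deadlock detector records a lock-order edge for the internal allocator,
// whose own spin mutexes it cannot see. The same pattern in standard C++
// (std::mutex standing in for CheckedMutex; names hypothetical):
#include <cstddef>
#include <mutex>
#include <new>

static std::mutex internal_alloc_mtx;

static void InternalAllocAccess() {
  internal_alloc_mtx.lock();
  internal_alloc_mtx.unlock();  // empty critical section by design
}

void *AllocSketch(std::size_t sz) {
  InternalAllocAccess();  // record the lock edge, then allocate for real
  return ::operator new(sz);
}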
+ if (write && thr->ignore_reads_and_writes == 0 && + atomic_load_relaxed(&thr->trace_pos)) MemoryRangeImitateWrite(thr, pc, (uptr)p, sz); else MemoryResetRange(thr, pc, (uptr)p, sz); @@ -228,7 +263,14 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) { CHECK_NE(p, (void*)0); - uptr sz = ctx->metamap.FreeBlock(thr->proc(), p); + if (!thr->slot) { + // Very early/late in thread lifetime, or during fork. + UNUSED uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, false); + DPrintf("#%d: free(0x%zx, %zu) (no slot)\n", thr->tid, p, sz); + return; + } + SlotLocker locker(thr); + uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, true); DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz); if (write && thr->ignore_reads_and_writes == 0) MemoryRangeFreed(thr, pc, (uptr)p, sz); @@ -310,7 +352,7 @@ void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) { } uptr user_alloc_usable_size(const void *p) { - if (p == 0) + if (p == 0 || !IsAppMem((uptr)p)) return 0; MBlock *b = ctx->metamap.GetBlock((uptr)p); if (!b) @@ -324,7 +366,6 @@ void invoke_malloc_hook(void *ptr, uptr size) { ThreadState *thr = cur_thread(); if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors) return; - __sanitizer_malloc_hook(ptr, size); RunMallocHooks(ptr, size); } @@ -332,7 +373,6 @@ void invoke_free_hook(void *ptr) { ThreadState *thr = cur_thread(); if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors) return; - __sanitizer_free_hook(ptr); RunFreeHooks(ptr); } @@ -342,6 +382,7 @@ void *Alloc(uptr sz) { thr->nomalloc = 0; // CHECK calls internal_malloc(). CHECK(0); } + InternalAllocAccess(); return InternalAlloc(sz, &thr->proc()->internal_alloc_cache); } @@ -351,6 +392,7 @@ void FreeImpl(void *p) { thr->nomalloc = 0; // CHECK calls internal_malloc(). CHECK(0); } + InternalAllocAccess(); InternalFree(p, &thr->proc()->internal_alloc_cache); } @@ -393,8 +435,6 @@ uptr __sanitizer_get_allocated_size(const void *p) { void __tsan_on_thread_idle() { ThreadState *thr = cur_thread(); - thr->clock.ResetCached(&thr->proc()->clock_cache); - thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); allocator()->SwallowCache(&thr->proc()->alloc_cache); internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache); ctx->metamap.OnProcIdle(thr->proc()); diff --git a/libsanitizer/tsan/tsan_mman.h b/libsanitizer/tsan/tsan_mman.h index efea5e5..2095f28 100644 --- a/libsanitizer/tsan/tsan_mman.h +++ b/libsanitizer/tsan/tsan_mman.h @@ -24,6 +24,10 @@ void ReplaceSystemMalloc(); void AllocatorProcStart(Processor *proc); void AllocatorProcFinish(Processor *proc); void AllocatorPrintStats(); +void AllocatorLock(); +void AllocatorUnlock(); +void GlobalProcessorLock(); +void GlobalProcessorUnlock(); // For user allocations. void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, diff --git a/libsanitizer/tsan/tsan_mutexset.cpp b/libsanitizer/tsan/tsan_mutexset.cpp index 7351796..3a75b80a 100644 --- a/libsanitizer/tsan/tsan_mutexset.cpp +++ b/libsanitizer/tsan/tsan_mutexset.cpp @@ -19,57 +19,7 @@ namespace __tsan { MutexSet::MutexSet() { } -void MutexSet::Add(u64 id, bool write, u64 epoch) { - // Look up existing mutex with the same id. - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - descs_[i].count++; - descs_[i].epoch = epoch; - return; - } - } - // On overflow, find the oldest mutex and drop it. 
- if (size_ == kMaxSize) { - u64 minepoch = (u64)-1; - u64 mini = (u64)-1; - for (uptr i = 0; i < size_; i++) { - if (descs_[i].epoch < minepoch) { - minepoch = descs_[i].epoch; - mini = i; - } - } - RemovePos(mini); - CHECK_EQ(size_, kMaxSize - 1); - } - // Add new mutex descriptor. - descs_[size_].addr = 0; - descs_[size_].stack_id = kInvalidStackID; - descs_[size_].id = id; - descs_[size_].write = write; - descs_[size_].epoch = epoch; - descs_[size_].seq = seq_++; - descs_[size_].count = 1; - size_++; -} - -void MutexSet::Del(u64 id, bool write) { - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - if (--descs_[i].count == 0) - RemovePos(i); - return; - } - } -} - -void MutexSet::Remove(u64 id) { - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - RemovePos(i); - return; - } - } -} +void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); } void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) { // Look up existing mutex with the same id. @@ -93,9 +43,7 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) { // Add new mutex descriptor. descs_[size_].addr = addr; descs_[size_].stack_id = stack_id; - descs_[size_].id = 0; descs_[size_].write = write; - descs_[size_].epoch = 0; descs_[size_].seq = seq_++; descs_[size_].count = 1; size_++; diff --git a/libsanitizer/tsan/tsan_mutexset.h b/libsanitizer/tsan/tsan_mutexset.h index 93776a6..aabd361 100644 --- a/libsanitizer/tsan/tsan_mutexset.h +++ b/libsanitizer/tsan/tsan_mutexset.h @@ -25,8 +25,6 @@ class MutexSet { struct Desc { uptr addr; StackID stack_id; - u64 id; - u64 epoch; u32 seq; u32 count; bool write; @@ -40,10 +38,7 @@ class MutexSet { }; MutexSet(); - // The 'id' is obtained from SyncVar::GetId(). - void Add(u64 id, bool write, u64 epoch); - void Del(u64 id, bool write); - void Remove(u64 id); // Removes the mutex completely (if it's destroyed). + void Reset(); void AddAddr(uptr addr, StackID stack_id, bool write); void DelAddr(uptr addr, bool destroy = false); uptr Size() const; @@ -82,9 +77,7 @@ class DynamicMutexSet { // in different goroutine). 
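// With the id/epoch flavor gone, the set is keyed purely by mutex address
// plus acquisition stack, and the overflow policy lives in AddAddr alone:
// when all slots are taken, the entry with the smallest insertion
// sequence number is evicted. Compact sketch of that bookkeeping (field
// names follow Desc above; illustrative, not the runtime code):
#include <cstdint>

struct MutexSetSketch {
  static constexpr uintptr_t kMaxSize = 16;
  struct Desc { uintptr_t addr; uint32_t seq, count; bool write; };
  Desc descs[kMaxSize];
  uintptr_t size = 0;
  uint32_t seq = 0;

  void AddAddr(uintptr_t addr, bool write) {
    for (uintptr_t i = 0; i < size; i++)
      if (descs[i].addr == addr) { descs[i].count++; return; }  // recursion
    if (size == kMaxSize) {  // full: evict the oldest entry
      uintptr_t oldest = 0;
      for (uintptr_t i = 1; i < size; i++)
        if (descs[i].seq < descs[oldest].seq) oldest = i;
      descs[oldest] = descs[--size];  // swap-with-last removal
    }
    descs[size++] = Desc{addr, seq++, 1, write};
  }
};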
#if SANITIZER_GO MutexSet::MutexSet() {} -void MutexSet::Add(u64 id, bool write, u64 epoch) {} -void MutexSet::Del(u64 id, bool write) {} -void MutexSet::Remove(u64 id) {} +void MutexSet::Reset() {} void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {} void MutexSet::DelAddr(uptr addr, bool destroy) {} uptr MutexSet::Size() const { return 0; } diff --git a/libsanitizer/tsan/tsan_platform.h b/libsanitizer/tsan/tsan_platform.h index 7ff0aca..233bf0a 100644 --- a/libsanitizer/tsan/tsan_platform.h +++ b/libsanitizer/tsan/tsan_platform.h @@ -18,8 +18,8 @@ # error "Only 64-bit is supported" #endif +#include "sanitizer_common/sanitizer_common.h" #include "tsan_defs.h" -#include "tsan_trace.h" namespace __tsan { @@ -40,14 +40,12 @@ enum { C/C++ on linux/x86_64 and freebsd/x86_64 0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB) 0040 0000 0000 - 0100 0000 0000: - -0100 0000 0000 - 2000 0000 0000: shadow -2000 0000 0000 - 3000 0000 0000: - +0100 0000 0000 - 1000 0000 0000: shadow +1000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) 4000 0000 0000 - 5500 0000 0000: - 5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels -5680 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 7d00 0000 0000: - +5680 0000 0000 - 7d00 0000 0000: - 7b00 0000 0000 - 7c00 0000 0000: heap 7c00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack @@ -67,10 +65,8 @@ C/C++ on netbsd/amd64 can reuse the same mapping: struct Mapping48AddressSpace { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x340000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x010000000000ull; - static const uptr kShadowEnd = 0x200000000000ull; + static const uptr kShadowEnd = 0x100000000000ull; static const uptr kHeapMemBeg = 0x7b0000000000ull; static const uptr kHeapMemEnd = 0x7c0000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -89,14 +85,13 @@ struct Mapping48AddressSpace { C/C++ on linux/mips64 (40-bit VMA) 0000 0000 00 - 0100 0000 00: - (4 GB) 0100 0000 00 - 0200 0000 00: main binary (4 GB) -0200 0000 00 - 2000 0000 00: - (120 GB) -2000 0000 00 - 4000 0000 00: shadow (128 GB) +0200 0000 00 - 1200 0000 00: - (64 GB) +1200 0000 00 - 2200 0000 00: shadow (64 GB) +2200 0000 00 - 4000 0000 00: - (120 GB) 4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB) 5000 0000 00 - aa00 0000 00: - (360 GB) aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB) -ab00 0000 00 - b000 0000 00: - (20 GB) -b000 0000 00 - b200 0000 00: traces (8 GB) -b200 0000 00 - fe00 0000 00: - (304 GB) +ab00 0000 00 - fe00 0000 00: - (332 GB) fe00 0000 00 - ff00 0000 00: heap (4 GB) ff00 0000 00 - ff80 0000 00: - (2 GB) ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB) @@ -104,10 +99,8 @@ ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB) struct MappingMips64_40 { static const uptr kMetaShadowBeg = 0x4000000000ull; static const uptr kMetaShadowEnd = 0x5000000000ull; - static const uptr kTraceMemBeg = 0xb000000000ull; - static const uptr kTraceMemEnd = 0xb200000000ull; - static const uptr kShadowBeg = 0x2000000000ull; - static const uptr kShadowEnd = 0x4000000000ull; + static const uptr kShadowBeg = 0x1200000000ull; + static const uptr kShadowEnd = 0x2200000000ull; 
static const uptr kHeapMemBeg = 0xfe00000000ull; static const uptr kHeapMemEnd = 0xff00000000ull; static const uptr kLoAppMemBeg = 0x0100000000ull; @@ -128,12 +121,10 @@ C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM) 0100 0000 00 - 0200 0000 00: main binary, modules, thread stacks (4 GB) 0200 0000 00 - 0300 0000 00: heap (4 GB) 0300 0000 00 - 0400 0000 00: - (4 GB) -0400 0000 00 - 0c00 0000 00: shadow memory (32 GB) -0c00 0000 00 - 0d00 0000 00: - (4 GB) +0400 0000 00 - 0800 0000 00: shadow memory (16 GB) +0800 0000 00 - 0d00 0000 00: - (20 GB) 0d00 0000 00 - 0e00 0000 00: metainfo (4 GB) -0e00 0000 00 - 0f00 0000 00: - (4 GB) -0f00 0000 00 - 0fc0 0000 00: traces (3 GB) -0fc0 0000 00 - 1000 0000 00: - +0e00 0000 00 - 1000 0000 00: - */ struct MappingAppleAarch64 { static const uptr kLoAppMemBeg = 0x0100000000ull; @@ -141,16 +132,14 @@ struct MappingAppleAarch64 { static const uptr kHeapMemBeg = 0x0200000000ull; static const uptr kHeapMemEnd = 0x0300000000ull; static const uptr kShadowBeg = 0x0400000000ull; - static const uptr kShadowEnd = 0x0c00000000ull; + static const uptr kShadowEnd = 0x0800000000ull; static const uptr kMetaShadowBeg = 0x0d00000000ull; static const uptr kMetaShadowEnd = 0x0e00000000ull; - static const uptr kTraceMemBeg = 0x0f00000000ull; - static const uptr kTraceMemEnd = 0x0fc0000000ull; static const uptr kHiAppMemBeg = 0x0fc0000000ull; static const uptr kHiAppMemEnd = 0x0fc0000000ull; static const uptr kShadowMsk = 0x0ull; static const uptr kShadowXor = 0x0ull; - static const uptr kShadowAdd = 0x0ull; + static const uptr kShadowAdd = 0x0200000000ull; static const uptr kVdsoBeg = 0x7000000000000000ull; static const uptr kMidAppMemBeg = 0; static const uptr kMidAppMemEnd = 0; @@ -159,29 +148,25 @@ struct MappingAppleAarch64 { /* C/C++ on linux/aarch64 (39-bit VMA) 0000 0010 00 - 0100 0000 00: main binary -0100 0000 00 - 0800 0000 00: - -0800 0000 00 - 2000 0000 00: shadow memory +0100 0000 00 - 0400 0000 00: - +0400 0000 00 - 1000 0000 00: shadow memory 2000 0000 00 - 3100 0000 00: - 3100 0000 00 - 3400 0000 00: metainfo 3400 0000 00 - 5500 0000 00: - 5500 0000 00 - 5600 0000 00: main binary (PIE) -5600 0000 00 - 6000 0000 00: - -6000 0000 00 - 6200 0000 00: traces -6200 0000 00 - 7d00 0000 00: - +5600 0000 00 - 7c00 0000 00: - 7c00 0000 00 - 7d00 0000 00: heap 7d00 0000 00 - 7fff ffff ff: modules and main thread stack */ struct MappingAarch64_39 { static const uptr kLoAppMemBeg = 0x0000001000ull; static const uptr kLoAppMemEnd = 0x0100000000ull; - static const uptr kShadowBeg = 0x0800000000ull; - static const uptr kShadowEnd = 0x2000000000ull; + static const uptr kShadowBeg = 0x0400000000ull; + static const uptr kShadowEnd = 0x1000000000ull; static const uptr kMetaShadowBeg = 0x3100000000ull; static const uptr kMetaShadowEnd = 0x3400000000ull; static const uptr kMidAppMemBeg = 0x5500000000ull; - static const uptr kMidAppMemEnd = 0x5600000000ull; - static const uptr kTraceMemBeg = 0x6000000000ull; - static const uptr kTraceMemEnd = 0x6200000000ull; + static const uptr kMidAppMemEnd = 0x5600000000ull; static const uptr kHeapMemBeg = 0x7c00000000ull; static const uptr kHeapMemEnd = 0x7d00000000ull; static const uptr kHiAppMemBeg = 0x7e00000000ull; @@ -195,15 +180,13 @@ struct MappingAarch64_39 { /* C/C++ on linux/aarch64 (42-bit VMA) 00000 0010 00 - 01000 0000 00: main binary -01000 0000 00 - 10000 0000 00: - -10000 0000 00 - 20000 0000 00: shadow memory -20000 0000 00 - 26000 0000 00: - +01000 0000 00 - 08000 0000 00: - +08000 0000 00 - 10000 0000 00: shadow memory 
+10000 0000 00 - 26000 0000 00: - 26000 0000 00 - 28000 0000 00: metainfo 28000 0000 00 - 2aa00 0000 00: - 2aa00 0000 00 - 2ab00 0000 00: main binary (PIE) -2ab00 0000 00 - 36200 0000 00: - -36200 0000 00 - 36240 0000 00: traces -36240 0000 00 - 3e000 0000 00: - +2ab00 0000 00 - 3e000 0000 00: - 3e000 0000 00 - 3f000 0000 00: heap 3f000 0000 00 - 3ffff ffff ff: modules and main thread stack */ @@ -211,14 +194,12 @@ struct MappingAarch64_42 { static const uptr kBroken = kBrokenReverseMapping; static const uptr kLoAppMemBeg = 0x00000001000ull; static const uptr kLoAppMemEnd = 0x01000000000ull; - static const uptr kShadowBeg = 0x10000000000ull; - static const uptr kShadowEnd = 0x20000000000ull; + static const uptr kShadowBeg = 0x08000000000ull; + static const uptr kShadowEnd = 0x10000000000ull; static const uptr kMetaShadowBeg = 0x26000000000ull; static const uptr kMetaShadowEnd = 0x28000000000ull; static const uptr kMidAppMemBeg = 0x2aa00000000ull; - static const uptr kMidAppMemEnd = 0x2ab00000000ull; - static const uptr kTraceMemBeg = 0x36200000000ull; - static const uptr kTraceMemEnd = 0x36400000000ull; + static const uptr kMidAppMemEnd = 0x2ab00000000ull; static const uptr kHeapMemBeg = 0x3e000000000ull; static const uptr kHeapMemEnd = 0x3f000000000ull; static const uptr kHiAppMemBeg = 0x3f000000000ull; @@ -232,14 +213,12 @@ struct MappingAarch64_42 { struct MappingAarch64_48 { static const uptr kLoAppMemBeg = 0x0000000001000ull; static const uptr kLoAppMemEnd = 0x0000200000000ull; - static const uptr kShadowBeg = 0x0002000000000ull; - static const uptr kShadowEnd = 0x0004000000000ull; + static const uptr kShadowBeg = 0x0001000000000ull; + static const uptr kShadowEnd = 0x0002000000000ull; static const uptr kMetaShadowBeg = 0x0005000000000ull; static const uptr kMetaShadowEnd = 0x0006000000000ull; static const uptr kMidAppMemBeg = 0x0aaaa00000000ull; - static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; - static const uptr kTraceMemBeg = 0x0f06000000000ull; - static const uptr kTraceMemEnd = 0x0f06200000000ull; + static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; static const uptr kHeapMemBeg = 0x0ffff00000000ull; static const uptr kHeapMemEnd = 0x0ffff00000000ull; static const uptr kHiAppMemBeg = 0x0ffff00000000ull; @@ -257,9 +236,7 @@ C/C++ on linux/powerpc64 (44-bit VMA) 0001 0000 0000 - 0b00 0000 0000: shadow 0b00 0000 0000 - 0b00 0000 0000: - 0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects) -0d00 0000 0000 - 0d00 0000 0000: - -0d00 0000 0000 - 0f00 0000 0000: traces -0f00 0000 0000 - 0f00 0000 0000: - +0d00 0000 0000 - 0f00 0000 0000: - 0f00 0000 0000 - 0f50 0000 0000: heap 0f50 0000 0000 - 0f60 0000 0000: - 0f60 0000 0000 - 1000 0000 0000: modules and main thread stack @@ -269,8 +246,6 @@ struct MappingPPC64_44 { kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity; static const uptr kMetaShadowBeg = 0x0b0000000000ull; static const uptr kMetaShadowEnd = 0x0d0000000000ull; - static const uptr kTraceMemBeg = 0x0d0000000000ull; - static const uptr kTraceMemEnd = 0x0f0000000000ull; static const uptr kShadowBeg = 0x000100000000ull; static const uptr kShadowEnd = 0x0b0000000000ull; static const uptr kLoAppMemBeg = 0x000000000100ull; @@ -291,23 +266,19 @@ struct MappingPPC64_44 { C/C++ on linux/powerpc64 (46-bit VMA) 0000 0000 1000 - 0100 0000 0000: main binary 0100 0000 0000 - 0200 0000 0000: - -0100 0000 0000 - 1000 0000 0000: shadow -1000 0000 0000 - 1000 0000 0000: - -1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) -2000 
0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2200 0000 0000: traces -2200 0000 0000 - 3d00 0000 0000: - +0100 0000 0000 - 0800 0000 0000: shadow +0800 0000 0000 - 1000 0000 0000: - +1000 0000 0000 - 1200 0000 0000: metainfo (memory blocks and sync objects) +1200 0000 0000 - 3d00 0000 0000: - 3d00 0000 0000 - 3e00 0000 0000: heap 3e00 0000 0000 - 3e80 0000 0000: - 3e80 0000 0000 - 4000 0000 0000: modules and main thread stack */ struct MappingPPC64_46 { static const uptr kMetaShadowBeg = 0x100000000000ull; - static const uptr kMetaShadowEnd = 0x200000000000ull; - static const uptr kTraceMemBeg = 0x200000000000ull; - static const uptr kTraceMemEnd = 0x220000000000ull; + static const uptr kMetaShadowEnd = 0x120000000000ull; static const uptr kShadowBeg = 0x010000000000ull; - static const uptr kShadowEnd = 0x100000000000ull; + static const uptr kShadowEnd = 0x080000000000ull; static const uptr kHeapMemBeg = 0x3d0000000000ull; static const uptr kHeapMemEnd = 0x3e0000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -326,23 +297,19 @@ struct MappingPPC64_46 { C/C++ on linux/powerpc64 (47-bit VMA) 0000 0000 1000 - 0100 0000 0000: main binary 0100 0000 0000 - 0200 0000 0000: - -0100 0000 0000 - 1000 0000 0000: shadow -1000 0000 0000 - 1000 0000 0000: - -1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) -2000 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2200 0000 0000: traces -2200 0000 0000 - 7d00 0000 0000: - +0100 0000 0000 - 0800 0000 0000: shadow +0800 0000 0000 - 1000 0000 0000: - +1000 0000 0000 - 1200 0000 0000: metainfo (memory blocks and sync objects) +1200 0000 0000 - 7d00 0000 0000: - 7d00 0000 0000 - 7e00 0000 0000: heap 7e00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack */ struct MappingPPC64_47 { static const uptr kMetaShadowBeg = 0x100000000000ull; - static const uptr kMetaShadowEnd = 0x200000000000ull; - static const uptr kTraceMemBeg = 0x200000000000ull; - static const uptr kTraceMemEnd = 0x220000000000ull; + static const uptr kMetaShadowEnd = 0x120000000000ull; static const uptr kShadowBeg = 0x010000000000ull; - static const uptr kShadowEnd = 0x100000000000ull; + static const uptr kShadowEnd = 0x080000000000ull; static const uptr kHeapMemBeg = 0x7d0000000000ull; static const uptr kHeapMemEnd = 0x7e0000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -362,22 +329,18 @@ C/C++ on linux/s390x While the kernel provides a 64-bit address space, we have to restrict ourselves to 48 bits due to how e.g. SyncVar::GetId() works. 
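// All the shadow ranges in this patch halve for one reason: the new
// encoding stores four 4-byte shadow cells per 8-byte application granule
// (16 shadow bytes per 8 app bytes) where the old one used 8-byte cells.
// Back-of-the-envelope check of the "2 * app" labels below (the cell
// sizes are an assumption matching the halved ranges, not quoted from
// this patch):
#include <cstdio>

int main() {
  const unsigned kShadowCnt = 4;  // shadow cells per application granule
  const unsigned kCellBytes = 4;  // bytes per cell; previously 8
  const unsigned kGranule = 8;    // application bytes per granule
  std::printf("shadow/app = %ux\n", kShadowCnt * kCellBytes / kGranule);
  // prints "shadow/app = 2x"; with the old 8-byte cells it would be 4x
  return 0;
}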
0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB -0e00 0000 0000 - 4000 0000 0000: - -4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) -8000 0000 0000 - 9000 0000 0000: - +0e00 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 4000 0000 0000: shadow - 32TiB (2 * app) +4000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) -9800 0000 0000 - a000 0000 0000: - -a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) -b000 0000 0000 - be00 0000 0000: - +9800 0000 0000 - be00 0000 0000: - be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator) */ struct MappingS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; - static const uptr kTraceMemBeg = 0xa00000000000ull; - static const uptr kTraceMemEnd = 0xb00000000000ull; - static const uptr kShadowBeg = 0x400000000000ull; - static const uptr kShadowEnd = 0x800000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x400000000000ull; static const uptr kHeapMemBeg = 0xbe0000000000ull; static const uptr kHeapMemEnd = 0xc00000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -397,21 +360,17 @@ struct MappingS390x { 0000 1000 0000 - 00c0 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2380 0000 0000: shadow -2380 0000 0000 - 3000 0000 0000: - +2000 0000 0000 - 21c0 0000 0000: shadow +21c0 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 8000 0000 0000: - */ struct MappingGo48 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; - static const uptr kShadowEnd = 0x238000000000ull; + static const uptr kShadowEnd = 0x21c000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -431,8 +390,8 @@ struct MappingGo48 { 0000 1000 0000 - 00f8 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 0100 0000 0000: - -0100 0000 0000 - 0500 0000 0000: shadow -0500 0000 0000 - 0700 0000 0000: traces +0100 0000 0000 - 0300 0000 0000: shadow +0300 0000 0000 - 0700 0000 0000: - 0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects) 07d0 0000 0000 - 8000 0000 0000: - */ @@ -440,10 +399,8 @@ struct MappingGo48 { struct MappingGoWindows { static const uptr kMetaShadowBeg = 0x070000000000ull; static const uptr kMetaShadowEnd = 0x077000000000ull; - static const uptr kTraceMemBeg = 0x050000000000ull; - static const uptr kTraceMemEnd = 0x070000000000ull; static const uptr kShadowBeg = 0x010000000000ull; - static const uptr kShadowEnd = 0x050000000000ull; + static const uptr kShadowEnd = 0x030000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -463,21 +420,17 @@ struct MappingGoWindows { 0000 1000 0000 - 00c0 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2380 0000 0000: shadow -2380 0000 0000 - 2400 0000 0000: - 
-2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects) -3400 0000 0000 - 3600 0000 0000: - -3600 0000 0000 - 3800 0000 0000: traces -3800 0000 0000 - 4000 0000 0000: - +2000 0000 0000 - 21c0 0000 0000: shadow +21c0 0000 0000 - 2400 0000 0000: - +2400 0000 0000 - 2470 0000 0000: metainfo (memory blocks and sync objects) +2470 0000 0000 - 4000 0000 0000: - */ struct MappingGoPPC64_46 { static const uptr kMetaShadowBeg = 0x240000000000ull; - static const uptr kMetaShadowEnd = 0x340000000000ull; - static const uptr kTraceMemBeg = 0x360000000000ull; - static const uptr kTraceMemEnd = 0x380000000000ull; + static const uptr kMetaShadowEnd = 0x247000000000ull; static const uptr kShadowBeg = 0x200000000000ull; - static const uptr kShadowEnd = 0x238000000000ull; + static const uptr kShadowEnd = 0x21c000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -497,21 +450,17 @@ struct MappingGoPPC64_46 { 0000 1000 0000 - 00c0 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 3000 0000 0000: shadow -3000 0000 0000 - 3000 0000 0000: - -3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +2000 0000 0000 - 2800 0000 0000: shadow +2800 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects) +3200 0000 0000 - 8000 0000 0000: - */ struct MappingGoPPC64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; - static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; + static const uptr kMetaShadowEnd = 0x320000000000ull; static const uptr kShadowBeg = 0x200000000000ull; - static const uptr kShadowEnd = 0x300000000000ull; + static const uptr kShadowEnd = 0x280000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -531,20 +480,16 @@ struct MappingGoPPC64_47 { 0000 1000 0000 - 00c0 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 3000 0000 0000: shadow -3000 0000 0000 - 3000 0000 0000: - -3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +2000 0000 0000 - 2800 0000 0000: shadow +2800 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects) +3200 0000 0000 - 8000 0000 0000: - */ struct MappingGoAarch64 { static const uptr kMetaShadowBeg = 0x300000000000ull; - static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; + static const uptr kMetaShadowEnd = 0x320000000000ull; static const uptr kShadowBeg = 0x200000000000ull; - static const uptr kShadowEnd = 0x300000000000ull; + static const uptr kShadowEnd = 0x280000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -565,20 +510,16 @@ Go on linux/mips64 (47-bit VMA) 0000 1000 0000 - 00c0 0000 0000: - 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 2000 0000 
0000: - -2000 0000 0000 - 3000 0000 0000: shadow -3000 0000 0000 - 3000 0000 0000: - -3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +2000 0000 0000 - 2800 0000 0000: shadow +2800 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects) +3200 0000 0000 - 8000 0000 0000: - */ struct MappingGoMips64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; - static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; + static const uptr kMetaShadowEnd = 0x320000000000ull; static const uptr kShadowBeg = 0x200000000000ull; - static const uptr kShadowEnd = 0x300000000000ull; + static const uptr kShadowEnd = 0x280000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x00e000000000ull; static const uptr kMidAppMemBeg = 0; @@ -597,19 +538,15 @@ struct MappingGoMips64_47 { Go on linux/s390x 0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB 1000 0000 0000 - 4000 0000 0000: - -4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) -8000 0000 0000 - 9000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: shadow - 64TiB (4 * app) +6000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) -9800 0000 0000 - a000 0000 0000: - -a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) */ struct MappingGoS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; - static const uptr kTraceMemBeg = 0xa00000000000ull; - static const uptr kTraceMemEnd = 0xb00000000000ull; static const uptr kShadowBeg = 0x400000000000ull; - static const uptr kShadowEnd = 0x800000000000ull; + static const uptr kShadowEnd = 0x600000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; static const uptr kLoAppMemEnd = 0x100000000000ull; static const uptr kMidAppMemBeg = 0; @@ -648,11 +585,11 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { return Func::template Apply<MappingGo48>(arg); # endif #else // SANITIZER_GO -# if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_MAC && !SANITIZER_IOS - return Func::template Apply<Mapping48AddressSpace>(arg); -# elif defined(__aarch64__) && defined(__APPLE__) +# if SANITIZER_IOS && !SANITIZER_IOSSIM return Func::template Apply<MappingAppleAarch64>(arg); -# elif defined(__aarch64__) && !defined(__APPLE__) +# elif defined(__x86_64__) || SANITIZER_MAC + return Func::template Apply<Mapping48AddressSpace>(arg); +# elif defined(__aarch64__) switch (vmaSize) { case 39: return Func::template Apply<MappingAarch64_39>(arg); @@ -715,8 +652,6 @@ enum MappingType { kShadowEnd, kMetaShadowBeg, kMetaShadowEnd, - kTraceMemBeg, - kTraceMemEnd, kVdsoBeg, }; @@ -750,10 +685,6 @@ struct MappingField { return Mapping::kMetaShadowBeg; case kMetaShadowEnd: return Mapping::kMetaShadowEnd; - case kTraceMemBeg: - return Mapping::kTraceMemBeg; - case kTraceMemEnd: - return Mapping::kTraceMemEnd; } Die(); } @@ -792,11 +723,6 @@ uptr MetaShadowBeg(void) { return SelectMapping<MappingField>(kMetaShadowBeg); } ALWAYS_INLINE uptr MetaShadowEnd(void) { return SelectMapping<MappingField>(kMetaShadowEnd); } -ALWAYS_INLINE -uptr TraceMemBeg(void) { return SelectMapping<MappingField>(kTraceMemBeg); } -ALWAYS_INLINE -uptr TraceMemEnd(void) { return 
SelectMapping<MappingField>(kTraceMemEnd); } - struct IsAppMemImpl { template <typename Mapping> static bool Apply(uptr mem) { @@ -934,43 +860,10 @@ inline uptr RestoreAddr(uptr addr) { return SelectMapping<RestoreAddrImpl>(addr); } -// The additional page is to catch shadow stack overflow as paging fault. -// Windows wants 64K alignment for mmaps. -const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace) - + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1); - -struct GetThreadTraceImpl { - template <typename Mapping> - static uptr Apply(uptr tid) { - uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize; - DCHECK_LT(p, Mapping::kTraceMemEnd); - return p; - } -}; - -ALWAYS_INLINE -uptr GetThreadTrace(int tid) { return SelectMapping<GetThreadTraceImpl>(tid); } - -struct GetThreadTraceHeaderImpl { - template <typename Mapping> - static uptr Apply(uptr tid) { - uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize + - kTraceSize * sizeof(Event); - DCHECK_LT(p, Mapping::kTraceMemEnd); - return p; - } -}; - -ALWAYS_INLINE -uptr GetThreadTraceHeader(int tid) { - return SelectMapping<GetThreadTraceHeaderImpl>(tid); -} - void InitializePlatform(); void InitializePlatformEarly(); void CheckAndProtect(); void InitializeShadowMemoryPlatform(); -void FlushShadowMemory(); void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns); int ExtractResolvFDs(void *state, int *fds, int nfd); int ExtractRecvmsgFDs(void *msg, int *fds, int nfd); diff --git a/libsanitizer/tsan/tsan_platform_linux.cpp b/libsanitizer/tsan/tsan_platform_linux.cpp index 73ec148..17dbdff 100644 --- a/libsanitizer/tsan/tsan_platform_linux.cpp +++ b/libsanitizer/tsan/tsan_platform_linux.cpp @@ -94,7 +94,6 @@ enum { MemMeta, MemFile, MemMmap, - MemTrace, MemHeap, MemOther, MemCount, @@ -112,8 +111,6 @@ void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) { mem[file ? MemFile : MemMmap] += rss; else if (p >= HeapMemBeg() && p < HeapMemEnd()) mem[MemHeap] += rss; - else if (p >= TraceMemBeg() && p < TraceMemEnd()) - mem[MemTrace] += rss; else mem[MemOther] += rss; } @@ -126,42 +123,33 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { StackDepotStats stacks = StackDepotGetStats(); uptr nthread, nlive; ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive); + uptr trace_mem; + { + Lock l(&ctx->slot_mtx); + trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart); + } uptr internal_stats[AllocatorStatCount]; internal_allocator()->GetStats(internal_stats); // All these are allocated from the common mmap region. 
- mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated + - internal_stats[AllocatorStatMapped]; + mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem + + stacks.allocated + internal_stats[AllocatorStatMapped]; if (s64(mem[MemMmap]) < 0) mem[MemMmap] = 0; internal_snprintf( buf, buf_size, - "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd" - " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" - " stacks=%zd[%zd] nthr=%zd/%zd\n", - uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20, - mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20, - mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20, + "==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd" + " mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" + " trace:%zu stacks=%zd threads=%zu/%zu\n", + internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch, + mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20, + mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20, mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20, - meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20, - stacks.n_uniq_ids, nlive, nthread); -} - -# if SANITIZER_LINUX -void FlushShadowMemoryCallback( - const SuspendedThreadsList &suspended_threads_list, - void *argument) { - ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd()); -} -#endif - -void FlushShadowMemory() { -#if SANITIZER_LINUX - StopTheWorld(FlushShadowMemoryCallback, 0); -#endif + meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20, + stacks.allocated >> 20, nlive, nthread); } #if !SANITIZER_GO -// Mark shadow for .rodata sections with the special kShadowRodata marker. +// Mark shadow for .rodata sections with the special Shadow::kRodata marker. // Accesses to .rodata can't race, so this saves time, memory and trace space. static void MapRodata() { // First create temp file. @@ -182,13 +170,13 @@ static void MapRodata() { return; internal_unlink(name); // Unlink it now, so that we can reuse the buffer. fd_t fd = openrv; - // Fill the file with kShadowRodata. + // Fill the file with Shadow::kRodata. const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow); InternalMmapVector<RawShadow> marker(kMarkerSize); // volatile to prevent insertion of memset for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize; p++) - *p = kShadowRodata; + *p = Shadow::kRodata; internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow)); // Map the file into memory. uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE, diff --git a/libsanitizer/tsan/tsan_platform_mac.cpp b/libsanitizer/tsan/tsan_platform_mac.cpp index 3faa2d0..44b98d4 100644 --- a/libsanitizer/tsan/tsan_platform_mac.cpp +++ b/libsanitizer/tsan/tsan_platform_mac.cpp @@ -25,6 +25,7 @@ #include "tsan_rtl.h" #include "tsan_flags.h" +#include <limits.h> #include <mach/mach.h> #include <pthread.h> #include <signal.h> @@ -45,76 +46,86 @@ namespace __tsan { #if !SANITIZER_GO -static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) { - atomic_uintptr_t *a = (atomic_uintptr_t *)dst; - void *val = (void *)atomic_load_relaxed(a); - atomic_signal_fence(memory_order_acquire); // Turns the previous load into - // acquire wrt signals. 
- if (UNLIKELY(val == nullptr)) { - val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); - CHECK(val); - void *cmp = nullptr; - if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val, - memory_order_acq_rel)) { - internal_munmap(val, size); - val = cmp; - } - } - return val; +static char main_thread_state[sizeof(ThreadState)] ALIGNED( + SANITIZER_CACHE_LINE_SIZE); +static ThreadState *dead_thread_state; +static pthread_key_t thread_state_key; + +// We rely on the following documented, but Darwin-specific behavior to keep the +// reference to the ThreadState object alive in TLS: +// pthread_key_create man page: +// If, after all the destructors have been called for all non-NULL values with +// associated destructors, there are still some non-NULL values with +// associated destructors, then the process is repeated. If, after at least +// [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for +// outstanding non-NULL values, there are still some non-NULL values with +// associated destructors, the implementation stops calling destructors. +static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations"); +static void ThreadStateDestructor(void *thr) { + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); } -// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is -// problematic, because there are several places where interceptors are called -// when TLVs are not accessible (early process startup, thread cleanup, ...). -// The following provides a "poor man's TLV" implementation, where we use the -// shadow memory of the pointer returned by pthread_self() to store a pointer to -// the ThreadState object. The main thread's ThreadState is stored separately -// in a static variable, because we need to access it even before the -// shadow memory is set up. -static uptr main_thread_identity = 0; -ALIGNED(64) static char main_thread_state[sizeof(ThreadState)]; -static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state; - -// We cannot use pthread_self() before libpthread has been initialized. Our -// current heuristic for guarding this is checking `main_thread_identity` which -// is only assigned in `__tsan::InitializePlatform`. -static ThreadState **cur_thread_location() { - if (main_thread_identity == 0) - return &main_thread_state_loc; - uptr thread_identity = (uptr)pthread_self(); - if (thread_identity == main_thread_identity) - return &main_thread_state_loc; - return (ThreadState **)MemToShadow(thread_identity); +static void InitializeThreadStateStorage() { + int res; + CHECK_EQ(thread_state_key, 0); + res = pthread_key_create(&thread_state_key, ThreadStateDestructor); + CHECK_EQ(res, 0); + res = pthread_setspecific(thread_state_key, main_thread_state); + CHECK_EQ(res, 0); + + auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState"); + dts->fast_state.SetIgnoreBit(); + dts->ignore_interceptors = 1; + dts->is_dead = true; + const_cast<Tid &>(dts->tid) = kInvalidTid; + res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ); // immutable + CHECK_EQ(res, 0); + dead_thread_state = dts; } ThreadState *cur_thread() { - return (ThreadState *)SignalSafeGetOrAllocate( - (uptr *)cur_thread_location(), sizeof(ThreadState)); + // Some interceptors get called before libpthread has been initialized and in + // these cases we must avoid calling any pthread APIs. 
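// The pthread_key destructor trick documented above can be demoed in
// isolation: re-installing the value from the destructor keeps the slot
// readable through the extra destructor passes Darwin runs at thread
// exit. POSIX leaves the iteration count implementation-defined, so this
// is a sketch of the documented Darwin behavior, not portable guidance.
#include <pthread.h>
#include <cstdio>

static pthread_key_t key;

// Mirrors ThreadStateDestructor: put the value back so later destructor
// iterations (and late interceptors) can still read it; Darwin gives up
// after PTHREAD_DESTRUCTOR_ITERATIONS passes.
static void destructor(void *value) {
  pthread_setspecific(key, value);
}

static void *worker(void *) {
  pthread_setspecific(key, (void *)0x1234);
  return nullptr;  // destructor passes run during thread teardown
}

int main() {
  pthread_key_create(&key, destructor);
  pthread_t t;
  pthread_create(&t, nullptr, worker, nullptr);
  pthread_join(t, nullptr);
  std::puts("worker exited");
  return 0;
}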
+ if (UNLIKELY(!thread_state_key)) { + return (ThreadState *)main_thread_state; + } + + // We only reach this line after InitializeThreadStateStorage() ran, i.e, + // after TSan (and therefore libpthread) have been initialized. + ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key); + if (UNLIKELY(!thr)) { + thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState"); + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); + } + return thr; } void set_cur_thread(ThreadState *thr) { - *cur_thread_location() = thr; + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); } -// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call -// munmap first and then clear `fake_tls`; if we receive a signal in between, -// handler will try to access the unmapped ThreadState. void cur_thread_finalize() { - ThreadState **thr_state_loc = cur_thread_location(); - if (thr_state_loc == &main_thread_state_loc) { + ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key); + CHECK(thr); + if (thr == (ThreadState *)main_thread_state) { // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to // exit the main thread. Let's keep the main thread's ThreadState. return; } - internal_munmap(*thr_state_loc, sizeof(ThreadState)); - *thr_state_loc = nullptr; + // Intercepted functions can still get called after cur_thread_finalize() + // (called from DestroyThreadState()), so put a fake thread state for "dead" + // threads. An alternative solution would be to release the ThreadState + // object from THREAD_DESTROY (which is delivered later and on the parent + // thread) instead of THREAD_TERMINATE. + int res = pthread_setspecific(thread_state_key, dead_thread_state); + CHECK_EQ(res, 0); + UnmapOrDie(thr, sizeof(ThreadState)); } #endif -void FlushShadowMemory() { -} - static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) { vm_address_t address = start; vm_address_t end_address = end; @@ -142,12 +153,10 @@ static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) { void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { uptr shadow_res, shadow_dirty; uptr meta_res, meta_dirty; - uptr trace_res, trace_dirty; RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty); RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty); - RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty); -#if !SANITIZER_GO +# if !SANITIZER_GO uptr low_res, low_dirty; uptr high_res, high_dirty; uptr heap_res, heap_dirty; @@ -166,7 +175,6 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { buf, buf_size, "shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" - "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" # if !SANITIZER_GO "low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" @@ -179,7 +187,6 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { "------------------------------\n", ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024, MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024, - TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024, # if !SANITIZER_GO LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024, HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024, @@ -222,11 +229,10 @@ static void 
my_pthread_introspection_hook(unsigned int event, pthread_t thread, ThreadStart(thr, tid, GetTid(), ThreadType::Worker); } } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) { - if (thread == pthread_self()) { - ThreadState *thr = cur_thread(); - if (thr->tctx) { - DestroyThreadState(); - } + CHECK_EQ(thread, pthread_self()); + ThreadState *thr = cur_thread(); + if (thr->tctx) { + DestroyThreadState(); } } @@ -253,8 +259,7 @@ void InitializePlatform() { #if !SANITIZER_GO CheckAndProtect(); - CHECK_EQ(main_thread_identity, 0); - main_thread_identity = (uptr)pthread_self(); + InitializeThreadStateStorage(); prev_pthread_introspection_hook = pthread_introspection_hook_install(&my_pthread_introspection_hook); @@ -286,24 +291,11 @@ uptr ExtractLongJmpSp(uptr *env) { extern "C" void __tsan_tls_initialization() {} void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) { - // The pointer to the ThreadState object is stored in the shadow memory - // of the tls. - uptr tls_end = tls_addr + tls_size; - uptr thread_identity = (uptr)pthread_self(); const uptr pc = StackTrace::GetNextInstructionPc( reinterpret_cast<uptr>(__tsan_tls_initialization)); - if (thread_identity == main_thread_identity) { - MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size); - } else { - uptr thr_state_start = thread_identity; - uptr thr_state_end = thr_state_start + sizeof(uptr); - CHECK_GE(thr_state_start, tls_addr); - CHECK_LE(thr_state_start, tls_addr + tls_size); - CHECK_GE(thr_state_end, tls_addr); - CHECK_LE(thr_state_end, tls_addr + tls_size); - MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr); - MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end); - } + // Unlike Linux, we only store a pointer to the ThreadState object in TLS; + // just mark the entire range as written to. + MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size); } #endif diff --git a/libsanitizer/tsan/tsan_platform_posix.cpp b/libsanitizer/tsan/tsan_platform_posix.cpp index 763ac44..71874aa 100644 --- a/libsanitizer/tsan/tsan_platform_posix.cpp +++ b/libsanitizer/tsan/tsan_platform_posix.cpp @@ -110,27 +110,23 @@ void CheckAndProtect() { Die(); } -# if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS +# if SANITIZER_IOS && !SANITIZER_IOSSIM ProtectRange(HeapMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); - ProtectRange(MetaShadowEnd(), TraceMemBeg()); -#else + ProtectRange(MetaShadowEnd(), HiAppMemBeg()); +# else ProtectRange(LoAppMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); if (MidAppMemBeg()) { ProtectRange(MetaShadowEnd(), MidAppMemBeg()); - ProtectRange(MidAppMemEnd(), TraceMemBeg()); + ProtectRange(MidAppMemEnd(), HeapMemBeg()); } else { - ProtectRange(MetaShadowEnd(), TraceMemBeg()); + ProtectRange(MetaShadowEnd(), HeapMemBeg()); } - // Memory for traces is mapped lazily in MapThreadTrace. - // Protect the whole range for now, so that user does not map something here. - ProtectRange(TraceMemBeg(), TraceMemEnd()); - ProtectRange(TraceMemEnd(), HeapMemBeg()); ProtectRange(HeapEnd(), HiAppMemBeg()); -#endif +# endif -#if defined(__s390x__) +# if defined(__s390x__) // Protect the rest of the address space. 
const uptr user_addr_max_l4 = 0x0020000000000000ull; const uptr user_addr_max_l5 = 0xfffffffffffff000ull; diff --git a/libsanitizer/tsan/tsan_platform_windows.cpp b/libsanitizer/tsan/tsan_platform_windows.cpp index fea8937..eb8f354 100644 --- a/libsanitizer/tsan/tsan_platform_windows.cpp +++ b/libsanitizer/tsan/tsan_platform_windows.cpp @@ -20,9 +20,6 @@ namespace __tsan { -void FlushShadowMemory() { -} - void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {} void InitializePlatformEarly() { diff --git a/libsanitizer/tsan/tsan_report.cpp b/libsanitizer/tsan/tsan_report.cpp index a926c37..9f15127 100644 --- a/libsanitizer/tsan/tsan_report.cpp +++ b/libsanitizer/tsan/tsan_report.cpp @@ -126,7 +126,7 @@ static void PrintMutexSet(Vector<ReportMopMutex> const& mset) { if (i == 0) Printf(" (mutexes:"); const ReportMopMutex m = mset[i]; - Printf(" %s M%llu", m.write ? "write" : "read", m.id); + Printf(" %s M%u", m.write ? "write" : "read", m.id); Printf(i == mset.Size() - 1 ? ")" : ","); } } @@ -211,29 +211,23 @@ static void PrintLocation(const ReportLocation *loc) { static void PrintMutexShort(const ReportMutex *rm, const char *after) { Decorator d; - Printf("%sM%lld%s%s", d.Mutex(), rm->id, d.Default(), after); + Printf("%sM%d%s%s", d.Mutex(), rm->id, d.Default(), after); } static void PrintMutexShortWithAddress(const ReportMutex *rm, const char *after) { Decorator d; - Printf("%sM%lld (%p)%s%s", d.Mutex(), rm->id, + Printf("%sM%d (%p)%s%s", d.Mutex(), rm->id, reinterpret_cast<void *>(rm->addr), d.Default(), after); } static void PrintMutex(const ReportMutex *rm) { Decorator d; - if (rm->destroyed) { - Printf("%s", d.Mutex()); - Printf(" Mutex M%llu is already destroyed.\n\n", rm->id); - Printf("%s", d.Default()); - } else { - Printf("%s", d.Mutex()); - Printf(" Mutex M%llu (%p) created at:\n", rm->id, - reinterpret_cast<void *>(rm->addr)); - Printf("%s", d.Default()); - PrintStack(rm->stack); - } + Printf("%s", d.Mutex()); + Printf(" Mutex M%u (%p) created at:\n", rm->id, + reinterpret_cast<void *>(rm->addr)); + Printf("%s", d.Default()); + PrintStack(rm->stack); } static void PrintThread(const ReportThread *rt) { @@ -312,6 +306,9 @@ void PrintReport(const ReportDesc *rep) { (int)internal_getpid()); Printf("%s", d.Default()); + if (rep->typ == ReportTypeErrnoInSignal) + Printf(" Signal %u handler invoked at:\n", rep->signum); + if (rep->typ == ReportTypeDeadlock) { char thrbuf[kThreadBufSize]; Printf(" Cycle in lock order graph: "); @@ -460,12 +457,12 @@ void PrintReport(const ReportDesc *rep) { } else if (rep->typ == ReportTypeDeadlock) { Printf("WARNING: DEADLOCK\n"); for (uptr i = 0; i < rep->mutexes.Size(); i++) { - Printf("Goroutine %d lock mutex %llu while holding mutex %llu:\n", 999, + Printf("Goroutine %d lock mutex %u while holding mutex %u:\n", 999, rep->mutexes[i]->id, rep->mutexes[(i + 1) % rep->mutexes.Size()]->id); PrintStack(rep->stacks[2*i]); Printf("\n"); - Printf("Mutex %llu was previously locked here:\n", + Printf("Mutex %u was previously locked here:\n", rep->mutexes[(i + 1) % rep->mutexes.Size()]->id); PrintStack(rep->stacks[2*i + 1]); Printf("\n"); diff --git a/libsanitizer/tsan/tsan_report.h b/libsanitizer/tsan/tsan_report.h index d68c2db..718eacd 100644 --- a/libsanitizer/tsan/tsan_report.h +++ b/libsanitizer/tsan/tsan_report.h @@ -43,7 +43,7 @@ struct ReportStack { }; struct ReportMopMutex { - u64 id; + int id; bool write; }; @@ -91,9 +91,8 @@ struct ReportThread { }; struct ReportMutex { - u64 id; + int id; uptr addr; - bool destroyed; ReportStack *stack; 
}; @@ -109,6 +108,7 @@ class ReportDesc { Vector<Tid> unique_tids; ReportStack *sleep; int count; + int signum = 0; ReportDesc(); ~ReportDesc(); diff --git a/libsanitizer/tsan/tsan_rtl.cpp b/libsanitizer/tsan/tsan_rtl.cpp index 46dec04..1d6fc72 100644 --- a/libsanitizer/tsan/tsan_rtl.cpp +++ b/libsanitizer/tsan/tsan_rtl.cpp @@ -16,6 +16,7 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_file.h" +#include "sanitizer_common/sanitizer_interface_internal.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_stackdepot.h" @@ -34,6 +35,9 @@ extern "C" void __tsan_resume() { __tsan_resumed = 1; } +SANITIZER_WEAK_DEFAULT_IMPL +void __tsan_test_only_on_fork() {} + namespace __tsan { #if !SANITIZER_GO @@ -54,109 +58,355 @@ Context *ctx; bool OnFinalize(bool failed); void OnInitialize(); #else -#include <dlfcn.h> SANITIZER_WEAK_CXX_DEFAULT_IMPL bool OnFinalize(bool failed) { -#if !SANITIZER_GO +# if !SANITIZER_GO if (on_finalize) return on_finalize(failed); -#endif +# endif return failed; } + SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnInitialize() { -#if !SANITIZER_GO +# if !SANITIZER_GO if (on_initialize) on_initialize(); -#endif +# endif } #endif -static ThreadContextBase *CreateThreadContext(Tid tid) { - // Map thread trace when context is created. - char name[50]; - internal_snprintf(name, sizeof(name), "trace %u", tid); - MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name); - const uptr hdr = GetThreadTraceHeader(tid); - internal_snprintf(name, sizeof(name), "trace header %u", tid); - MapThreadTrace(hdr, sizeof(Trace), name); - new((void*)hdr) Trace(); - // We are going to use only a small part of the trace with the default - // value of history_size. However, the constructor writes to the whole trace. - // Release the unused part. 
- uptr hdr_end = hdr + sizeof(Trace); - hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts()); - hdr_end = RoundUp(hdr_end, GetPageSizeCached()); - if (hdr_end < hdr + sizeof(Trace)) { - ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace)); - uptr unused = hdr + sizeof(Trace) - hdr_end; - if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) { - Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end, - unused); - CHECK("unable to mprotect" && 0); +static TracePart* TracePartAlloc(ThreadState* thr) { + TracePart* part = nullptr; + { + Lock lock(&ctx->slot_mtx); + uptr max_parts = Trace::kMinParts + flags()->history_size; + Trace* trace = &thr->tctx->trace; + if (trace->parts_allocated == max_parts || + ctx->trace_part_finished_excess) { + part = ctx->trace_part_recycle.PopFront(); + DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part); + if (part && part->trace) { + Trace* trace1 = part->trace; + Lock trace_lock(&trace1->mtx); + part->trace = nullptr; + TracePart* part1 = trace1->parts.PopFront(); + CHECK_EQ(part, part1); + if (trace1->parts_allocated > trace1->parts.Size()) { + ctx->trace_part_finished_excess += + trace1->parts_allocated - trace1->parts.Size(); + trace1->parts_allocated = trace1->parts.Size(); + } + } + } + if (trace->parts_allocated < max_parts) { + trace->parts_allocated++; + if (ctx->trace_part_finished_excess) + ctx->trace_part_finished_excess--; } + if (!part) + ctx->trace_part_total_allocated++; + else if (ctx->trace_part_recycle_finished) + ctx->trace_part_recycle_finished--; } - return New<ThreadContext>(tid); + if (!part) + part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart(); + return part; } +static void TracePartFree(TracePart* part) SANITIZER_REQUIRES(ctx->slot_mtx) { + DCHECK(part->trace); + part->trace = nullptr; + ctx->trace_part_recycle.PushFront(part); +} + +void TraceResetForTesting() { + Lock lock(&ctx->slot_mtx); + while (auto* part = ctx->trace_part_recycle.PopFront()) { + if (auto trace = part->trace) + CHECK_EQ(trace->parts.PopFront(), part); + UnmapOrDie(part, sizeof(*part)); + } + ctx->trace_part_total_allocated = 0; + ctx->trace_part_recycle_finished = 0; + ctx->trace_part_finished_excess = 0; +} + +static void DoResetImpl(uptr epoch) { + ThreadRegistryLock lock0(&ctx->thread_registry); + Lock lock1(&ctx->slot_mtx); + CHECK_EQ(ctx->global_epoch, epoch); + ctx->global_epoch++; + CHECK(!ctx->resetting); + ctx->resetting = true; + for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) { + ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked( + static_cast<Tid>(i)); + // Potentially we could purge all ThreadStatusDead threads from the + // registry. Since we reset all shadow, they can't race with anything + // anymore. However, their tids can still be stored in some aux places + // (e.g. tid of thread that created something). + auto trace = &tctx->trace; + Lock lock(&trace->mtx); + bool attached = tctx->thr && tctx->thr->slot; + auto parts = &trace->parts; + bool local = false; + while (!parts->Empty()) { + auto part = parts->Front(); + local = local || part == trace->local_head; + if (local) + CHECK(!ctx->trace_part_recycle.Queued(part)); + else + ctx->trace_part_recycle.Remove(part); + if (attached && parts->Size() == 1) { + // The thread is running and this is the last/current part. + // Set the trace position to the end of the current part + // to force the thread to call SwitchTracePart and re-attach + // to a new slot and allocate a new trace part.
+ // Note: the thread is concurrently modifying the position as well, + // so this is only best-effort. The thread can only modify position + // within this part, because switching parts is protected by + // slot/trace mutexes that we hold here. + atomic_store_relaxed( + &tctx->thr->trace_pos, + reinterpret_cast<uptr>(&part->events[TracePart::kSize])); + break; + } + parts->Remove(part); + TracePartFree(part); + } + CHECK_LE(parts->Size(), 1); + trace->local_head = parts->Front(); + if (tctx->thr && !tctx->thr->slot) { + atomic_store_relaxed(&tctx->thr->trace_pos, 0); + tctx->thr->trace_prev_pc = 0; + } + if (trace->parts_allocated > trace->parts.Size()) { + ctx->trace_part_finished_excess += + trace->parts_allocated - trace->parts.Size(); + trace->parts_allocated = trace->parts.Size(); + } + } + while (ctx->slot_queue.PopFront()) { + } + for (auto& slot : ctx->slots) { + slot.SetEpoch(kEpochZero); + slot.journal.Reset(); + slot.thr = nullptr; + ctx->slot_queue.PushBack(&slot); + } + + DPrintf("Resetting shadow...\n"); + if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(), + "shadow")) { + Printf("failed to reset shadow memory\n"); + Die(); + } + DPrintf("Resetting meta shadow...\n"); + ctx->metamap.ResetClocks(); + ctx->resetting = false; +} + +// Clang does not understand locking all slots in the loop: +// error: expecting mutex 'slot.mtx' to be held at start of each loop +void DoReset(ThreadState* thr, uptr epoch) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + { + for (auto& slot : ctx->slots) { + slot.mtx.Lock(); + if (UNLIKELY(epoch == 0)) + epoch = ctx->global_epoch; + if (UNLIKELY(epoch != ctx->global_epoch)) { + // Epoch can't change once we've locked the first slot. + CHECK_EQ(slot.sid, 0); + slot.mtx.Unlock(); + return; + } + } + } + DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch); + DoResetImpl(epoch); + for (auto& slot : ctx->slots) slot.mtx.Unlock(); +} + +void FlushShadowMemory() { DoReset(nullptr, 0); } + +static TidSlot* FindSlotAndLock(ThreadState* thr) + SANITIZER_ACQUIRE(thr->slot->mtx) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + CHECK(!thr->slot); + TidSlot* slot = nullptr; + for (;;) { + uptr epoch; + { + Lock lock(&ctx->slot_mtx); + epoch = ctx->global_epoch; + if (slot) { + // This is an exhausted slot from the previous iteration. 
+ if (ctx->slot_queue.Queued(slot)) + ctx->slot_queue.Remove(slot); + thr->slot_locked = false; + slot->mtx.Unlock(); + } + for (;;) { + slot = ctx->slot_queue.PopFront(); + if (!slot) + break; + if (slot->epoch() != kEpochLast) { + ctx->slot_queue.PushBack(slot); + break; + } + } + } + if (!slot) { + DoReset(thr, epoch); + continue; + } + slot->mtx.Lock(); + CHECK(!thr->slot_locked); + thr->slot_locked = true; + if (slot->thr) { + DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid, + slot->thr->tid); + slot->SetEpoch(slot->thr->fast_state.epoch()); + slot->thr = nullptr; + } + if (slot->epoch() != kEpochLast) + return slot; + } +} + +void SlotAttachAndLock(ThreadState* thr) { + TidSlot* slot = FindSlotAndLock(thr); + DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast<int>(slot->sid)); + CHECK(!slot->thr); + CHECK(!thr->slot); + slot->thr = thr; + thr->slot = slot; + Epoch epoch = EpochInc(slot->epoch()); + CHECK(!EpochOverflow(epoch)); + slot->SetEpoch(epoch); + thr->fast_state.SetSid(slot->sid); + thr->fast_state.SetEpoch(epoch); + if (thr->slot_epoch != ctx->global_epoch) { + thr->slot_epoch = ctx->global_epoch; + thr->clock.Reset(); #if !SANITIZER_GO -static const u32 kThreadQuarantineSize = 16; -#else -static const u32 kThreadQuarantineSize = 64; + thr->last_sleep_stack_id = kInvalidStackID; + thr->last_sleep_clock.Reset(); +#endif + } + thr->clock.Set(slot->sid, epoch); + slot->journal.PushBack({thr->tid, epoch}); +} + +static void SlotDetachImpl(ThreadState* thr, bool exiting) { + TidSlot* slot = thr->slot; + thr->slot = nullptr; + if (thr != slot->thr) { + slot = nullptr; // we don't own the slot anymore + if (thr->slot_epoch != ctx->global_epoch) { + TracePart* part = nullptr; + auto* trace = &thr->tctx->trace; + { + Lock l(&trace->mtx); + auto* parts = &trace->parts; + // The trace can be completely empty in an unlikely event + // the thread is preempted right after it acquired the slot + // in ThreadStart and did not trace any events yet. + CHECK_LE(parts->Size(), 1); + part = parts->PopFront(); + thr->tctx->trace.local_head = nullptr; + atomic_store_relaxed(&thr->trace_pos, 0); + thr->trace_prev_pc = 0; + } + if (part) { + Lock l(&ctx->slot_mtx); + TracePartFree(part); + } + } + return; + } + CHECK(exiting || thr->fast_state.epoch() == kEpochLast); + slot->SetEpoch(thr->fast_state.epoch()); + slot->thr = nullptr; +} + +void SlotDetach(ThreadState* thr) { + Lock lock(&thr->slot->mtx); + SlotDetachImpl(thr, true); +} + +void SlotLock(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + DCHECK(!thr->slot_locked); +#if SANITIZER_DEBUG + // Check these mutexes are not locked. + // We can call DoReset from SlotAttachAndLock, which will lock + // these mutexes, but it happens only every once in a while. 
+ { ThreadRegistryLock lock(&ctx->thread_registry); } + { Lock lock(&ctx->slot_mtx); } #endif + TidSlot* slot = thr->slot; + slot->mtx.Lock(); + thr->slot_locked = true; + if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast)) + return; + SlotDetachImpl(thr, false); + thr->slot_locked = false; + slot->mtx.Unlock(); + SlotAttachAndLock(thr); +} + +void SlotUnlock(ThreadState* thr) { + DCHECK(thr->slot_locked); + thr->slot_locked = false; + thr->slot->mtx.Unlock(); +} Context::Context() : initialized(), report_mtx(MutexTypeReport), nreported(), - thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize, - kMaxTidReuse), + thread_registry([](Tid tid) -> ThreadContextBase* { + return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid); + }), racy_mtx(MutexTypeRacy), racy_stacks(), racy_addresses(), fired_suppressions_mtx(MutexTypeFired), - clock_alloc(LINKER_INITIALIZED, "clock allocator") { + slot_mtx(MutexTypeSlots), + resetting() { fired_suppressions.reserve(8); + for (uptr i = 0; i < ARRAY_SIZE(slots); i++) { + TidSlot* slot = &slots[i]; + slot->sid = static_cast<Sid>(i); + slot_queue.PushBack(slot); + } + global_epoch = 1; } +TidSlot::TidSlot() : mtx(MutexTypeSlot) {} + // The objects are allocated in TLS, so one may rely on zero-initialization. -ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, - unsigned reuse_count, uptr stk_addr, uptr stk_size, - uptr tls_addr, uptr tls_size) - : fast_state(tid, epoch) - // Do not touch these, rely on zero initialization, - // they may be accessed before the ctor. - // , ignore_reads_and_writes() - // , ignore_interceptors() - , - clock(tid, reuse_count) -#if !SANITIZER_GO - , - jmp_bufs() -#endif - , - tid(tid), - unique_id(unique_id), - stk_addr(stk_addr), - stk_size(stk_size), - tls_addr(tls_addr), - tls_size(tls_size) -#if !SANITIZER_GO - , - last_sleep_clock(tid) -#endif -{ +ThreadState::ThreadState(Tid tid) + // Do not touch these, rely on zero initialization, + // they may be accessed before the ctor. + // ignore_reads_and_writes() + // ignore_interceptors() + : tid(tid) { CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0); #if !SANITIZER_GO - shadow_stack_pos = shadow_stack; - shadow_stack_end = shadow_stack + kShadowStackSize; + // C/C++ uses fixed size shadow stack. + const int kInitStackSize = kShadowStackSize; + shadow_stack = static_cast<uptr*>( + MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack")); + SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack), + kInitStackSize * sizeof(uptr)); #else - // Setup dynamic shadow stack. + // Go uses malloc-allocated shadow stack with dynamic size. 
const int kInitStackSize = 8; - shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr)); + shadow_stack = static_cast<uptr*>(Alloc(kInitStackSize * sizeof(uptr))); +#endif shadow_stack_pos = shadow_stack; shadow_stack_end = shadow_stack + kInitStackSize; -#endif } #if !SANITIZER_GO @@ -168,11 +418,11 @@ void MemoryProfiler(u64 uptime) { WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data())); } -void InitializeMemoryProfiler() { +static bool InitializeMemoryProfiler() { ctx->memprof_fd = kInvalidFd; const char *fname = flags()->profile_memory; if (!fname || !fname[0]) - return; + return false; if (internal_strcmp(fname, "stdout") == 0) { ctx->memprof_fd = 1; } else if (internal_strcmp(fname, "stderr") == 0) { @@ -184,11 +434,11 @@ if (ctx->memprof_fd == kInvalidFd) { Printf("ThreadSanitizer: failed to open memory profile file '%s'\n", filename.data()); - return; + return false; } } MemoryProfiler(0); - MaybeSpawnBackgroundThread(); + return true; } static void *BackgroundThread(void *arg) { @@ -200,33 +450,34 @@ const u64 kMs2Ns = 1000 * 1000; const u64 start = NanoTime(); - u64 last_flush = NanoTime(); + u64 last_flush = start; uptr last_rss = 0; - for (int i = 0; - atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0; - i++) { + while (!atomic_load_relaxed(&ctx->stop_background_thread)) { SleepForMillis(100); u64 now = NanoTime(); // Flush memory if requested. if (flags()->flush_memory_ms > 0) { if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) { - VPrintf(1, "ThreadSanitizer: periodic memory flush\n"); + VReport(1, "ThreadSanitizer: periodic memory flush\n"); FlushShadowMemory(); - last_flush = NanoTime(); + now = last_flush = NanoTime(); } } if (flags()->memory_limit_mb > 0) { uptr rss = GetRSS(); uptr limit = uptr(flags()->memory_limit_mb) << 20; - VPrintf(1, "ThreadSanitizer: memory flush check" - " RSS=%llu LAST=%llu LIMIT=%llu\n", + VReport(1, + "ThreadSanitizer: memory flush check" + " RSS=%llu LAST=%llu LIMIT=%llu\n", (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20); if (2 * rss > limit + last_rss) { - VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n"); + VReport(1, "ThreadSanitizer: flushing memory due to RSS\n"); FlushShadowMemory(); rss = GetRSS(); - VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20); + now = NanoTime(); + VReport(1, "ThreadSanitizer: memory flushed RSS=%llu\n", + (u64)rss >> 20); } last_rss = rss; } @@ -267,11 +518,43 @@ void DontNeedShadowFor(uptr addr, uptr size) { } #if !SANITIZER_GO +// We call UnmapShadow before the actual munmap; at that point we don't yet +// know if the provided address/size are sane. We can't call UnmapShadow +// after the actual munmap because at that point the memory range can +// already be reused for something else, so we can't rely on the munmap +// return value to understand if the values are sane. +// While calling munmap with insane values (non-canonical address, negative +// size, etc) is an error, the kernel won't crash. We must also try not to +// crash as the failure mode is very confusing (page fault inside of the +// runtime on some derived shadow address).
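As a minimal, self-contained illustration of the ordering mandated by the comment above (all names below are hypothetical stand-ins for this sketch, not the actual TSan interceptor): the shadow must be cleaned while the range is still guaranteed to belong to the application, and only afterwards is the range handed to the kernel, which performs its own argument validation.

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/types.h>

    // Stand-in for UnmapShadow: validate the range first (in the spirit of
    // IsValidMmapRange below) and silently ignore insane arguments.
    static void unmap_shadow_sketch(void *addr, size_t size) {
      if (size == 0 || (ssize_t)size < 0)
        return;  // zero or negative size: do nothing rather than crash
      printf("resetting shadow for [%p, %p)\n", addr,
             (void *)((char *)addr + size));
    }

    // Hypothetical munmap wrapper demonstrating the required ordering.
    static int munmap_sketch(void *addr, size_t size) {
      unmap_shadow_sketch(addr, size);  // before the pages can be reused
      return munmap(addr, size);        // the kernel rejects bad args itself
    }

    int main(void) {
      void *p = mmap(0, 4096, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANON, -1, 0);
      return munmap_sketch(p, 4096);
    }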
+static bool IsValidMmapRange(uptr addr, uptr size) { + if (size == 0) + return true; + if (static_cast<sptr>(size) < 0) + return false; + if (!IsAppMem(addr) || !IsAppMem(addr + size - 1)) + return false; + // Check that if the start of the region belongs to one of the app ranges, + // the end of the region belongs to the same range. + const uptr ranges[][2] = { + {LoAppMemBeg(), LoAppMemEnd()}, + {MidAppMemBeg(), MidAppMemEnd()}, + {HiAppMemBeg(), HiAppMemEnd()}, + }; + for (auto range : ranges) { + if (addr >= range[0] && addr < range[1]) + return addr + size <= range[1]; + } + return false; +} + void UnmapShadow(ThreadState *thr, uptr addr, uptr size) { - if (size == 0) return; + if (size == 0 || !IsValidMmapRange(addr, size)) + return; DontNeedShadowFor(addr, size); ScopedGlobalProcessor sgp; - ctx->metamap.ResetRange(thr->proc(), addr, size); + SlotLocker locker(thr, true); + ctx->metamap.ResetRange(thr->proc(), addr, size, true); } #endif @@ -317,18 +600,6 @@ void MapShadow(uptr addr, uptr size) { addr + size, meta_begin, meta_end); } -void MapThreadTrace(uptr addr, uptr size, const char *name) { - DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size); - CHECK_GE(addr, TraceMemBeg()); - CHECK_LE(addr + size, TraceMemEnd()); - CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment - if (!MmapFixedSuperNoReserve(addr, size, name)) { - Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n", - addr, size); - Die(); - } -} - #if !SANITIZER_GO static void OnStackUnwind(const SignalContext &sig, const void *, BufferedStackTrace *stack) { @@ -347,8 +618,11 @@ void CheckUnwind() { // since we are going to die soon. ScopedIgnoreInterceptors ignore; #if !SANITIZER_GO - cur_thread()->ignore_sync++; - cur_thread()->ignore_reads_and_writes++; + ThreadState* thr = cur_thread(); + thr->nomalloc = false; + thr->ignore_sync++; + thr->ignore_reads_and_writes++; + atomic_store_relaxed(&thr->in_signal_handler, 0); #endif PrintCurrentStackSlow(StackTrace::GetCurrentPc()); } @@ -403,22 +677,23 @@ void Initialize(ThreadState *thr) { Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer); #endif - VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n", + VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n", (int)internal_getpid()); // Initialize thread 0. - Tid tid = ThreadCreate(thr, 0, 0, true); + Tid tid = ThreadCreate(nullptr, 0, 0, true); CHECK_EQ(tid, kMainTid); ThreadStart(thr, tid, GetTid(), ThreadType::Regular); #if TSAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif - ctx->initialized = true; #if !SANITIZER_GO Symbolizer::LateInitialize(); - InitializeMemoryProfiler(); + if (InitializeMemoryProfiler() || flags()->force_background_thread) + MaybeSpawnBackgroundThread(); #endif + ctx->initialized = true; if (flags()->stop_on_start) { Printf("ThreadSanitizer is suspended at startup (pid %d)." @@ -444,7 +719,6 @@ void MaybeSpawnBackgroundThread() { #endif } - int Finalize(ThreadState *thr) { bool failed = false; @@ -452,12 +726,12 @@ int Finalize(ThreadState *thr) { DumpProcessMap(); if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1) - SleepForMillis(flags()->atexit_sleep_ms); + internal_usleep(u64(flags()->atexit_sleep_ms) * 1000); - // Wait for pending reports. - ctx->report_mtx.Lock(); - { ScopedErrorReportLock l; } - ctx->report_mtx.Unlock(); + { + // Wait for pending reports.
+ ScopedErrorReportLock lock; + } #if !SANITIZER_GO if (Verbosity()) AllocatorPrintStats(); @@ -483,10 +757,16 @@ int Finalize(ThreadState *thr) { } #if !SANITIZER_GO -void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { +void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + GlobalProcessorLock(); + // Detaching from the slot makes OnUserFree skip writing to the shadow. + // The slot will be locked so any attempts to use it will deadlock anyway. + SlotDetach(thr); + for (auto& slot : ctx->slots) slot.mtx.Lock(); ctx->thread_registry.Lock(); - ctx->report_mtx.Lock(); + ctx->slot_mtx.Lock(); ScopedErrorReportLock::Lock(); + AllocatorLock(); // Suppress all reports in the pthread_atfork callbacks. // Reports will deadlock on the report_mtx. // We could ignore sync operations as well, @@ -495,29 +775,38 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports++; // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and // we'll assert in CheckNoLocks() unless we ignore interceptors. + // On OS X libSystem_atfork_prepare/parent/child callbacks are called + // after/before our callbacks and they call free. thr->ignore_interceptors++; + // Disables memory write in OnUserAlloc/Free. + thr->ignore_reads_and_writes++; + + __tsan_test_only_on_fork(); } -void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { +static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports--; // Enabled in ForkBefore. thr->ignore_interceptors--; + thr->ignore_reads_and_writes--; + AllocatorUnlock(); ScopedErrorReportLock::Unlock(); - ctx->report_mtx.Unlock(); + ctx->slot_mtx.Unlock(); ctx->thread_registry.Unlock(); + for (auto& slot : ctx->slots) slot.mtx.Unlock(); + SlotAttachAndLock(thr); + SlotUnlock(thr); + GlobalProcessorUnlock(); } -void ForkChildAfter(ThreadState *thr, uptr pc, - bool start_thread) NO_THREAD_SAFETY_ANALYSIS { - thr->suppress_reports--; // Enabled in ForkBefore. - thr->ignore_interceptors--; - ScopedErrorReportLock::Unlock(); - ctx->report_mtx.Unlock(); - ctx->thread_registry.Unlock(); +void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); } - uptr nthread = 0; - ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */); - VPrintf(1, "ThreadSanitizer: forked new process with pid %d," - " parent had %d threads\n", (int)internal_getpid(), (int)nthread); +void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) { + ForkAfter(thr); + u32 nthread = ctx->thread_registry.OnFork(thr->tid); + VPrintf(1, + "ThreadSanitizer: forked new process with pid %d," + " parent had %d threads\n", + (int)internal_getpid(), (int)nthread); if (nthread == 1) { if (start_thread) StartBackgroundThread(); @@ -527,6 +816,7 @@ void ForkChildAfter(ThreadState *thr, uptr pc, // ignores for everything in the hope that we will exec soon. ctx->after_multithreaded_fork = true; thr->ignore_interceptors++; + thr->suppress_reports++; ThreadIgnoreBegin(thr, pc); ThreadIgnoreSyncBegin(thr, pc); } @@ -548,8 +838,10 @@ void GrowShadowStack(ThreadState *thr) { #endif StackID CurrentStackId(ThreadState *thr, uptr pc) { +#if !SANITIZER_GO if (!thr->is_inited) // May happen during bootstrap. 
return kInvalidStackID; +#endif if (pc != 0) { #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -567,53 +859,72 @@ StackID CurrentStackId(ThreadState *thr, uptr pc) { return id; } -namespace v3 { - -NOINLINE -void TraceSwitchPart(ThreadState *thr) { +static bool TraceSkipGap(ThreadState* thr) { Trace *trace = &thr->tctx->trace; Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos)); DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0); auto *part = trace->parts.Back(); - DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos); - if (part) { - // We can get here when we still have space in the current trace part. - // The fast-path check in TraceAcquire has false positives in the middle of - // the part. Check if we are indeed at the end of the current part or not, - // and fill any gaps with NopEvent's. - Event *end = &part->events[TracePart::kSize]; - DCHECK_GE(pos, &part->events[0]); - DCHECK_LE(pos, end); - if (pos + 1 < end) { - if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) == - TracePart::kAlignment) - *pos++ = NopEvent; + DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid, + trace, trace->parts.Front(), part, pos); + if (!part) + return false; + // We can get here when we still have space in the current trace part. + // The fast-path check in TraceAcquire has false positives in the middle of + // the part. Check if we are indeed at the end of the current part or not, + // and fill any gaps with NopEvent's. + Event* end = &part->events[TracePart::kSize]; + DCHECK_GE(pos, &part->events[0]); + DCHECK_LE(pos, end); + if (pos + 1 < end) { + if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) == + TracePart::kAlignment) *pos++ = NopEvent; - DCHECK_LE(pos + 2, end); - atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos)); - // Ensure we setup trace so that the next TraceAcquire - // won't detect trace part end. - Event *ev; - CHECK(TraceAcquire(thr, &ev)); - return; - } - // We are indeed at the end. - for (; pos < end; pos++) *pos = NopEvent; + *pos++ = NopEvent; + DCHECK_LE(pos + 2, end); + atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos)); + return true; } + // We are indeed at the end. + for (; pos < end; pos++) *pos = NopEvent; + return false; +} + +NOINLINE +void TraceSwitchPart(ThreadState* thr) { + if (TraceSkipGap(thr)) + return; #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { // We just need to survive till exec. - CHECK(part); - atomic_store_relaxed(&thr->trace_pos, - reinterpret_cast<uptr>(&part->events[0])); - return; + TracePart* part = thr->tctx->trace.parts.Back(); + if (part) { + atomic_store_relaxed(&thr->trace_pos, + reinterpret_cast<uptr>(&part->events[0])); + return; + } } #endif - part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart(); + TraceSwitchPartImpl(thr); +} + +void TraceSwitchPartImpl(ThreadState* thr) { + SlotLocker locker(thr, true); + Trace* trace = &thr->tctx->trace; + TracePart* part = TracePartAlloc(thr); part->trace = trace; thr->trace_prev_pc = 0; + TracePart* recycle = nullptr; + // Keep roughly half of parts local to the thread + // (not queued into the recycle queue). 
+ uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2; { Lock lock(&trace->mtx); + if (trace->parts.Empty()) + trace->local_head = part; + if (trace->parts.Size() >= local_parts) { + recycle = trace->local_head; + trace->local_head = trace->parts.Next(recycle); + } trace->parts.PushBack(part); atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(&part->events[0])); @@ -621,60 +932,49 @@ // Make this part self-sufficient by restoring the current stack // and mutex set in the beginning of the trace. TraceTime(thr); - for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++) - CHECK(TryTraceFunc(thr, *pos)); + { + // Pathologically large stacks may not fit into the part. + // In these cases we log only a fixed number of top frames. + const uptr kMaxFrames = 1000; + // Check that kMaxFrames won't consume the whole part. + static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big"); + uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames); + for (; pos < thr->shadow_stack_pos; pos++) { + if (TryTraceFunc(thr, *pos)) + continue; + CHECK(TraceSkipGap(thr)); + CHECK(TryTraceFunc(thr, *pos)); + } + } for (uptr i = 0; i < thr->mset.Size(); i++) { MutexSet::Desc d = thr->mset.Get(i); - TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, - d.addr, d.stack_id); + for (uptr i = 0; i < d.count; i++) + TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, + d.addr, d.stack_id); } + { + Lock lock(&ctx->slot_mtx); + // There is a small chance that the slot may not be queued at this point. + // This can happen if the slot has kEpochLast epoch and another thread + // in FindSlotAndLock discovered that it's exhausted and removed it from + // the slot queue. kEpochLast can happen in 2 cases: (1) if TraceSwitchPart + // was called with the slot locked and epoch already at kEpochLast, + // or (2) if we've acquired a new slot in SlotLock in the beginning + // of the function and the slot was at kEpochLast - 1, so after increment + // in SlotAttachAndLock it became kEpochLast.
+ if (ctx->slot_queue.Queued(thr->slot)) { + ctx->slot_queue.Remove(thr->slot); + ctx->slot_queue.PushBack(thr->slot); + } + if (recycle) + ctx->trace_part_recycle.PushBack(recycle); + } + DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid, + trace->parts.Front(), trace->parts.Back(), + atomic_load_relaxed(&thr->trace_pos)); } -} // namespace v3 - -void TraceSwitch(ThreadState *thr) { -#if !SANITIZER_GO - if (ctx->after_multithreaded_fork) - return; -#endif - thr->nomalloc++; - Trace *thr_trace = ThreadTrace(thr->tid); - Lock l(&thr_trace->mtx); - unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts(); - TraceHeader *hdr = &thr_trace->headers[trace]; - hdr->epoch0 = thr->fast_state.epoch(); - ObtainCurrentStack(thr, 0, &hdr->stack0); - hdr->mset0 = thr->mset; - thr->nomalloc--; -} - -Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); } - -uptr TraceTopPC(ThreadState *thr) { - Event *events = (Event*)GetThreadTrace(thr->tid); - uptr pc = events[thr->fast_state.GetTracePos()]; - return pc; -} - -uptr TraceSize() { - return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1)); -} - -uptr TraceParts() { - return TraceSize() / kTracePartSize; -} - -#if !SANITIZER_GO -extern "C" void __tsan_trace_switch() { - TraceSwitch(cur_thread()); -} - -extern "C" void __tsan_report_race() { - ReportRace(cur_thread()); -} -#endif - -void ThreadIgnoreBegin(ThreadState *thr, uptr pc) { +void ThreadIgnoreBegin(ThreadState* thr, uptr pc) { DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid); thr->ignore_reads_and_writes++; CHECK_GT(thr->ignore_reads_and_writes, 0); @@ -734,7 +1034,6 @@ void build_consistency_debug() {} #else void build_consistency_release() {} #endif - } // namespace __tsan #if SANITIZER_CHECK_DEADLOCKS @@ -742,18 +1041,27 @@ namespace __sanitizer { using namespace __tsan; MutexMeta mutex_meta[] = { {MutexInvalid, "Invalid", {}}, - {MutexThreadRegistry, "ThreadRegistry", {}}, - {MutexTypeTrace, "Trace", {MutexLeaf}}, - {MutexTypeReport, "Report", {MutexTypeSyncVar}}, - {MutexTypeSyncVar, "SyncVar", {}}, + {MutexThreadRegistry, + "ThreadRegistry", + {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}}, + {MutexTypeReport, "Report", {MutexTypeTrace}}, + {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}}, {MutexTypeAnnotations, "Annotations", {}}, - {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}}, + {MutexTypeAtExit, "AtExit", {}}, {MutexTypeFired, "Fired", {MutexLeaf}}, {MutexTypeRacy, "Racy", {MutexLeaf}}, - {MutexTypeGlobalProc, "GlobalProc", {}}, + {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}}, + {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}}, + {MutexTypeTrace, "Trace", {}}, + {MutexTypeSlot, + "Slot", + {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry, + MutexTypeSlots}}, + {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}}, {}, }; void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); } + } // namespace __sanitizer #endif diff --git a/libsanitizer/tsan/tsan_rtl.h b/libsanitizer/tsan/tsan_rtl.h index eab8370..b472c0f 100644 --- a/libsanitizer/tsan/tsan_rtl.h +++ b/libsanitizer/tsan/tsan_rtl.h @@ -34,10 +34,10 @@ #include "sanitizer_common/sanitizer_suppressions.h" #include "sanitizer_common/sanitizer_thread_registry.h" #include "sanitizer_common/sanitizer_vector.h" -#include "tsan_clock.h" #include "tsan_defs.h" #include "tsan_flags.h" #include "tsan_ignoreset.h" +#include "tsan_ilist.h" #include "tsan_mman.h" #include "tsan_mutexset.h" #include 
"tsan_platform.h" @@ -46,6 +46,7 @@ #include "tsan_stack_trace.h" #include "tsan_sync.h" #include "tsan_trace.h" +#include "tsan_vector_clock.h" #if SANITIZER_WORDSIZE != 64 # error "ThreadSanitizer is supported only on 64-bit platforms" @@ -116,7 +117,6 @@ struct Processor { #endif DenseSlabAllocCache block_cache; DenseSlabAllocCache sync_cache; - DenseSlabAllocCache clock_cache; DDPhysicalThread *dd_pt; }; @@ -130,67 +130,85 @@ struct ScopedGlobalProcessor { }; #endif +struct TidEpoch { + Tid tid; + Epoch epoch; +}; + +struct TidSlot { + Mutex mtx; + Sid sid; + atomic_uint32_t raw_epoch; + ThreadState *thr; + Vector<TidEpoch> journal; + INode node; + + Epoch epoch() const { + return static_cast<Epoch>(atomic_load(&raw_epoch, memory_order_relaxed)); + } + + void SetEpoch(Epoch v) { + atomic_store(&raw_epoch, static_cast<u32>(v), memory_order_relaxed); + } + + TidSlot(); +} ALIGNED(SANITIZER_CACHE_LINE_SIZE); + // This struct is stored in TLS. struct ThreadState { FastState fast_state; - // Synch epoch represents the threads's epoch before the last synchronization - // action. It allows to reduce number of shadow state updates. - // For example, fast_synch_epoch=100, last write to addr X was at epoch=150, - // if we are processing write to X from the same thread at epoch=200, - // we do nothing, because both writes happen in the same 'synch epoch'. - // That is, if another memory access does not race with the former write, - // it does not race with the latter as well. - // QUESTION: can we can squeeze this into ThreadState::Fast? - // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are - // taken by epoch between synchs. - // This way we can save one load from tls. - u64 fast_synch_epoch; + int ignore_sync; +#if !SANITIZER_GO + int ignore_interceptors; +#endif + uptr *shadow_stack_pos; + + // Current position in tctx->trace.Back()->events (Event*). + atomic_uintptr_t trace_pos; + // PC of the last memory access, used to compute PC deltas in the trace. + uptr trace_prev_pc; + // Technically `current` should be a separate THREADLOCAL variable; // but it is placed here in order to share cache line with previous fields. ThreadState* current; + + atomic_sint32_t pending_signals; + + VectorClock clock; + // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read. // We do not distinguish beteween ignoring reads and writes // for better performance. int ignore_reads_and_writes; - atomic_sint32_t pending_signals; - int ignore_sync; int suppress_reports; // Go does not support ignores. #if !SANITIZER_GO IgnoreSet mop_ignore_set; IgnoreSet sync_ignore_set; - // C/C++ uses fixed size shadow stack. - uptr shadow_stack[kShadowStackSize]; -#else - // Go uses malloc-allocated shadow stack with dynamic size. 
- uptr *shadow_stack; #endif + uptr *shadow_stack; uptr *shadow_stack_end; - uptr *shadow_stack_pos; - RawShadow *racy_shadow_addr; - RawShadow racy_state[2]; - MutexSet mset; - ThreadClock clock; #if !SANITIZER_GO Vector<JmpBuf> jmp_bufs; - int ignore_interceptors; -#endif - const Tid tid; - const int unique_id; - bool in_symbolizer; + int in_symbolizer; bool in_ignored_lib; bool is_inited; +#endif + MutexSet mset; bool is_dead; - bool is_freeing; - bool is_vptr_access; - const uptr stk_addr; - const uptr stk_size; - const uptr tls_addr; - const uptr tls_size; + const Tid tid; + uptr stk_addr; + uptr stk_size; + uptr tls_addr; + uptr tls_size; ThreadContext *tctx; DDLogicalThread *dd_lt; + TidSlot *slot; + uptr slot_epoch; + bool slot_locked; + // Current wired Processor, or nullptr. Required to handle any events. Processor *proc1; #if !SANITIZER_GO @@ -204,7 +222,7 @@ struct ThreadState { #if !SANITIZER_GO StackID last_sleep_stack_id; - ThreadClock last_sleep_clock; + VectorClock last_sleep_clock; #endif // Set in regions of runtime that must be signal-safe and fork-safe. @@ -213,16 +231,7 @@ struct ThreadState { const ReportDesc *current_report; - // Current position in tctx->trace.Back()->events (Event*). - atomic_uintptr_t trace_pos; - // PC of the last memory access, used to compute PC deltas in the trace. - uptr trace_prev_pc; - Sid sid; - Epoch epoch; - - explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, - unsigned reuse_count, uptr stk_addr, uptr stk_size, - uptr tls_addr, uptr tls_size); + explicit ThreadState(Tid tid); } ALIGNED(SANITIZER_CACHE_LINE_SIZE); #if !SANITIZER_GO @@ -256,14 +265,9 @@ class ThreadContext final : public ThreadContextBase { ~ThreadContext(); ThreadState *thr; StackID creation_stack_id; - SyncClock sync; - // Epoch at which the thread had started. - // If we see an event from the thread stamped by an older epoch, - // the event is from a dead thread that shared tid with this thread. - u64 epoch0; - u64 epoch1; - - v3::Trace trace; + VectorClock *sync; + uptr sync_epoch; + Trace trace; // Override superclass callbacks. void OnDead() override; @@ -318,12 +322,22 @@ struct Context { InternalMmapVector<FiredSuppression> fired_suppressions; DDetector *dd; - ClockAlloc clock_alloc; - Flags flags; fd_t memprof_fd; + // The last slot index (kFreeSid) is used to denote freed memory. + TidSlot slots[kThreadSlotCount - 1]; + + // Protects global_epoch, slot_queue, trace_part_recycle. Mutex slot_mtx; + uptr global_epoch; // guarded by slot_mtx and by all slot mutexes + bool resetting; // global reset is in progress + IList<TidSlot, &TidSlot::node> slot_queue SANITIZER_GUARDED_BY(slot_mtx); + IList<TraceHeader, &TraceHeader::global, TracePart> trace_part_recycle + SANITIZER_GUARDED_BY(slot_mtx); + uptr trace_part_total_allocated SANITIZER_GUARDED_BY(slot_mtx); + uptr trace_part_recycle_finished SANITIZER_GUARDED_BY(slot_mtx); + uptr trace_part_finished_excess SANITIZER_GUARDED_BY(slot_mtx); }; extern Context *ctx; // The one and the only global runtime context. 
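To make the slot machinery above concrete, here is a toy model (a simplified sketch under assumed semantics: no locking, tiny constants, plain ints instead of Sid/Epoch) of the rotation that slots, slot_queue and global_epoch implement: each attach bumps the slot's epoch, a slot whose epoch reaches the last value drops out of the round-robin queue, and once no usable slot remains a global reset re-zeroes every epoch and requeues all slots.

    #include <array>
    #include <cstdio>
    #include <deque>

    constexpr int kSlots = 4;      // stand-in for the real slot count
    constexpr int kEpochLast = 3;  // real epochs are much wider

    struct Slot { int sid = 0; int epoch = 0; };

    static std::array<Slot, kSlots> slots;
    static std::deque<Slot *> queue;  // round-robin, roughly LRU

    static Slot *Attach() {
      for (;;) {
        if (queue.empty()) {  // every slot exhausted: a global reset
          std::printf("global reset\n");
          for (auto &s : slots) { s.epoch = 0; queue.push_back(&s); }
        }
        Slot *s = queue.front();
        queue.pop_front();
        if (s->epoch == kEpochLast) continue;  // exhausted, keep it dequeued
        s->epoch++;          // mirrors the EpochInc in SlotAttachAndLock
        queue.push_back(s);  // keep the slot in rotation
        return s;
      }
    }

    int main() {
      for (int i = 0; i < kSlots; i++) { slots[i].sid = i; queue.push_back(&slots[i]); }
      for (int i = 0; i < 2 * kSlots * kEpochLast; i++) {
        Slot *s = Attach();
        std::printf("attached sid=%d epoch=%d\n", s->sid, s->epoch);
      }
      return 0;
    }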
@@ -352,17 +366,17 @@ uptr TagFromShadowStackFrame(uptr pc); class ScopedReportBase { public: - void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack, - const MutexSet *mset); + void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, Tid tid, + StackTrace stack, const MutexSet *mset); void AddStack(StackTrace stack, bool suppressable = false); void AddThread(const ThreadContext *tctx, bool suppressable = false); - void AddThread(Tid unique_tid, bool suppressable = false); + void AddThread(Tid tid, bool suppressable = false); void AddUniqueTid(Tid unique_tid); - void AddMutex(const SyncVar *s); - u64 AddMutex(u64 id); + int AddMutex(uptr addr, StackID creation_stack_id); void AddLocation(uptr addr, uptr size); void AddSleep(StackID stack_id); void SetCount(int count); + void SetSigNum(int sig); const ReportDesc *GetReport() const; @@ -376,8 +390,6 @@ class ScopedReportBase { // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore_interceptors_; - void AddDeadMutex(u64 id); - ScopedReportBase(const ScopedReportBase &) = delete; void operator=(const ScopedReportBase &) = delete; }; @@ -393,8 +405,6 @@ class ScopedReport : public ScopedReportBase { bool ShouldReport(ThreadState *thr, ReportType typ); ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack); -void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, - MutexSet *mset, uptr *tag = nullptr); // The stack could look like: // <start> | <main> | <foo> | tag | <bar> @@ -442,7 +452,8 @@ void ForkBefore(ThreadState *thr, uptr pc); void ForkParentAfter(ThreadState *thr, uptr pc); void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread); -void ReportRace(ThreadState *thr); +void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, + AccessType typ); bool OutputReport(ThreadState *thr, const ScopedReport &srep); bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace); bool IsExpectedReport(uptr addr, uptr size); @@ -472,55 +483,28 @@ int Finalize(ThreadState *thr); void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write); void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write); -void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, - int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic); -void MemoryAccessImpl(ThreadState *thr, uptr addr, - int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic, - u64 *shadow_mem, Shadow cur); -void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, - uptr size, bool is_write); +void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, + AccessType typ); void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, AccessType typ); - -const int kSizeLog1 = 0; -const int kSizeLog2 = 1; -const int kSizeLog4 = 2; -const int kSizeLog8 = 3; +// This creates 2 non-inlined specialized versions of MemoryAccessRange. 
+template <bool is_read> +void MemoryAccessRangeT(ThreadState *thr, uptr pc, uptr addr, uptr size); ALWAYS_INLINE -void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, - AccessType typ) { - int size_log; - switch (size) { - case 1: - size_log = kSizeLog1; - break; - case 2: - size_log = kSizeLog2; - break; - case 4: - size_log = kSizeLog4; - break; - default: - DCHECK_EQ(size, 8); - size_log = kSizeLog8; - break; - } - bool is_write = !(typ & kAccessRead); - bool is_atomic = typ & kAccessAtomic; - if (typ & kAccessVptr) - thr->is_vptr_access = true; - if (typ & kAccessFree) - thr->is_freeing = true; - MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic); - if (typ & kAccessVptr) - thr->is_vptr_access = false; - if (typ & kAccessFree) - thr->is_freeing = false; +void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, + bool is_write) { + if (size == 0) + return; + if (is_write) + MemoryAccessRangeT<false>(thr, pc, addr, size); + else + MemoryAccessRangeT<true>(thr, pc, addr, size); } -void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); +void ShadowSet(RawShadow *p, RawShadow *end, RawShadow v); void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size); +void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); @@ -530,9 +514,6 @@ void ThreadIgnoreEnd(ThreadState *thr); void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc); void ThreadIgnoreSyncEnd(ThreadState *thr); -void FuncEntry(ThreadState *thr, uptr pc); -void FuncExit(ThreadState *thr); - Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached); void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type); @@ -578,60 +559,7 @@ void Release(ThreadState *thr, uptr pc, uptr addr); void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr); void ReleaseStore(ThreadState *thr, uptr pc, uptr addr); void AfterSleep(ThreadState *thr, uptr pc); -void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c); -void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); - -// The hacky call uses custom calling convention and an assembly thunk. -// It is considerably faster that a normal call for the caller -// if it is not executed (it is intended for slow paths from hot functions). -// The trick is that the call preserves all registers and the compiler -// does not treat it as a call. -// If it does not work for you, use normal call. -#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC -// The caller may not create the stack frame for itself at all, -// so we create a reserve stack frame for it (1024b must be enough). 
-#define HACKY_CALL(f) \ - __asm__ __volatile__("sub $1024, %%rsp;" \ - CFI_INL_ADJUST_CFA_OFFSET(1024) \ - ".hidden " #f "_thunk;" \ - "call " #f "_thunk;" \ - "add $1024, %%rsp;" \ - CFI_INL_ADJUST_CFA_OFFSET(-1024) \ - ::: "memory", "cc"); -#else -#define HACKY_CALL(f) f() -#endif - -void TraceSwitch(ThreadState *thr); -uptr TraceTopPC(ThreadState *thr); -uptr TraceSize(); -uptr TraceParts(); -Trace *ThreadTrace(Tid tid); - -extern "C" void __tsan_trace_switch(); -void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs, - EventType typ, u64 addr) { - if (!kCollectHistory) - return; - DCHECK_GE((int)typ, 0); - DCHECK_LE((int)typ, 7); - DCHECK_EQ(GetLsb(addr, kEventPCBits), addr); - u64 pos = fs.GetTracePos(); - if (UNLIKELY((pos % kTracePartSize) == 0)) { -#if !SANITIZER_GO - HACKY_CALL(__tsan_trace_switch); -#else - TraceSwitch(thr); -#endif - } - Event *trace = (Event*)GetThreadTrace(fs.tid()); - Event *evp = &trace[pos]; - Event ev = (u64)addr | ((u64)typ << kEventPCBits); - *evp = ev; -} +void IncrementEpoch(ThreadState *thr); #if !SANITIZER_GO uptr ALWAYS_INLINE HeapEnd() { @@ -639,6 +567,13 @@ uptr ALWAYS_INLINE HeapEnd() { } #endif +void SlotAttachAndLock(ThreadState *thr) SANITIZER_ACQUIRE(thr->slot->mtx); +void SlotDetach(ThreadState *thr); +void SlotLock(ThreadState *thr) SANITIZER_ACQUIRE(thr->slot->mtx); +void SlotUnlock(ThreadState *thr) SANITIZER_RELEASE(thr->slot->mtx); +void DoReset(ThreadState *thr, uptr epoch); +void FlushShadowMemory(); + ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags); void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber); void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags); @@ -649,6 +584,43 @@ enum FiberSwitchFlags { FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync }; +class SlotLocker { + public: + ALWAYS_INLINE + SlotLocker(ThreadState *thr, bool recursive = false) + : thr_(thr), locked_(recursive ? thr->slot_locked : false) { + if (!locked_) + SlotLock(thr_); + } + + ALWAYS_INLINE + ~SlotLocker() { + if (!locked_) + SlotUnlock(thr_); + } + + private: + ThreadState *thr_; + bool locked_; +}; + +class SlotUnlocker { + public: + SlotUnlocker(ThreadState *thr) : thr_(thr), locked_(thr->slot_locked) { + if (locked_) + SlotUnlock(thr_); + } + + ~SlotUnlocker() { + if (locked_) + SlotLock(thr_); + } + + private: + ThreadState *thr_; + bool locked_; +}; + ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) { if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals))) ProcessPendingSignalsImpl(thr); @@ -667,16 +639,19 @@ void LazyInitialize(ThreadState *thr) { #endif } -namespace v3 { - +void TraceResetForTesting(); void TraceSwitchPart(ThreadState *thr); -bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, - uptr size, AccessType typ, VarSizeStackTrace *pstk, +void TraceSwitchPartImpl(ThreadState *thr); +bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, + AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag); template <typename EventT> ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr, EventT **ev) { + // TraceSwitchPart accesses shadow_stack, but it's called infrequently, + // so we check it here proactively. 
+ DCHECK(thr->shadow_stack); Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos)); #if SANITIZER_DEBUG // TraceSwitch acquires these mutexes, @@ -747,20 +722,16 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, void TraceMutexUnlock(ThreadState *thr, uptr addr); void TraceTime(ThreadState *thr); -} // namespace v3 +void TraceRestartFuncExit(ThreadState *thr); +void TraceRestartFuncEntry(ThreadState *thr, uptr pc); void GrowShadowStack(ThreadState *thr); ALWAYS_INLINE void FuncEntry(ThreadState *thr, uptr pc) { - DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc); - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc); - } - - // Shadow stack maintenance can be replaced with - // stack unwinding during trace switch (which presumably must be faster). + DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.sid(), (void *)pc); + if (UNLIKELY(!TryTraceFunc(thr, pc))) + return TraceRestartFuncEntry(thr, pc); DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -774,12 +745,9 @@ void FuncEntry(ThreadState *thr, uptr pc) { ALWAYS_INLINE void FuncExit(ThreadState *thr) { - DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid()); - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0); - } - + DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.sid()); + if (UNLIKELY(!TryTraceFunc(thr, 0))) + return TraceRestartFuncExit(thr); DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -791,7 +759,6 @@ void FuncExit(ThreadState *thr) { extern void (*on_initialize)(void); extern int (*on_finalize)(int); #endif - } // namespace __tsan #endif // TSAN_RTL_H diff --git a/libsanitizer/tsan/tsan_rtl_access.cpp b/libsanitizer/tsan/tsan_rtl_access.cpp index 7365fda..7d771bfa 100644 --- a/libsanitizer/tsan/tsan_rtl_access.cpp +++ b/libsanitizer/tsan/tsan_rtl_access.cpp @@ -15,15 +15,13 @@ namespace __tsan { -namespace v3 { - -ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, +ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc, uptr addr, uptr size, AccessType typ) { DCHECK(size == 1 || size == 2 || size == 4 || size == 8); if (!kCollectHistory) return true; - EventAccess *ev; + EventAccess* ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3; @@ -40,25 +38,27 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, TraceRelease(thr, ev); return true; } - auto *evex = reinterpret_cast<EventAccessExt *>(ev); + auto* evex = reinterpret_cast<EventAccessExt*>(ev); evex->is_access = 0; evex->is_func = 0; evex->type = EventType::kAccessExt; evex->is_read = !!(typ & kAccessRead); evex->is_atomic = !!(typ & kAccessAtomic); evex->size_log = size_log; + // Note: this is important, see comment in EventAccessExt. 
+ evex->_ = 0; evex->addr = CompressAddr(addr); evex->pc = pc; TraceRelease(thr, evex); return true; } -ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, - uptr addr, uptr size, - AccessType typ) { +ALWAYS_INLINE +bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, + AccessType typ) { if (!kCollectHistory) return true; - EventAccessRange *ev; + EventAccessRange* ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; thr->trace_prev_pc = pc; @@ -75,7 +75,7 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, return true; } -void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, +void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, AccessType typ) { if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ))) return; @@ -84,7 +84,7 @@ void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, DCHECK(res); } -void TraceFunc(ThreadState *thr, uptr pc) { +void TraceFunc(ThreadState* thr, uptr pc) { if (LIKELY(TryTraceFunc(thr, pc))) return; TraceSwitchPart(thr); @@ -92,7 +92,17 @@ void TraceFunc(ThreadState *thr, uptr pc) { DCHECK(res); } -void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, +NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) { + TraceSwitchPart(thr); + FuncEntry(thr, pc); +} + +NOINLINE void TraceRestartFuncExit(ThreadState* thr) { + TraceSwitchPart(thr); + FuncExit(thr); +} + +void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr, StackID stk) { DCHECK(type == EventType::kLock || type == EventType::kRLock); if (!kCollectHistory) @@ -109,7 +119,7 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, TraceEvent(thr, ev); } -void TraceMutexUnlock(ThreadState *thr, uptr addr) { +void TraceMutexUnlock(ThreadState* thr, uptr addr) { if (!kCollectHistory) return; EventUnlock ev; @@ -121,396 +131,523 @@ void TraceMutexUnlock(ThreadState *thr, uptr addr) { TraceEvent(thr, ev); } -void TraceTime(ThreadState *thr) { +void TraceTime(ThreadState* thr) { if (!kCollectHistory) return; + FastState fast_state = thr->fast_state; EventTime ev; ev.is_access = 0; ev.is_func = 0; ev.type = EventType::kTime; - ev.sid = static_cast<u64>(thr->sid); - ev.epoch = static_cast<u64>(thr->epoch); + ev.sid = static_cast<u64>(fast_state.sid()); + ev.epoch = static_cast<u64>(fast_state.epoch()); ev._ = 0; TraceEvent(thr, ev); } -} // namespace v3 +ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) { + return static_cast<RawShadow>( + atomic_load((atomic_uint32_t*)p, memory_order_relaxed)); +} -ALWAYS_INLINE -Shadow LoadShadow(u64 *p) { - u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed); - return Shadow(raw); +ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) { + atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed); } -ALWAYS_INLINE -void StoreShadow(u64 *sp, u64 s) { - atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed); +NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + Shadow old, + AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS { + // For the free shadow markers the first element (that contains kFreeSid) + // triggers the race, but the second element contains info about the freeing + // thread, take it. + if (old.sid() == kFreeSid) + old = Shadow(LoadShadow(&shadow_mem[1])); + // This prevents trapping on this address in future. 
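+  // (How it prevents it, inferred from ContainsSameAccess below: slot 0
+  //  becomes Shadow::kRodata, which the rodata fast path treats as "same
+  //  access" for reads, and the remaining slots become empty, so later
+  //  accesses to this cell no longer form conflicting pairs.)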
+ for (uptr i = 0; i < kShadowCnt; i++) + StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty); + // See the comment in MemoryRangeFreed as to why the slot is locked + // for free memory accesses. ReportRace must not be called with + // the slot locked because of the fork. But MemoryRangeFreed is not + // called during fork because fork sets ignore_reads_and_writes, + // so simply unlocking the slot should be fine. + if (typ & kAccessSlotLocked) + SlotUnlock(thr); + ReportRace(thr, shadow_mem, cur, Shadow(old), typ); + if (typ & kAccessSlotLocked) + SlotLock(thr); } +#if !TSAN_VECTORIZE ALWAYS_INLINE -void StoreIfNotYetStored(u64 *sp, u64 *s) { - StoreShadow(sp, *s); - *s = 0; +bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1, + AccessType typ) { + for (uptr i = 0; i < kShadowCnt; i++) { + auto old = LoadShadow(&s[i]); + if (!(typ & kAccessRead)) { + if (old == cur.raw()) + return true; + continue; + } + auto masked = static_cast<RawShadow>(static_cast<u32>(old) | + static_cast<u32>(Shadow::kRodata)); + if (masked == cur.raw()) + return true; + if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { + if (old == Shadow::kRodata) + return true; + } + } + return false; } -extern "C" void __tsan_report_race(); - ALWAYS_INLINE -void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) { - thr->racy_state[0] = cur.raw(); - thr->racy_state[1] = old.raw(); - thr->racy_shadow_addr = shadow_mem; -#if !SANITIZER_GO - HACKY_CALL(__tsan_report_race); -#else - ReportRace(thr); -#endif +bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + int unused0, int unused1, AccessType typ) { + bool stored = false; + for (uptr idx = 0; idx < kShadowCnt; idx++) { + RawShadow* sp = &shadow_mem[idx]; + Shadow old(LoadShadow(sp)); + if (LIKELY(old.raw() == Shadow::kEmpty)) { + if (!(typ & kAccessCheckOnly) && !stored) + StoreShadow(sp, cur.raw()); + return false; + } + if (LIKELY(!(cur.access() & old.access()))) + continue; + if (LIKELY(cur.sid() == old.sid())) { + if (!(typ & kAccessCheckOnly) && + LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) { + StoreShadow(sp, cur.raw()); + stored = true; + } + continue; + } + if (LIKELY(old.IsBothReadsOrAtomic(typ))) + continue; + if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch())) + continue; + DoReportRace(thr, shadow_mem, cur, old, typ); + return true; + } + // We did not find any races and had already stored + // the current access info, so we are done. + if (LIKELY(stored)) + return false; + // Choose a random candidate slot and replace it. + uptr index = + atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt; + StoreShadow(&shadow_mem[index], cur.raw()); + return false; } -static inline bool HappensBefore(Shadow old, ThreadState *thr) { - return thr->clock.get(old.TidWithIgnore()) >= old.epoch(); -} +# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0 -ALWAYS_INLINE -void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog, - bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem, - Shadow cur) { - // This potentially can live in an MMX/SSE scratch register. - // The required intrinsics are: - // __m128i _mm_move_epi64(__m128i*); - // _mm_storel_epi64(u64*, __m128i); - u64 store_word = cur.raw(); - bool stored = false; +#else /* !TSAN_VECTORIZE */ - // scan all the shadow values and dispatch to 4 categories: - // same, replace, candidate and race (see comments below). 
-  // we consider only 3 cases regarding access sizes:
-  // equal, intersect and not intersect. initially I considered
-  // larger and smaller as well, it allowed replacing some
-  // 'candidates' with 'same' or 'replace', but I think
-  // it's just not worth it (performance- and complexity-wise).
-
-  Shadow old(0);
-
-  // In release mode we manually unroll the loop,
-  // because empirically gcc generates better code this way.
-  // However, we can't afford unrolling in debug mode, because the function
-  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
-  // threads, which is not enough for the unrolled loop.
-#if SANITIZER_DEBUG
-  for (int idx = 0; idx < 4; idx++) {
-#  include "tsan_update_shadow_word.inc"
-  }
-#else
-  int idx = 0;
-#  include "tsan_update_shadow_word.inc"
-  idx = 1;
-  if (stored) {
-#  include "tsan_update_shadow_word.inc"
-  } else {
-#  include "tsan_update_shadow_word.inc"
-  }
-  idx = 2;
-  if (stored) {
-#  include "tsan_update_shadow_word.inc"
-  } else {
-#  include "tsan_update_shadow_word.inc"
+ALWAYS_INLINE
+bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
+                        m128 access, AccessType typ) {
+  // Note: we could check if there is a larger access of the same type,
+  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
+  // and now do smaller reads/writes, these can also be considered as "same
+  // access". However, it will make the check more expensive, so it's unclear
+  // if it's worth it. But this would conserve trace space, so it's useful
+  // besides the potential speedup.
+  if (!(typ & kAccessRead)) {
+    const m128 same = _mm_cmpeq_epi32(shadow, access);
+    return _mm_movemask_epi8(same);
   }
-  idx = 3;
-  if (stored) {
-#  include "tsan_update_shadow_word.inc"
-  } else {
-#  include "tsan_update_shadow_word.inc"
+  // For reads we need to reset the read bit in the shadow,
+  // because we need to match a read with both reads and writes.
+  // Shadow::kRodata has only the read bit set, so it does what we want.
+  // We also abuse it for the rodata check to save a few cycles
+  // since we already loaded Shadow::kRodata into a register.
+  // Reads from rodata can't race.
+  // Measurements show that they can be 10-20% of all memory accesses.
+  // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
+  // (thread epochs start from 1). So the same read bit mask
+  // serves as a rodata indicator.
+  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
+  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
+  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
+  // Range memory accesses check Shadow::kRodata before calling this,
+  // Shadow::kRodata is not possible for a free memory access,
+  // and Go does not use Shadow::kRodata.
+ if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { + const m128 ro = _mm_cmpeq_epi32(shadow, read_mask); + same = _mm_or_si128(ro, same); } -#endif - - // we did not find any races and had already stored - // the current access info, so we are done - if (LIKELY(stored)) - return; - // choose a random candidate slot and replace it - StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word); - return; -RACE: - HandleRace(thr, shadow_mem, cur, old); - return; + return _mm_movemask_epi8(same); } -void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, - AccessType typ) { - DCHECK(!(typ & kAccessAtomic)); - const bool kAccessIsWrite = !(typ & kAccessRead); - const bool kIsAtomic = false; - while (size) { - int size1 = 1; - int kAccessSizeLog = kSizeLog1; - if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) { - size1 = 8; - kAccessSizeLog = kSizeLog8; - } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) { - size1 = 4; - kAccessSizeLog = kSizeLog4; - } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) { - size1 = 2; - kAccessSizeLog = kSizeLog2; - } - MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic); - addr += size1; - size -= size1; +NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + u32 race_mask, m128 shadow, AccessType typ) { + // race_mask points which of the shadow elements raced with the current + // access. Extract that element. + CHECK_NE(race_mask, 0); + u32 old; + // Note: _mm_extract_epi32 index must be a constant value. + switch (__builtin_ffs(race_mask) / 4) { + case 0: + old = _mm_extract_epi32(shadow, 0); + break; + case 1: + old = _mm_extract_epi32(shadow, 1); + break; + case 2: + old = _mm_extract_epi32(shadow, 2); + break; + case 3: + old = _mm_extract_epi32(shadow, 3); + break; } + Shadow prev(static_cast<RawShadow>(old)); + // For the free shadow markers the first element (that contains kFreeSid) + // triggers the race, but the second element contains info about the freeing + // thread, take it. + if (prev.sid() == kFreeSid) + prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1))); + DoReportRace(thr, shadow_mem, cur, prev, typ); } ALWAYS_INLINE -bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) { - Shadow cur(a); - for (uptr i = 0; i < kShadowCnt; i++) { - Shadow old(LoadShadow(&s[i])); - if (Shadow::Addr0AndSizeAreEqual(cur, old) && - old.TidWithIgnore() == cur.TidWithIgnore() && - old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() && - old.IsRead() <= cur.IsRead()) - return true; +bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + m128 shadow, m128 access, AccessType typ) { + // Note: empty/zero slots don't intersect with any access. 
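+  // Layout of a 32-bit shadow word as reconstructed from the masks below
+  // (a sketch; assumes kAccessRead == 1 << 0 and kAccessAtomic == 1 << 1):
+  //   bits  0..7   access mask, one bit per byte of the 8-byte cell
+  //   bits  8..15  sid of the accessing slot
+  //   bits 16..29  epoch (mask_epoch == 0x3fff0000)
+  //   bit  30      is_read, bit 31 is_atomic (mask_read_atomic == 0xc0000000)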
+ const m128 zero = _mm_setzero_si128(); + const m128 mask_access = _mm_set1_epi32(0x000000ff); + const m128 mask_sid = _mm_set1_epi32(0x0000ff00); + const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000); + const m128 access_and = _mm_and_si128(access, shadow); + const m128 access_xor = _mm_xor_si128(access, shadow); + const m128 intersect = _mm_and_si128(access_and, mask_access); + const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero); + const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid); + const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero); + const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic); + const m128 no_race = + _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic); + const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero)); + if (UNLIKELY(race_mask)) + goto SHARED; + +STORE : { + if (typ & kAccessCheckOnly) + return false; + // We could also replace different sid's if access is the same, + // rw weaker and happens before. However, just checking access below + // is not enough because we also need to check that !both_read_or_atomic + // (reads from different sids can be concurrent). + // Theoretically we could replace smaller accesses with larger accesses, + // but it's unclear if it's worth doing. + const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff); + const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid); + const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero); + const m128 access_read_atomic = + _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30); + const m128 rw_weaker = + _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow); + const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker); + const int rewrite_mask = _mm_movemask_epi8(rewrite); + int index = __builtin_ffs(rewrite_mask); + if (UNLIKELY(index == 0)) { + const m128 empty = _mm_cmpeq_epi32(shadow, zero); + const int empty_mask = _mm_movemask_epi8(empty); + index = __builtin_ffs(empty_mask); + if (UNLIKELY(index == 0)) + index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16; } + StoreShadow(&shadow_mem[index / 4], cur.raw()); + // We could zero other slots determined by rewrite_mask. + // That would help other threads to evict better slots, + // but it's unclear if it's worth it. return false; } -#if TSAN_VECTORIZE -# define SHUF(v0, v1, i0, i1, i2, i3) \ - _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \ - _mm_castsi128_ps(v1), \ - (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64)) -ALWAYS_INLINE -bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) { - // This is an optimized version of ContainsSameAccessSlow. 
-  // load current access into access[0:63]
-  const m128 access = _mm_cvtsi64_si128(a);
-  // duplicate high part of access in addr0:
-  // addr0[0:31] = access[32:63]
-  // addr0[32:63] = access[32:63]
-  // addr0[64:95] = access[32:63]
-  // addr0[96:127] = access[32:63]
-  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
-  // load 4 shadow slots
-  const m128 shadow0 = _mm_load_si128((__m128i *)s);
-  const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
-  // load high parts of 4 shadow slots into addr_vect:
-  // addr_vect[0:31] = shadow0[32:63]
-  // addr_vect[32:63] = shadow0[96:127]
-  // addr_vect[64:95] = shadow1[32:63]
-  // addr_vect[96:127] = shadow1[96:127]
-  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
-  if (!is_write) {
-    // set IsRead bit in addr_vect
-    const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
-    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
-    addr_vect = _mm_or_si128(addr_vect, rw_mask);
-  }
-  // addr0 == addr_vect?
-  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
-  // epoch1[0:63] = sync_epoch
-  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
-  // epoch[0:31] = sync_epoch[0:31]
-  // epoch[32:63] = sync_epoch[0:31]
-  // epoch[64:95] = sync_epoch[0:31]
-  // epoch[96:127] = sync_epoch[0:31]
-  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
-  // load low parts of shadow cell epochs into epoch_vect:
-  // epoch_vect[0:31] = shadow0[0:31]
-  // epoch_vect[32:63] = shadow0[64:95]
-  // epoch_vect[64:95] = shadow1[0:31]
-  // epoch_vect[96:127] = shadow1[64:95]
-  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
-  // epoch_vect >= sync_epoch?
-  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
-  // addr_res & epoch_res
-  const m128 res = _mm_and_si128(addr_res, epoch_res);
-  // mask[0] = res[7]
-  // mask[1] = res[15]
-  // ...
-  // mask[15] = res[127]
-  const int mask = _mm_movemask_epi8(res);
-  return mask != 0;
+SHARED:
+  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
+  // Need to manually unroll this because _mm_extract_epi8/_mm_insert_epi32
+  // indexes must be constants.
+# define LOAD_EPOCH(idx)                                                     \
+  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                \
+    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                          \
+    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));     \
+    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);  \
+  }
+  LOAD_EPOCH(0);
+  LOAD_EPOCH(1);
+  LOAD_EPOCH(2);
+  LOAD_EPOCH(3);
+# undef LOAD_EPOCH
+  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
+  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
+  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
+  const int concurrent_mask = _mm_movemask_epi8(concurrent);
+  if (LIKELY(concurrent_mask == 0))
+    goto STORE;
+
+  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
+  return true;
 }
-#endif

-ALWAYS_INLINE
-bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-#if TSAN_VECTORIZE
-  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
-  // NOTE: this check can fail if the shadow is concurrently mutated
-  // by other threads. But it still can be useful if you modify
-  // ContainsSameAccessFast and want to ensure that it's not completely broken.
-  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
-  return res;
-#else
-  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                              \
+    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));      \
+    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
 #endif
-}

-ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
-                                     int kAccessSizeLog, bool kAccessIsWrite,
-                                     bool kIsAtomic) {
-  RawShadow *shadow_mem = MemToShadow(addr);
-  DPrintf2(
-      "#%d: MemoryAccess: @%p %p size=%d"
-      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
-      (int)thr->fast_state.tid(), (void *)pc, (void *)addr,
-      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
-      (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
-      (uptr)shadow_mem[3]);
-#if SANITIZER_DEBUG
-  if (!IsAppMem(addr)) {
-    Printf("Access to non app mem %zx\n", addr);
-    DCHECK(IsAppMem(addr));
+char* DumpShadow(char* buf, RawShadow raw) {
+  if (raw == Shadow::kEmpty) {
+    internal_snprintf(buf, 64, "0");
+    return buf;
   }
-  if (!IsShadowMem(shadow_mem)) {
-    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
-    DCHECK(IsShadowMem(shadow_mem));
-  }
-#endif
+  Shadow s(raw);
+  AccessType typ;
+  s.GetAccess(nullptr, nullptr, &typ);
+  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
+                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
+                    s.access(), static_cast<u32>(typ));
+  return buf;
+}

-  if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
-    // Access to .rodata section, no races here.
-    // Measurements show that it can be 10-20% of all memory accesses.
-    return;
-  }
+// TryTrace* and TraceRestart* functions make it possible to turn memory
+// access and func entry/exit callbacks into leaf functions with all
+// associated performance benefits. These hottest callbacks make only two
+// slow-path calls: reporting a race and trace part switching. Race reporting
+// is easy to turn into a tail call: we just always return from the runtime
+// after reporting a race. But trace part switching is harder because it
+// needs to happen in the middle of callbacks. To turn it into a tail call we
+// immediately return after TraceRestart* functions, but TraceRestart*
+// functions themselves recurse into the callback after switching the trace
+// part. As a result the hottest callbacks contain only tail calls, which
+// effectively makes them leaf functions (they can use all registers, need
+// no frame setup, etc).
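+// The shape of the pattern, as a sketch (TryTraceX, TraceRestartX and
+// HotCallback are placeholder names; the real instances follow):
+//
+//   ALWAYS_INLINE void HotCallback(ThreadState* thr, ...) {
+//     ...fast path...
+//     if (UNLIKELY(!TryTraceX(thr, ...)))
+//       return TraceRestartX(thr, ...);  // tail call into the cold path
+//     ...finish fast path...
+//   }
+//   NOINLINE void TraceRestartX(ThreadState* thr, ...) {
+//     TraceSwitchPart(thr);   // make room in the trace
+//     HotCallback(thr, ...);  // re-run the callback once
+//   }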
+NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + TraceSwitchPart(thr); + MemoryAccess(thr, pc, addr, size, typ); +} + +ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + RawShadow* shadow_mem = MemToShadow(addr); + UNUSED char memBuf[4][64]; + DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid, + static_cast<int>(thr->fast_state.sid()), + static_cast<int>(thr->fast_state.epoch()), (void*)addr, size, + static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]), + DumpShadow(memBuf[1], shadow_mem[1]), + DumpShadow(memBuf[2], shadow_mem[2]), + DumpShadow(memBuf[3], shadow_mem[3])); FastState fast_state = thr->fast_state; - if (UNLIKELY(fast_state.GetIgnoreBit())) { + Shadow cur(fast_state, addr, size, typ); + + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) return; - } + if (UNLIKELY(fast_state.GetIgnoreBit())) + return; + if (!TryTraceMemoryAccess(thr, pc, addr, size, typ)) + return TraceRestartMemoryAccess(thr, pc, addr, size, typ); + CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} - Shadow cur(fast_state); - cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog); - cur.SetWrite(kAccessIsWrite); - cur.SetAtomic(kIsAtomic); +void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ); - if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, - kAccessIsWrite))) { - return; - } +NOINLINE +void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr, + AccessType typ) { + TraceSwitchPart(thr); + MemoryAccess16(thr, pc, addr, typ); +} - if (kCollectHistory) { - fast_state.IncrementEpoch(); - thr->fast_state = fast_state; - TraceAddEvent(thr, fast_state, EventTypeMop, pc); - cur.IncrementEpoch(); +ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, + AccessType typ) { + const uptr size = 16; + FastState fast_state = thr->fast_state; + if (UNLIKELY(fast_state.GetIgnoreBit())) + return; + Shadow cur(fast_state, 0, 8, typ); + RawShadow* shadow_mem = MemToShadow(addr); + bool traced = false; + { + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + goto SECOND; + if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartMemoryAccess16(thr, pc, addr, typ); + traced = true; + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) + return; } +SECOND: + shadow_mem += kShadowCnt; + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + return; + if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartMemoryAccess16(thr, pc, addr, typ); + CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} - MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic, - shadow_mem, cur); +NOINLINE +void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + TraceSwitchPart(thr); + UnalignedMemoryAccess(thr, pc, addr, size, typ); } -// Called by MemoryAccessRange in tsan_rtl_thread.cpp -ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr, - int kAccessSizeLog, - bool kAccessIsWrite, bool kIsAtomic, - u64 *shadow_mem, Shadow cur) { - if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, - kAccessIsWrite))) { +ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc, + 
uptr addr, uptr size, + AccessType typ) { + DCHECK_LE(size, 8); + FastState fast_state = thr->fast_state; + if (UNLIKELY(fast_state.GetIgnoreBit())) return; + RawShadow* shadow_mem = MemToShadow(addr); + bool traced = false; + uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr); + { + Shadow cur(fast_state, addr, size1, typ); + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + goto SECOND; + if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); + traced = true; + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) + return; } +SECOND: + uptr size2 = size - size1; + if (LIKELY(size2 == 0)) + return; + shadow_mem += kShadowCnt; + Shadow cur(fast_state, 0, size2, typ); + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + return; + if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); + CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} - MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic, - shadow_mem, cur); +void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) { + DCHECK_LE(p, end); + DCHECK(IsShadowMem(p)); + DCHECK(IsShadowMem(end)); + UNUSED const uptr kAlign = kShadowCnt * kShadowSize; + DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0); + DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0); +#if !TSAN_VECTORIZE + for (; p < end; p += kShadowCnt) { + p[0] = v; + for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty; + } +#else + m128 vv = _mm_setr_epi32( + static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty), + static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty)); + m128* vp = reinterpret_cast<m128*>(p); + m128* vend = reinterpret_cast<m128*>(end); + for (; vp < vend; vp++) _mm_store_si128(vp, vv); +#endif } -static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size, - u64 val) { - (void)thr; - (void)pc; +static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) { if (size == 0) return; - // FIXME: fix me. - uptr offset = addr % kShadowCell; - if (offset) { - offset = kShadowCell - offset; - if (size <= offset) - return; - addr += offset; - size -= offset; - } - DCHECK_EQ(addr % 8, 0); + DCHECK_EQ(addr % kShadowCell, 0); + DCHECK_EQ(size % kShadowCell, 0); // If a user passes some insane arguments (memset(0)), // let it just crash as usual. if (!IsAppMem(addr) || !IsAppMem(addr + size - 1)) return; + RawShadow* begin = MemToShadow(addr); + RawShadow* end = begin + size / kShadowCell * kShadowCnt; // Don't want to touch lots of shadow memory. // If a program maps 10MB stack, there is no need reset the whole range. - size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1); // UnmapOrDie/MmapFixedNoReserve does not work on Windows. - if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) { - RawShadow *p = MemToShadow(addr); - CHECK(IsShadowMem(p)); - CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1)); - // FIXME: may overwrite a part outside the region - for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) { - p[i++] = val; - for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0; - } - } else { - // The region is big, reset only beginning and end. 
-    const uptr kPageSize = GetPageSizeCached();
-    RawShadow *begin = MemToShadow(addr);
-    RawShadow *end = begin + size / kShadowCell * kShadowCnt;
-    RawShadow *p = begin;
-    // Set at least first kPageSize/2 to page boundary.
-    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
-      *p++ = val;
-      for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
-    }
-    // Reset middle part.
-    RawShadow *p1 = p;
-    p = RoundDown(end, kPageSize);
-    if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
+  if (SANITIZER_WINDOWS ||
+      size <= common_flags()->clear_shadow_mmap_threshold) {
+    ShadowSet(begin, end, val);
+    return;
+  }
+  // The region is big, reset only beginning and end.
+  const uptr kPageSize = GetPageSizeCached();
+  // Set at least first kPageSize/2 to page boundary.
+  RawShadow* mid1 =
+      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
+                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
+  ShadowSet(begin, mid1, val);
+  // Reset middle part.
+  RawShadow* mid2 = RoundDown(end, kPageSize);
+  if (mid2 > mid1) {
+    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
       Die();
-    // Set the ending.
-    while (p < end) {
-      *p++ = val;
-      for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
-    }
   }
+  // Set the ending.
+  ShadowSet(mid2, end, val);
 }

-void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
-  MemoryRangeSet(thr, pc, addr, size, 0);
+void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+  uptr addr1 = RoundDown(addr, kShadowCell);
+  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
+  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
 }

-void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
-  // Processing more than 1k (4k of shadow) is expensive,
+void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
+  // Callers must lock the slot to ensure synchronization with the reset.
+  // The problem with "freed" memory is that it's not "monotonic"
+  // with respect to bug detection: freed memory is bad to access,
+  // but then if the heap block is reallocated later, it's good to access.
+  // As a result, a garbage "freed" shadow can lead to a false positive
+  // if it happens to match a real free in the thread trace,
+  // but the heap block was reallocated before the current memory access,
+  // so it's still good to access. That is not the case with data races.
+  DCHECK(thr->slot_locked);
+  DCHECK_EQ(addr % kShadowCell, 0);
+  size = RoundUp(size, kShadowCell);
+  // Processing more than 1k (2k of shadow) is expensive,
   // can cause excessive memory consumption (user does not necessarily touch
   // the whole range) and most likely unnecessary.
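+  // After the race check below, each freed cell is left as
+  // {FreedMarker, FreedInfo(sid, epoch), kEmpty, kEmpty}: slot 0 trips the
+  // race check for later accesses, slot 1 records the freeing thread
+  // (see the kFreeSid handling in DoReportRace above).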
- if (size > 1024) - size = 1024; - CHECK_EQ(thr->is_freeing, false); - thr->is_freeing = true; - MemoryAccessRange(thr, pc, addr, size, true); - thr->is_freeing = false; - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); + size = Min<uptr>(size, 1024); + const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked | + kAccessCheckOnly | kAccessNoRodata; + TraceMemoryAccessRange(thr, pc, addr, size, typ); + RawShadow* shadow_mem = MemToShadow(addr); + Shadow cur(thr->fast_state, 0, kShadowCell, typ); +#if TSAN_VECTORIZE + const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw())); + const m128 freed = _mm_setr_epi32( + static_cast<u32>(Shadow::FreedMarker()), + static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0); + for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { + const m128 shadow = _mm_load_si128((m128*)shadow_mem); + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) + return; + _mm_store_si128((m128*)shadow_mem, freed); } - Shadow s(thr->fast_state); - s.ClearIgnoreBit(); - s.MarkAsFreed(); - s.SetWrite(true); - s.SetAddr0AndSizeLog(0, 3); - MemoryRangeSet(thr, pc, addr, size, s.raw()); -} - -void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) { - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); +#else + for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ))) + return; + StoreShadow(&shadow_mem[0], Shadow::FreedMarker()); + StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch())); + StoreShadow(&shadow_mem[2], Shadow::kEmpty); + StoreShadow(&shadow_mem[3], Shadow::kEmpty); } - Shadow s(thr->fast_state); - s.ClearIgnoreBit(); - s.SetWrite(true); - s.SetAddr0AndSizeLog(0, 3); - MemoryRangeSet(thr, pc, addr, size, s.raw()); +#endif +} + +void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) { + DCHECK_EQ(addr % kShadowCell, 0); + size = RoundUp(size, kShadowCell); + TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite); + Shadow cur(thr->fast_state, 0, 8, kAccessWrite); + MemoryRangeSet(addr, size, cur.raw()); } -void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, +void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) { if (thr->ignore_reads_and_writes == 0) MemoryRangeImitateWrite(thr, pc, addr, size); @@ -518,14 +655,29 @@ void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, MemoryResetRange(thr, pc, addr, size); } -void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, - bool is_write) { - if (size == 0) - return; +ALWAYS_INLINE +bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + AccessType typ) { + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + return false; + return CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} + +template <bool is_read> +NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, + uptr size) { + TraceSwitchPart(thr); + MemoryAccessRangeT<is_read>(thr, pc, addr, size); +} - RawShadow *shadow_mem = MemToShadow(addr); - DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid, - (void *)pc, (void *)addr, (int)size, is_write); +template <bool is_read> +void MemoryAccessRangeT(ThreadState* thr, uptr pc, 
uptr addr, uptr size) { + const AccessType typ = + (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata; + RawShadow* shadow_mem = MemToShadow(addr); + DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid, + (void*)pc, (void*)addr, (int)size, is_read); #if SANITIZER_DEBUG if (!IsAppMem(addr)) { @@ -537,65 +689,62 @@ void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, DCHECK(IsAppMem(addr + size - 1)); } if (!IsShadowMem(shadow_mem)) { - Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr); + Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr); DCHECK(IsShadowMem(shadow_mem)); } - if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) { - Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1, + if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) { + Printf("Bad shadow addr %p (%zx)\n", + static_cast<void*>(shadow_mem + size * kShadowCnt - 1), addr + size - 1); - DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)); + DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1)); } #endif - if (*shadow_mem == kShadowRodata) { - DCHECK(!is_write); - // Access to .rodata section, no races here. - // Measurements show that it can be 10-20% of all memory accesses. + // Access to .rodata section, no races here. + // Measurements show that it can be 10-20% of all memory accesses. + // Check here once to not check for every access separately. + // Note: we could (and should) do this only for the is_read case + // (writes shouldn't go to .rodata). But it happens in Chromium tests: + // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19 + // Details are unknown since it happens only on CI machines. + if (*shadow_mem == Shadow::kRodata) return; - } FastState fast_state = thr->fast_state; - if (fast_state.GetIgnoreBit()) + if (UNLIKELY(fast_state.GetIgnoreBit())) return; - fast_state.IncrementEpoch(); - thr->fast_state = fast_state; - TraceAddEvent(thr, fast_state, EventTypeMop, pc); + if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartMemoryAccessRange<is_read>(thr, pc, addr, size); - bool unaligned = (addr % kShadowCell) != 0; - - // Handle unaligned beginning, if any. - for (; addr % kShadowCell && size; addr++, size--) { - int const kAccessSizeLog = 0; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); - } - if (unaligned) + if (UNLIKELY(addr % kShadowCell)) { + // Handle unaligned beginning, if any. + uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr); + size -= size1; + Shadow cur(fast_state, addr, size1, typ); + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; shadow_mem += kShadowCnt; + } // Handle middle part, if any. - for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) { - int const kAccessSizeLog = 3; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(0, kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); - shadow_mem += kShadowCnt; + Shadow cur(fast_state, 0, kShadowCell, typ); + for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) { + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; } // Handle ending, if any. 
- for (; size; addr++, size--) { - int const kAccessSizeLog = 0; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); + if (UNLIKELY(size)) { + Shadow cur(fast_state, 0, size, typ); + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; } } +template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr, + uptr size); +template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr, + uptr size); + } // namespace __tsan #if !SANITIZER_GO diff --git a/libsanitizer/tsan/tsan_rtl_amd64.S b/libsanitizer/tsan/tsan_rtl_amd64.S index c15b01e..f848be9 100644 --- a/libsanitizer/tsan/tsan_rtl_amd64.S +++ b/libsanitizer/tsan/tsan_rtl_amd64.S @@ -9,242 +9,6 @@ .section __TEXT,__text #endif -ASM_HIDDEN(__tsan_trace_switch) -.globl ASM_SYMBOL(__tsan_trace_switch_thunk) -ASM_SYMBOL(__tsan_trace_switch_thunk): - CFI_STARTPROC - _CET_ENDBR - # Save scratch registers. - push %rax - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rax, 0) - push %rcx - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rcx, 0) - push %rdx - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rdx, 0) - push %rsi - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rsi, 0) - push %rdi - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rdi, 0) - push %r8 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r8, 0) - push %r9 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r9, 0) - push %r10 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r10, 0) - push %r11 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r11, 0) - # All XMM registers are caller-saved. - sub $0x100, %rsp - CFI_ADJUST_CFA_OFFSET(0x100) - movdqu %xmm0, 0x0(%rsp) - movdqu %xmm1, 0x10(%rsp) - movdqu %xmm2, 0x20(%rsp) - movdqu %xmm3, 0x30(%rsp) - movdqu %xmm4, 0x40(%rsp) - movdqu %xmm5, 0x50(%rsp) - movdqu %xmm6, 0x60(%rsp) - movdqu %xmm7, 0x70(%rsp) - movdqu %xmm8, 0x80(%rsp) - movdqu %xmm9, 0x90(%rsp) - movdqu %xmm10, 0xa0(%rsp) - movdqu %xmm11, 0xb0(%rsp) - movdqu %xmm12, 0xc0(%rsp) - movdqu %xmm13, 0xd0(%rsp) - movdqu %xmm14, 0xe0(%rsp) - movdqu %xmm15, 0xf0(%rsp) - # Align stack frame. - push %rbx # non-scratch - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rbx, 0) - mov %rsp, %rbx # save current rsp - CFI_DEF_CFA_REGISTER(%rbx) - shr $4, %rsp # clear 4 lsb, align to 16 - shl $4, %rsp - - call ASM_SYMBOL(__tsan_trace_switch) - - # Unalign stack frame back. - mov %rbx, %rsp # restore the original rsp - CFI_DEF_CFA_REGISTER(%rsp) - pop %rbx - CFI_ADJUST_CFA_OFFSET(-8) - # Restore scratch registers. 
- movdqu 0x0(%rsp), %xmm0 - movdqu 0x10(%rsp), %xmm1 - movdqu 0x20(%rsp), %xmm2 - movdqu 0x30(%rsp), %xmm3 - movdqu 0x40(%rsp), %xmm4 - movdqu 0x50(%rsp), %xmm5 - movdqu 0x60(%rsp), %xmm6 - movdqu 0x70(%rsp), %xmm7 - movdqu 0x80(%rsp), %xmm8 - movdqu 0x90(%rsp), %xmm9 - movdqu 0xa0(%rsp), %xmm10 - movdqu 0xb0(%rsp), %xmm11 - movdqu 0xc0(%rsp), %xmm12 - movdqu 0xd0(%rsp), %xmm13 - movdqu 0xe0(%rsp), %xmm14 - movdqu 0xf0(%rsp), %xmm15 - add $0x100, %rsp - CFI_ADJUST_CFA_OFFSET(-0x100) - pop %r11 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r10 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r9 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r8 - CFI_ADJUST_CFA_OFFSET(-8) - pop %rdi - CFI_ADJUST_CFA_OFFSET(-8) - pop %rsi - CFI_ADJUST_CFA_OFFSET(-8) - pop %rdx - CFI_ADJUST_CFA_OFFSET(-8) - pop %rcx - CFI_ADJUST_CFA_OFFSET(-8) - pop %rax - CFI_ADJUST_CFA_OFFSET(-8) - CFI_RESTORE(%rax) - CFI_RESTORE(%rbx) - CFI_RESTORE(%rcx) - CFI_RESTORE(%rdx) - CFI_RESTORE(%rsi) - CFI_RESTORE(%rdi) - CFI_RESTORE(%r8) - CFI_RESTORE(%r9) - CFI_RESTORE(%r10) - CFI_RESTORE(%r11) - ret - CFI_ENDPROC - -ASM_HIDDEN(__tsan_report_race) -.globl ASM_SYMBOL(__tsan_report_race_thunk) -ASM_SYMBOL(__tsan_report_race_thunk): - CFI_STARTPROC - _CET_ENDBR - # Save scratch registers. - push %rax - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rax, 0) - push %rcx - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rcx, 0) - push %rdx - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rdx, 0) - push %rsi - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rsi, 0) - push %rdi - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rdi, 0) - push %r8 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r8, 0) - push %r9 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r9, 0) - push %r10 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r10, 0) - push %r11 - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%r11, 0) - # All XMM registers are caller-saved. - sub $0x100, %rsp - CFI_ADJUST_CFA_OFFSET(0x100) - movdqu %xmm0, 0x0(%rsp) - movdqu %xmm1, 0x10(%rsp) - movdqu %xmm2, 0x20(%rsp) - movdqu %xmm3, 0x30(%rsp) - movdqu %xmm4, 0x40(%rsp) - movdqu %xmm5, 0x50(%rsp) - movdqu %xmm6, 0x60(%rsp) - movdqu %xmm7, 0x70(%rsp) - movdqu %xmm8, 0x80(%rsp) - movdqu %xmm9, 0x90(%rsp) - movdqu %xmm10, 0xa0(%rsp) - movdqu %xmm11, 0xb0(%rsp) - movdqu %xmm12, 0xc0(%rsp) - movdqu %xmm13, 0xd0(%rsp) - movdqu %xmm14, 0xe0(%rsp) - movdqu %xmm15, 0xf0(%rsp) - # Align stack frame. - push %rbx # non-scratch - CFI_ADJUST_CFA_OFFSET(8) - CFI_REL_OFFSET(%rbx, 0) - mov %rsp, %rbx # save current rsp - CFI_DEF_CFA_REGISTER(%rbx) - shr $4, %rsp # clear 4 lsb, align to 16 - shl $4, %rsp - - call ASM_SYMBOL(__tsan_report_race) - - # Unalign stack frame back. - mov %rbx, %rsp # restore the original rsp - CFI_DEF_CFA_REGISTER(%rsp) - pop %rbx - CFI_ADJUST_CFA_OFFSET(-8) - # Restore scratch registers. 
- movdqu 0x0(%rsp), %xmm0 - movdqu 0x10(%rsp), %xmm1 - movdqu 0x20(%rsp), %xmm2 - movdqu 0x30(%rsp), %xmm3 - movdqu 0x40(%rsp), %xmm4 - movdqu 0x50(%rsp), %xmm5 - movdqu 0x60(%rsp), %xmm6 - movdqu 0x70(%rsp), %xmm7 - movdqu 0x80(%rsp), %xmm8 - movdqu 0x90(%rsp), %xmm9 - movdqu 0xa0(%rsp), %xmm10 - movdqu 0xb0(%rsp), %xmm11 - movdqu 0xc0(%rsp), %xmm12 - movdqu 0xd0(%rsp), %xmm13 - movdqu 0xe0(%rsp), %xmm14 - movdqu 0xf0(%rsp), %xmm15 - add $0x100, %rsp - CFI_ADJUST_CFA_OFFSET(-0x100) - pop %r11 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r10 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r9 - CFI_ADJUST_CFA_OFFSET(-8) - pop %r8 - CFI_ADJUST_CFA_OFFSET(-8) - pop %rdi - CFI_ADJUST_CFA_OFFSET(-8) - pop %rsi - CFI_ADJUST_CFA_OFFSET(-8) - pop %rdx - CFI_ADJUST_CFA_OFFSET(-8) - pop %rcx - CFI_ADJUST_CFA_OFFSET(-8) - pop %rax - CFI_ADJUST_CFA_OFFSET(-8) - CFI_RESTORE(%rax) - CFI_RESTORE(%rbx) - CFI_RESTORE(%rcx) - CFI_RESTORE(%rdx) - CFI_RESTORE(%rsi) - CFI_RESTORE(%rdi) - CFI_RESTORE(%r8) - CFI_RESTORE(%r9) - CFI_RESTORE(%r10) - CFI_RESTORE(%r11) - ret - CFI_ENDPROC - ASM_HIDDEN(__tsan_setjmp) #if defined(__NetBSD__) .comm _ZN14__interception15real___setjmp14E,8,8 diff --git a/libsanitizer/tsan/tsan_rtl_mutex.cpp b/libsanitizer/tsan/tsan_rtl_mutex.cpp index 7d6b411..2e97885 100644 --- a/libsanitizer/tsan/tsan_rtl_mutex.cpp +++ b/libsanitizer/tsan/tsan_rtl_mutex.cpp @@ -23,6 +23,8 @@ namespace __tsan { void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r); +void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, + FastState last_lock, StackID creation_stack_id); struct Callback final : public DDCallback { ThreadState *thr; @@ -36,17 +38,17 @@ struct Callback final : public DDCallback { } StackID Unwind() override { return CurrentStackId(thr, pc); } - int UniqueTid() override { return thr->unique_id; } + int UniqueTid() override { return thr->tid; } }; void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s) { Callback cb(thr, pc); ctx->dd->MutexInit(&cb, &s->dd); - s->dd.ctx = s->GetId(); + s->dd.ctx = s->addr; } static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, - uptr addr, u64 mid) { + uptr addr, StackID creation_stack_id) { // In Go, these misuses are either impossible, or detected by std lib, // or false positives (e.g. unlock in a different thread). if (SANITIZER_GO) @@ -55,7 +57,7 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, return; ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(typ); - rep.AddMutex(mid); + rep.AddMutex(addr, creation_stack_id); VarSizeStackTrace trace; ObtainCurrentStack(thr, pc, &trace); rep.AddStack(trace, true); @@ -63,95 +65,94 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, OutputReport(thr, rep); } +static void RecordMutexLock(ThreadState *thr, uptr pc, uptr addr, + StackID stack_id, bool write) { + auto typ = write ? EventType::kLock : EventType::kRLock; + // Note: it's important to trace before modifying mutex set + // because tracing can switch trace part and we write the current + // mutex set in the beginning of each part. + // If we do it in the opposite order, we will write already reduced + // mutex set in the beginning of the part and then trace unlock again. + TraceMutexLock(thr, typ, pc, addr, stack_id); + thr->mset.AddAddr(addr, stack_id, write); +} + +static void RecordMutexUnlock(ThreadState *thr, uptr addr) { + // See the comment in RecordMutexLock re order of operations. 
+ TraceMutexUnlock(thr, addr); + thr->mset.DelAddr(addr); +} + void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) { - CHECK(!thr->is_freeing); - thr->is_freeing = true; + if (!(flagz & MutexFlagLinkerInit) && pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessWrite); - thr->is_freeing = false; - } - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); s->SetFlags(flagz & MutexCreationFlagMask); // Save stack in the case the sync object was created before as atomic. - if (!SANITIZER_GO && s->creation_stack_id == 0) + if (!SANITIZER_GO && s->creation_stack_id == kInvalidStackID) s->creation_stack_id = CurrentStackId(thr, pc); } void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr); bool unlock_locked = false; - u64 mid = 0; - u64 last_lock = 0; + StackID creation_stack_id; + FastState last_lock; { - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - if (s == 0) + auto s = ctx->metamap.GetSyncIfExists(addr); + if (!s) return; - Lock l(&s->mtx); - if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || - ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { - // Destroy is no-op for linker-initialized mutexes. - return; - } - if (common_flags()->detect_deadlocks) { - Callback cb(thr, pc); - ctx->dd->MutexDestroy(&cb, &s->dd); - ctx->dd->MutexInit(&cb, &s->dd); - } - if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && - !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - unlock_locked = true; - } - mid = s->GetId(); - last_lock = s->last_lock; - if (!unlock_locked) - s->Reset(thr->proc()); // must not reset it before the report is printed - } - if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) { - ThreadRegistryLock l(&ctx->thread_registry); - ScopedReport rep(ReportTypeMutexDestroyLocked); - rep.AddMutex(mid); - VarSizeStackTrace trace; - ObtainCurrentStack(thr, pc, &trace); - rep.AddStack(trace, true); - FastState last(last_lock); - RestoreStack(last.tid(), last.epoch(), &trace, 0); - rep.AddStack(trace, true); - rep.AddLocation(addr, 1); - OutputReport(thr, rep); - - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - if (s != 0) { - Lock l(&s->mtx); - s->Reset(thr->proc()); + SlotLocker locker(thr); + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + last_lock = s->last_lock; + if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || + ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { + // Destroy is no-op for linker-initialized mutexes. + return; + } + if (common_flags()->detect_deadlocks) { + Callback cb(thr, pc); + ctx->dd->MutexDestroy(&cb, &s->dd); + ctx->dd->MutexInit(&cb, &s->dd); + } + if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && + !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + unlock_locked = true; + } + s->Reset(); } + // Imitate a memory write to catch unlock-destroy races. + if (pc && IsAppMem(addr)) + MemoryAccess(thr, pc, addr, 1, + kAccessWrite | kAccessFree | kAccessSlotLocked); } - thr->mset.Remove(mid); - // Imitate a memory write to catch unlock-destroy races. - // Do this outside of sync mutex, because it can report a race which locks - // sync mutexes. 
- if (IsAppMem(addr)) - MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree); + if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) + ReportDestroyLocked(thr, pc, addr, last_lock, creation_stack_id); + thr->mset.DelAddr(addr, true); // s will be destroyed and freed in MetaMap::FreeBlock. } void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - { - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - if (s->owner_tid != thr->tid) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); - } - } - Callback cb(thr, pc); - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); + if (flagz & MutexFlagTryLock) + return; + if (!common_flags()->detect_deadlocks) + return; + Callback cb(thr, pc); + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + if (s->owner_tid != thr->tid) + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); } + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { @@ -161,48 +162,51 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { CHECK_GT(rec, 0); else rec = 1; - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + bool report_double_lock = false; bool pre_lock = false; bool first = false; - bool report_double_lock = false; + StackID creation_stack_id = kInvalidStackID; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - s->UpdateFlags(flagz); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId()); - if (s->owner_tid == kInvalidTid) { - CHECK_EQ(s->recursion, 0); - s->owner_tid = thr->tid; - s->last_lock = thr->fast_state.raw(); - } else if (s->owner_tid == thr->tid) { - CHECK_GT(s->recursion, 0); - } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_double_lock = true; - } - first = s->recursion == 0; - s->recursion += rec; - if (first) { - AcquireImpl(thr, pc, &s->clock); - AcquireImpl(thr, pc, &s->read_clock); - } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) { - } - thr->mset.Add(s->GetId(), true, thr->fast_state.epoch()); - if (first && common_flags()->detect_deadlocks) { - pre_lock = - (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); - ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + creation_stack_id = s->creation_stack_id; + RecordMutexLock(thr, pc, addr, creation_stack_id, true); + { + Lock lock(&s->mtx); + first = s->recursion == 0; + s->UpdateFlags(flagz); + if (s->owner_tid == kInvalidTid) { + CHECK_EQ(s->recursion, 0); + s->owner_tid = thr->tid; + s->last_lock = thr->fast_state; + } else if (s->owner_tid == thr->tid) { + CHECK_GT(s->recursion, 0); + } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_double_lock = true; + } + s->recursion += rec; + if (first) { + if (!thr->ignore_sync) { + thr->clock.Acquire(s->clock); + 
thr->clock.Acquire(s->read_clock); + } + } + if (first && common_flags()->detect_deadlocks) { + pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && + !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); + ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); + } } - mid = s->GetId(); } if (report_double_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, + creation_stack_id); if (first && pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -211,40 +215,47 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + StackID creation_stack_id; + RecordMutexUnlock(thr, addr); bool report_bad_unlock = false; int rec = 0; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); - if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - } else { - rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1; - s->recursion -= rec; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - ReleaseStoreImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } } else { + rec = (flagz & MutexFlagRecursiveUnlock) ? 
s->recursion : 1; + s->recursion -= rec; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + if (!thr->ignore_sync) { + thr->clock.ReleaseStore(&s->clock); + released = true; + } + } + } + if (common_flags()->detect_deadlocks && s->recursion == 0 && + !report_bad_unlock) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); } } - thr->mset.Del(s->GetId(), true); - if (common_flags()->detect_deadlocks && s->recursion == 0 && - !report_bad_unlock) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); - } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks && !report_bad_unlock) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -254,53 +265,56 @@ int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { - { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - Callback cb(thr, pc); - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); - } - Callback cb(thr, pc); - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); + if ((flagz & MutexFlagTryLock) || !common_flags()->detect_deadlocks) + return; + Callback cb(thr, pc); + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + ctx->dd->MutexBeforeLock(&cb, &s->dd, false); } + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; bool report_bad_lock = false; bool pre_lock = false; + StackID creation_stack_id = kInvalidStackID; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId()); - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_lock = true; + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + creation_stack_id = s->creation_stack_id; + RecordMutexLock(thr, pc, addr, creation_stack_id, false); + { + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_lock = true; + } + } + if (!thr->ignore_sync) + thr->clock.Acquire(s->clock); + s->last_lock = thr->fast_state; + if (common_flags()->detect_deadlocks) { + pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && + !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, &s->dd, false); + ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); } } - AcquireImpl(thr, pc, &s->clock); - s->last_lock = thr->fast_state.raw(); - thr->mset.Add(s->GetId(), false, thr->fast_state.epoch()); 
- if (common_flags()->detect_deadlocks) { - pre_lock = - (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); - ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); - } - mid = s->GetId(); } if (report_bad_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, + creation_stack_id); if (pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -309,31 +323,39 @@ void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + RecordMutexUnlock(thr, addr); + StackID creation_stack_id; bool report_bad_unlock = false; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } + } + if (!thr->ignore_sync) { + thr->clock.Release(&s->read_clock); + released = true; + } + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); } } - ReleaseImpl(thr, pc, &s->read_clock); - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); - } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } - thr->mset.Del(mid, false); if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -342,44 +364,52 @@ void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) { void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + RecordMutexUnlock(thr, addr); + StackID creation_stack_id; bool report_bad_unlock = false; + bool write = true; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - bool write = true; - if (s->owner_tid == kInvalidTid) { - // Seems to be read unlock. - write = false; - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); - ReleaseImpl(thr, pc, &s->read_clock); - } else if (s->owner_tid == thr->tid) { - // Seems to be write unlock. 
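MutexReadUnlock above releases into a separate read_clock rather than the mutex's main clock: read unlocks from several threads must be merged, while a write unlock can simply overwrite because the owner is unique (hence Release vs ReleaseStore). A sketch of the two operations under that interpretation, with the clock as a plain array indexed by slot id and lazy allocation mirroring the new VectorClock* members; kSlotCnt and the types are assumptions of the sketch:

#include <algorithm>
#include <array>
#include <cstdint>

constexpr int kSlotCnt = 256;  // assumption: one entry per slot, Sid is 8-bit
using Clock = std::array<uint64_t, kSlotCnt>;

struct SyncSketch {
  Clock *clock = nullptr;       // written by write unlocks
  Clock *read_clock = nullptr;  // joined by read unlocks
};

void ReadUnlock(const Clock &thr, SyncSketch &s) {
  if (!s.read_clock)
    s.read_clock = new Clock{};  // allocated on first release
  for (int i = 0; i < kSlotCnt; i++)
    (*s.read_clock)[i] = std::max((*s.read_clock)[i], thr[i]);
}

void WriteUnlock(const Clock &thr, SyncSketch &s) {
  if (!s.clock)
    s.clock = new Clock{};
  *s.clock = thr;  // sole owner: a plain store suffices, as in ReleaseStore()
}

int main() {
  Clock thr{};
  thr[3] = 5;
  SyncSketch s;
  ReadUnlock(thr, s);
  WriteUnlock(thr, s);
  return 0;  // the sketch leaks the clocks; the real code Free()s them
}

Keeping the clocks as heap pointers also means a sync object that never sees a release costs two null pointers instead of two embedded clocks.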
- thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); - CHECK_GT(s->recursion, 0); - s->recursion--; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - ReleaseStoreImpl(thr, pc, &s->clock); - } else { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (s->owner_tid == kInvalidTid) { + // Seems to be read unlock. + write = false; + if (!thr->ignore_sync) { + thr->clock.Release(&s->read_clock); + released = true; + } + } else if (s->owner_tid == thr->tid) { + // Seems to be write unlock. + CHECK_GT(s->recursion, 0); + s->recursion--; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + if (!thr->ignore_sync) { + thr->clock.ReleaseStore(&s->clock); + released = true; + } + } + } else if (!s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); } - } else if (!s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - thr->mset.Del(s->GetId(), write); - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -388,143 +418,112 @@ void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) { void MutexRepair(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock lock(&s->mtx); s->owner_tid = kInvalidTid; s->recursion = 0; } void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId()); + StackID creation_stack_id = kInvalidStackID; + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + if (s) + creation_stack_id = s->creation_stack_id; + } + ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, + creation_stack_id); } void Acquire(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: Acquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); + auto s = ctx->metamap.GetSyncIfExists(addr); if (!s) return; - ReadLock l(&s->mtx); - AcquireImpl(thr, pc, &s->clock); -} - -static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) { - ThreadState *thr = reinterpret_cast<ThreadState*>(arg); - ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base); - u64 epoch = tctx->epoch1; - if (tctx->status == ThreadStatusRunning) { - epoch = tctx->thr->fast_state.epoch(); - tctx->thr->clock.NoteGlobalAcquire(epoch); - } - thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); + SlotLocker locker(thr); + if (!s->clock) + return; + ReadLock 
lock(&s->mtx); + thr->clock.Acquire(s->clock); } void AcquireGlobal(ThreadState *thr) { DPrintf("#%d: AcquireGlobal\n", thr->tid); if (thr->ignore_sync) return; - ThreadRegistryLock l(&ctx->thread_registry); - ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr); -} - -void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { - DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); - if (thr->ignore_sync) - return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreAcquireImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + for (auto &slot : ctx->slots) thr->clock.Set(slot.sid, slot.epoch()); } void Release(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: Release %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock lock(&s->mtx); + thr->clock.Release(&s->clock); + } + IncrementEpoch(thr); } void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreImpl(thr, pc, &s->clock); -} - -#if !SANITIZER_GO -static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) { - ThreadState *thr = reinterpret_cast<ThreadState*>(arg); - ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base); - u64 epoch = tctx->epoch1; - if (tctx->status == ThreadStatusRunning) - epoch = tctx->thr->fast_state.epoch(); - thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); -} - -void AfterSleep(ThreadState *thr, uptr pc) { - DPrintf("#%d: AfterSleep\n", thr->tid); - if (thr->ignore_sync) - return; - thr->last_sleep_stack_id = CurrentStackId(thr, pc); - ThreadRegistryLock l(&ctx->thread_registry); - ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback, - thr); -} -#endif - -void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->clock.acquire(&thr->proc()->clock_cache, c); -} - -void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.releaseStoreAcquire(&thr->proc()->clock_cache, c); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStore(&s->clock); + } + IncrementEpoch(thr); } -void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { +void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { + DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - 
thr->clock.release(&thr->proc()->clock_cache, c); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStoreAcquire(&s->clock); + } + IncrementEpoch(thr); } -void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.ReleaseStore(&thr->proc()->clock_cache, c); +void IncrementEpoch(ThreadState *thr) { + DCHECK(!thr->ignore_sync); + DCHECK(thr->slot_locked); + Epoch epoch = EpochInc(thr->fast_state.epoch()); + if (!EpochOverflow(epoch)) { + Sid sid = thr->fast_state.sid(); + thr->clock.Set(sid, epoch); + thr->fast_state.SetEpoch(epoch); + thr->slot->SetEpoch(epoch); + TraceTime(thr); + } } -void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { +#if !SANITIZER_GO +void AfterSleep(ThreadState *thr, uptr pc) { + DPrintf("#%d: AfterSleep\n", thr->tid); if (thr->ignore_sync) return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.acq_rel(&thr->proc()->clock_cache, c); + thr->last_sleep_stack_id = CurrentStackId(thr, pc); + thr->last_sleep_clock.Reset(); + SlotLocker locker(thr); + for (auto &slot : ctx->slots) + thr->last_sleep_clock.Set(slot.sid, slot.epoch()); } +#endif void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock)) @@ -532,7 +531,7 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(ReportTypeDeadlock); for (int i = 0; i < r->n; i++) { - rep.AddMutex(r->loop[i].mtx_ctx0); + rep.AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]); rep.AddUniqueTid((int)r->loop[i].thr_ctx); rep.AddThread((int)r->loop[i].thr_ctx); } @@ -540,7 +539,7 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { for (int i = 0; i < r->n; i++) { for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) { u32 stk = r->loop[i].stk[j]; - if (stk && stk != 0xffffffff) { + if (stk && stk != kInvalidStackID) { rep.AddStack(StackDepotGet(stk), true); } else { // Sometimes we fail to extract the stack trace (FIXME: investigate), @@ -552,4 +551,28 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { OutputReport(thr, rep); } +void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, + FastState last_lock, StackID creation_stack_id) { + // We need to lock the slot during RestoreStack because it protects + // the slot journal. 
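IncrementEpoch above advances a deliberately small counter. The FastState bitfield later in this patch (8 unused bits, 8 sid bits, kEpochBits epoch bits and 2 flag bits in a u32) implies kEpochBits is 14, so a slot's epoch range is finite and the increment must detect exhaustion; once it overflows, the thread stops publishing until a global reset recycles slots. A sketch of just that arithmetic, with the constants inferred from the bitfield rather than copied from tsan_defs.h:

#include <cassert>
#include <cstdint>

using Epoch = uint16_t;
constexpr int kEpochBits = 14;  // inferred: 8 + 8 + 14 + 1 + 1 = 32
constexpr Epoch kEpochLast = (1u << kEpochBits) - 1;

Epoch EpochInc(Epoch e) { return static_cast<Epoch>(e + 1); }
bool EpochOverflow(Epoch e) { return e > kEpochLast; }

int main() {
  Epoch e = kEpochLast;
  assert(!EpochOverflow(e));
  e = EpochInc(e);
  assert(EpochOverflow(e));  // slot exhausted: skip the publish, await a reset
  return 0;
}

On the non-overflow path the function publishes the new epoch in four places at once (the thread's own clock entry, fast_state, the slot, and the trace), which is what keeps the shadow's (sid, epoch) pairs decodable later.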
+ Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx); + ThreadRegistryLock l0(&ctx->thread_registry); + Lock slots_lock(&ctx->slot_mtx); + ScopedReport rep(ReportTypeMutexDestroyLocked); + rep.AddMutex(addr, creation_stack_id); + VarSizeStackTrace trace; + ObtainCurrentStack(thr, pc, &trace); + rep.AddStack(trace, true); + + Tid tid; + DynamicMutexSet mset; + uptr tag; + if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(), addr, + 0, kAccessWrite, &tid, &trace, mset, &tag)) + return; + rep.AddStack(trace, true); + rep.AddLocation(addr, 1); + OutputReport(thr, rep); +} + } // namespace __tsan diff --git a/libsanitizer/tsan/tsan_rtl_ppc64.S b/libsanitizer/tsan/tsan_rtl_ppc64.S index 9e533a7..8285e21 100644 --- a/libsanitizer/tsan/tsan_rtl_ppc64.S +++ b/libsanitizer/tsan/tsan_rtl_ppc64.S @@ -1,6 +1,5 @@ #include "tsan_ppc_regs.h" - .machine altivec .section .text .hidden __tsan_setjmp .globl _setjmp diff --git a/libsanitizer/tsan/tsan_rtl_proc.cpp b/libsanitizer/tsan/tsan_rtl_proc.cpp index def61cc..5acc396 100644 --- a/libsanitizer/tsan/tsan_rtl_proc.cpp +++ b/libsanitizer/tsan/tsan_rtl_proc.cpp @@ -35,7 +35,6 @@ void ProcDestroy(Processor *proc) { #if !SANITIZER_GO AllocatorProcFinish(proc); #endif - ctx->clock_alloc.FlushCache(&proc->clock_cache); ctx->metamap.OnProcIdle(proc); if (common_flags()->detect_deadlocks) ctx->dd->DestroyPhysicalThread(proc->dd_pt); diff --git a/libsanitizer/tsan/tsan_rtl_report.cpp b/libsanitizer/tsan/tsan_rtl_report.cpp index 811695d..4cf8816 100644 --- a/libsanitizer/tsan/tsan_rtl_report.cpp +++ b/libsanitizer/tsan/tsan_rtl_report.cpp @@ -175,22 +175,26 @@ void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) { } void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, - StackTrace stack, const MutexSet *mset) { + Tid tid, StackTrace stack, + const MutexSet *mset) { + uptr addr0, size; + AccessType typ; + s.GetAccess(&addr0, &size, &typ); auto *mop = New<ReportMop>(); rep_->mops.PushBack(mop); - mop->tid = s.tid(); - mop->addr = addr + s.addr0(); - mop->size = s.size(); - mop->write = s.IsWrite(); - mop->atomic = s.IsAtomic(); + mop->tid = tid; + mop->addr = addr + addr0; + mop->size = size; + mop->write = !(typ & kAccessRead); + mop->atomic = typ & kAccessAtomic; mop->stack = SymbolizeStack(stack); mop->external_tag = external_tag; if (mop->stack) mop->stack->suppressable = true; for (uptr i = 0; i < mset->Size(); i++) { MutexSet::Desc d = mset->Get(i); - u64 mid = this->AddMutex(d.id); - ReportMopMutex mtx = {mid, d.write}; + int id = this->AddMutex(d.addr, d.stack_id); + ReportMopMutex mtx = {id, d.write}; mop->mset.PushBack(mtx); } } @@ -219,18 +223,6 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) { } #if !SANITIZER_GO -static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) { - int unique_id = *(int *)arg; - return tctx->unique_id == (u32)unique_id; -} - -static ThreadContext *FindThreadByUidLocked(Tid unique_id) { - ctx->thread_registry.CheckLocked(); - return static_cast<ThreadContext *>( - ctx->thread_registry.FindThreadContextLocked( - FindThreadByUidLockedCallback, &unique_id)); -} - static ThreadContext *FindThreadByTidLocked(Tid tid) { ctx->thread_registry.CheckLocked(); return static_cast<ThreadContext *>( @@ -262,55 +254,24 @@ ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) { } #endif -void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) { +void ScopedReportBase::AddThread(Tid tid, bool 
suppressable) { #if !SANITIZER_GO - if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid)) + if (const ThreadContext *tctx = FindThreadByTidLocked(tid)) AddThread(tctx, suppressable); #endif } -void ScopedReportBase::AddMutex(const SyncVar *s) { - for (uptr i = 0; i < rep_->mutexes.Size(); i++) { - if (rep_->mutexes[i]->id == s->uid) - return; - } - auto *rm = New<ReportMutex>(); - rep_->mutexes.PushBack(rm); - rm->id = s->uid; - rm->addr = s->addr; - rm->destroyed = false; - rm->stack = SymbolizeStackId(s->creation_stack_id); -} - -u64 ScopedReportBase::AddMutex(u64 id) { - u64 uid = 0; - u64 mid = id; - uptr addr = SyncVar::SplitId(id, &uid); - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - // Check that the mutex is still alive. - // Another mutex can be created at the same address, - // so check uid as well. - if (s && s->CheckId(uid)) { - Lock l(&s->mtx); - mid = s->uid; - AddMutex(s); - } else { - AddDeadMutex(id); - } - return mid; -} - -void ScopedReportBase::AddDeadMutex(u64 id) { +int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) { for (uptr i = 0; i < rep_->mutexes.Size(); i++) { - if (rep_->mutexes[i]->id == id) - return; + if (rep_->mutexes[i]->addr == addr) + return rep_->mutexes[i]->id; } auto *rm = New<ReportMutex>(); rep_->mutexes.PushBack(rm); - rm->id = id; - rm->addr = 0; - rm->destroyed = true; - rm->stack = 0; + rm->id = rep_->mutexes.Size() - 1; + rm->addr = addr; + rm->stack = SymbolizeStackId(creation_stack_id); + return rm->id; } void ScopedReportBase::AddLocation(uptr addr, uptr size) { @@ -327,7 +288,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) { loc->tid = creat_tid; loc->stack = SymbolizeStackId(creat_stack); rep_->locs.PushBack(loc); - ThreadContext *tctx = FindThreadByUidLocked(creat_tid); + ThreadContext *tctx = FindThreadByTidLocked(creat_tid); if (tctx) AddThread(tctx); return; @@ -343,16 +304,15 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) { if (!b) b = JavaHeapBlock(addr, &block_begin); if (b != 0) { - ThreadContext *tctx = FindThreadByTidLocked(b->tid); auto *loc = New<ReportLocation>(); loc->type = ReportLocationHeap; - loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr); + loc->heap_chunk_start = block_begin; loc->heap_chunk_size = b->siz; loc->external_tag = b->tag; - loc->tid = tctx ? tctx->tid : b->tid; + loc->tid = b->tid; loc->stack = SymbolizeStackId(b->stk); rep_->locs.PushBack(loc); - if (tctx) + if (ThreadContext *tctx = FindThreadByTidLocked(b->tid)) AddThread(tctx); return; } @@ -380,6 +340,8 @@ void ScopedReportBase::AddSleep(StackID stack_id) { void ScopedReportBase::SetCount(int count) { rep_->count = count; } +void ScopedReportBase::SetSigNum(int sig) { rep_->signum = sig; } + const ReportDesc *ScopedReportBase::GetReport() const { return rep_; } ScopedReport::ScopedReport(ReportType typ, uptr tag) @@ -387,71 +349,6 @@ ScopedReport::ScopedReport(ReportType typ, uptr tag) ScopedReport::~ScopedReport() {} -void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, - MutexSet *mset, uptr *tag) { - // This function restores stack trace and mutex set for the thread/epoch. - // It does so by getting stack trace and mutex set at the beginning of - // trace part, and then replaying the trace till the given epoch. 
- Trace* trace = ThreadTrace(tid); - ReadLock l(&trace->mtx); - const int partidx = (epoch / kTracePartSize) % TraceParts(); - TraceHeader* hdr = &trace->headers[partidx]; - if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize) - return; - CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0); - const u64 epoch0 = RoundDown(epoch, TraceSize()); - const u64 eend = epoch % TraceSize(); - const u64 ebegin = RoundDown(eend, kTracePartSize); - DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n", - tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx); - Vector<uptr> stack; - stack.Resize(hdr->stack0.size + 64); - for (uptr i = 0; i < hdr->stack0.size; i++) { - stack[i] = hdr->stack0.trace[i]; - DPrintf2(" #%02zu: pc=%zx\n", i, stack[i]); - } - if (mset) - *mset = hdr->mset0; - uptr pos = hdr->stack0.size; - Event *events = (Event*)GetThreadTrace(tid); - for (uptr i = ebegin; i <= eend; i++) { - Event ev = events[i]; - EventType typ = (EventType)(ev >> kEventPCBits); - uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1)); - DPrintf2(" %zu typ=%d pc=%zx\n", i, typ, pc); - if (typ == EventTypeMop) { - stack[pos] = pc; - } else if (typ == EventTypeFuncEnter) { - if (stack.Size() < pos + 2) - stack.Resize(pos + 2); - stack[pos++] = pc; - } else if (typ == EventTypeFuncExit) { - if (pos > 0) - pos--; - } - if (mset) { - if (typ == EventTypeLock) { - mset->Add(pc, true, epoch0 + i); - } else if (typ == EventTypeUnlock) { - mset->Del(pc, true); - } else if (typ == EventTypeRLock) { - mset->Add(pc, false, epoch0 + i); - } else if (typ == EventTypeRUnlock) { - mset->Del(pc, false); - } - } - for (uptr j = 0; j <= pos; j++) - DPrintf2(" #%zu: %zx\n", j, stack[j]); - } - if (pos == 0 && stack[0] == 0) - return; - pos++; - stk->Init(&stack[0], pos); - ExtractTagFromStack(stk, tag); -} - -namespace v3 { - // Replays the trace up to last_pos position in the last part // or up to the provided epoch/sid (whichever is earlier) // and calls the provided function f for each event. @@ -469,6 +366,7 @@ void TraceReplay(Trace *trace, TracePart *last, Event *last_pos, Sid sid, Event *end = &part->events[TracePart::kSize - 1]; if (part == last) end = last_pos; + f(kFreeSid, kEpochOver, nullptr); // notify about part start for (Event *evp = &part->events[0]; evp < end; evp++) { Event *evp0 = evp; if (!evp->is_access && !evp->is_func) { @@ -528,21 +426,36 @@ static constexpr bool IsWithinAccess(uptr addr1, uptr size1, uptr addr2, return addr1 >= addr2 && addr1 + size1 <= addr2 + size2; } -// Replays the trace of thread tid up to the target event identified -// by sid/epoch/addr/size/typ and restores and returns stack, mutex set +// Replays the trace of slot sid up to the target event identified +// by epoch/addr/size/typ and restores and returns tid, stack, mutex set // and tag for that event. If there are multiple such events, it returns // the last one. Returns false if the event is not present in the trace. -bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, - uptr size, AccessType typ, VarSizeStackTrace *pstk, +bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, + AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag) { // This function restores stack trace and mutex set for the thread/epoch. // It does so by getting stack trace and mutex set at the beginning of // trace part, and then replaying the trace till the given epoch. 
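TraceReplay above now calls the callback with a null event at each part boundary (f(kFreeSid, kEpochOver, nullptr)), since every trace part is self-consistent: whatever stack, mutex set and PC cache the consumer reconstructed from the previous part must be dropped. A minimal consumer in the same shape; Event and the replay driver here are stand-ins, not the real trace format:

#include <cstdio>
#include <vector>

struct Event { unsigned long pc; };

// Replays parts through a callback, passing nullptr at each part start,
// mirroring the sentinel call added to TraceReplay in this patch.
template <typename F>
void Replay(const std::vector<std::vector<Event>> &parts, F f) {
  for (const auto &part : parts) {
    f(nullptr);  // part boundary: consumer resets reconstruction state
    for (const Event &ev : part) f(&ev);
  }
}

int main() {
  std::vector<std::vector<Event>> parts = {{{0x10}, {0x20}}, {{0x30}}};
  std::vector<unsigned long> stack;
  Replay(parts, [&](const Event *ev) {
    if (!ev) {  // same reset as stack.Resize(0) / mset->Reset() below
      stack.clear();
      return;
    }
    stack.push_back(ev->pc);
  });
  std::printf("restored %zu frame(s) from the last part\n", stack.size());
  return 0;
}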
- DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid, + DPrintf2("RestoreStack: sid=%u@%u addr=0x%zx/%zu typ=%x\n", static_cast<int>(sid), static_cast<int>(epoch), addr, size, static_cast<int>(typ)); ctx->slot_mtx.CheckLocked(); // needed to prevent trace part recycling ctx->thread_registry.CheckLocked(); + TidSlot *slot = &ctx->slots[static_cast<uptr>(sid)]; + Tid tid = kInvalidTid; + // Need to lock the slot mutex as it protects slot->journal. + slot->mtx.CheckLocked(); + for (uptr i = 0; i < slot->journal.Size(); i++) { + DPrintf2(" journal: epoch=%d tid=%d\n", + static_cast<int>(slot->journal[i].epoch), slot->journal[i].tid); + if (i == slot->journal.Size() - 1 || slot->journal[i + 1].epoch > epoch) { + tid = slot->journal[i].tid; + break; + } + } + if (tid == kInvalidTid) + return false; + *ptid = tid; ThreadContext *tctx = static_cast<ThreadContext *>(ctx->thread_registry.GetThreadLocked(tid)); Trace *trace = &tctx->trace; @@ -553,8 +466,10 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, { Lock lock(&trace->mtx); first_part = trace->parts.Front(); - if (!first_part) + if (!first_part) { + DPrintf2("RestoreStack: tid=%d trace=%p no trace parts\n", tid, trace); return false; + } last_part = trace->parts.Back(); last_pos = trace->final_pos; if (tctx->thr) @@ -567,9 +482,18 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, bool is_read = typ & kAccessRead; bool is_atomic = typ & kAccessAtomic; bool is_free = typ & kAccessFree; + DPrintf2("RestoreStack: tid=%d parts=[%p-%p] last_pos=%p\n", tid, + trace->parts.Front(), last_part, last_pos); TraceReplay( trace, last_part, last_pos, sid, epoch, [&](Sid ev_sid, Epoch ev_epoch, Event *evp) { + if (evp == nullptr) { + // Each trace part is self-consistent, so we reset state. + stack.Resize(0); + mset->Reset(); + prev_pc = 0; + return; + } bool match = ev_sid == sid && ev_epoch == epoch; if (evp->is_access) { if (evp->is_func == 0 && evp->type == EventType::kAccessExt && @@ -592,12 +516,15 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, if (evp->is_func) { auto *ev = reinterpret_cast<EventFunc *>(evp); if (ev->pc) { - DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); + DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); stack.PushBack(ev->pc); } else { - DPrintf2(" FuncExit\n"); - CHECK(stack.Size()); - stack.PopBack(); + DPrintf2(" FuncExit\n"); + // We don't log pathologically large stacks in each part, + // if the stack was truncated we can have more func exits than + // entries. 
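The journal scan above is how the rewritten RestoreStack recovers the thread id, which the shadow word no longer stores: each slot keeps (epoch, tid) records for its successive owners, and the owner at a target epoch is the record in force at that epoch. The lookup as a standalone function; the record layout is an assumption shaped after the loop above:

#include <cassert>
#include <vector>

using Tid = int;
using Epoch = unsigned;
constexpr Tid kInvalidTid = -1;

struct JournalRecord { Epoch epoch; Tid tid; };  // stand-in for slot->journal

Tid OwnerAtEpoch(const std::vector<JournalRecord> &journal, Epoch epoch) {
  for (size_t i = 0; i < journal.size(); i++) {
    // As in the diff: take record i if it is the last one, or if the next
    // record only takes over after the target epoch.
    if (i == journal.size() - 1 || journal[i + 1].epoch > epoch)
      return journal[i].tid;
  }
  return kInvalidTid;  // empty journal: the event cannot be attributed
}

int main() {
  std::vector<JournalRecord> j = {{0, 7}, {100, 9}};
  assert(OwnerAtEpoch(j, 57) == 7);   // epoch 57 still belongs to tid 7
  assert(OwnerAtEpoch(j, 100) == 9);  // tid 9 took the slot at epoch 100
  return 0;
}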
+ if (stack.Size()) + stack.PopBack(); } return; } @@ -666,8 +593,6 @@ bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, return found; } -} // namespace v3 - bool RacyStacks::operator==(const RacyStacks &other) const { if (hash[0] == other.hash[0] && hash[1] == other.hash[1]) return true; @@ -758,10 +683,7 @@ bool OutputReport(ThreadState *thr, const ScopedReport &srep) { ctx->fired_suppressions.push_back(s); } { - bool old_is_freeing = thr->is_freeing; - thr->is_freeing = false; bool suppressed = OnReport(rep, pc_or_addr != 0); - thr->is_freeing = old_is_freeing; if (suppressed) { thr->current_report = nullptr; return false; @@ -808,97 +730,72 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) { return false; } -static bool RaceBetweenAtomicAndFree(ThreadState *thr) { - Shadow s0(thr->racy_state[0]); - Shadow s1(thr->racy_state[1]); - CHECK(!(s0.IsAtomic() && s1.IsAtomic())); - if (!s0.IsAtomic() && !s1.IsAtomic()) - return true; - if (s0.IsAtomic() && s1.IsFreed()) - return true; - if (s1.IsAtomic() && thr->is_freeing) - return true; - return false; -} - -void ReportRace(ThreadState *thr) { +void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, + AccessType typ0) { CheckedMutex::CheckNoLocks(); // Symbolizer makes lots of intercepted calls. If we try to process them, // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore; + uptr addr = ShadowToMem(shadow_mem); + DPrintf("#%d: ReportRace %p\n", thr->tid, (void *)addr); if (!ShouldReport(thr, ReportTypeRace)) return; - if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr)) + uptr addr_off0, size0; + cur.GetAccess(&addr_off0, &size0, nullptr); + uptr addr_off1, size1, typ1; + old.GetAccess(&addr_off1, &size1, &typ1); + if (!flags()->report_atomic_races && + ((typ0 & kAccessAtomic) || (typ1 & kAccessAtomic)) && + !(typ0 & kAccessFree) && !(typ1 & kAccessFree)) return; - bool freed = false; - { - Shadow s(thr->racy_state[1]); - freed = s.GetFreedAndReset(); - thr->racy_state[1] = s.raw(); - } - - uptr addr = ShadowToMem(thr->racy_shadow_addr); - uptr addr_min = 0; - uptr addr_max = 0; - { - uptr a0 = addr + Shadow(thr->racy_state[0]).addr0(); - uptr a1 = addr + Shadow(thr->racy_state[1]).addr0(); - uptr e0 = a0 + Shadow(thr->racy_state[0]).size(); - uptr e1 = a1 + Shadow(thr->racy_state[1]).size(); - addr_min = min(a0, a1); - addr_max = max(e0, e1); - if (IsExpectedReport(addr_min, addr_max - addr_min)) - return; - } + const uptr kMop = 2; + Shadow s[kMop] = {cur, old}; + uptr addr0 = addr + addr_off0; + uptr addr1 = addr + addr_off1; + uptr end0 = addr0 + size0; + uptr end1 = addr1 + size1; + uptr addr_min = min(addr0, addr1); + uptr addr_max = max(end0, end1); + if (IsExpectedReport(addr_min, addr_max - addr_min)) + return; if (HandleRacyAddress(thr, addr_min, addr_max)) return; - ReportType typ = ReportTypeRace; - if (thr->is_vptr_access && freed) - typ = ReportTypeVptrUseAfterFree; - else if (thr->is_vptr_access) - typ = ReportTypeVptrRace; - else if (freed) - typ = ReportTypeUseAfterFree; + ReportType rep_typ = ReportTypeRace; + if ((typ0 & kAccessVptr) && (typ1 & kAccessFree)) + rep_typ = ReportTypeVptrUseAfterFree; + else if (typ0 & kAccessVptr) + rep_typ = ReportTypeVptrRace; + else if (typ1 & kAccessFree) + rep_typ = ReportTypeUseAfterFree; - if (IsFiredSuppression(ctx, typ, addr)) + if (IsFiredSuppression(ctx, rep_typ, addr)) return; - const uptr kMop = 2; VarSizeStackTrace traces[kMop]; - uptr tags[kMop] = 
{kExternalTagNone}; - uptr toppc = TraceTopPC(thr); - if (toppc >> kEventPCBits) { - // This is a work-around for a known issue. - // The scenario where this happens is rather elaborate and requires - // an instrumented __sanitizer_report_error_summary callback and - // a __tsan_symbolize_external callback and a race during a range memory - // access larger than 8 bytes. MemoryAccessRange adds the current PC to - // the trace and starts processing memory accesses. A first memory access - // triggers a race, we report it and call the instrumented - // __sanitizer_report_error_summary, which adds more stuff to the trace - // since it is intrumented. Then a second memory access in MemoryAccessRange - // also triggers a race and we get here and call TraceTopPC to get the - // current PC, however now it contains some unrelated events from the - // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit - // event. Later we subtract -1 from it (in GetPreviousInstructionPc) - // and the resulting PC has kExternalPCBit set, so we pass it to - // __tsan_symbolize_external_ex. __tsan_symbolize_external_ex is within its - // rights to crash since the PC is completely bogus. - // test/tsan/double_race.cpp contains a test case for this. - toppc = 0; - } - ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]); - if (IsFiredSuppression(ctx, typ, traces[0])) + Tid tids[kMop] = {thr->tid, kInvalidTid}; + uptr tags[kMop] = {kExternalTagNone, kExternalTagNone}; + + ObtainCurrentStack(thr, thr->trace_prev_pc, &traces[0], &tags[0]); + if (IsFiredSuppression(ctx, rep_typ, traces[0])) return; - DynamicMutexSet mset2; - Shadow s2(thr->racy_state[1]); - RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]); - if (IsFiredSuppression(ctx, typ, traces[1])) + DynamicMutexSet mset1; + MutexSet *mset[kMop] = {&thr->mset, mset1}; + + // We need to lock the slot during RestoreStack because it protects + // the slot journal. + Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx); + ThreadRegistryLock l0(&ctx->thread_registry); + Lock slots_lock(&ctx->slot_mtx); + if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1, + size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) + return; + + if (IsFiredSuppression(ctx, rep_typ, traces[1])) return; if (HandleRacyStacks(thr, traces)) @@ -908,39 +805,41 @@ void ReportRace(ThreadState *thr) { uptr tag = kExternalTagNone; for (uptr i = 0; i < kMop; i++) { if (tags[i] != kExternalTagNone) { - typ = ReportTypeExternalRace; + rep_typ = ReportTypeExternalRace; tag = tags[i]; break; } } - ThreadRegistryLock l0(&ctx->thread_registry); - ScopedReport rep(typ, tag); - for (uptr i = 0; i < kMop; i++) { - Shadow s(thr->racy_state[i]); - rep.AddMemoryAccess(addr, tags[i], s, traces[i], - i == 0 ? 
&thr->mset : mset2); - } + ScopedReport rep(rep_typ, tag); + for (uptr i = 0; i < kMop; i++) + rep.AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]); for (uptr i = 0; i < kMop; i++) { - FastState s(thr->racy_state[i]); ThreadContext *tctx = static_cast<ThreadContext *>( - ctx->thread_registry.GetThreadLocked(s.tid())); - if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1) - continue; + ctx->thread_registry.GetThreadLocked(tids[i])); rep.AddThread(tctx); } rep.AddLocation(addr_min, addr_max - addr_min); -#if !SANITIZER_GO - { - Shadow s(thr->racy_state[1]); - if (s.epoch() <= thr->last_sleep_clock.get(s.tid())) - rep.AddSleep(thr->last_sleep_stack_id); + if (flags()->print_full_thread_history) { + const ReportDesc *rep_desc = rep.GetReport(); + for (uptr i = 0; i < rep_desc->threads.Size(); i++) { + Tid parent_tid = rep_desc->threads[i]->parent_tid; + if (parent_tid == kMainTid || parent_tid == kInvalidTid) + continue; + ThreadContext *parent_tctx = static_cast<ThreadContext *>( + ctx->thread_registry.GetThreadLocked(parent_tid)); + rep.AddThread(parent_tctx); + } } -#endif +#if !SANITIZER_GO + if (!((typ0 | typ1) & kAccessFree) && + s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid())) + rep.AddSleep(thr->last_sleep_stack_id); +#endif OutputReport(thr, rep); } diff --git a/libsanitizer/tsan/tsan_rtl_thread.cpp b/libsanitizer/tsan/tsan_rtl_thread.cpp index 6e652ee..86c8b37 100644 --- a/libsanitizer/tsan/tsan_rtl_thread.cpp +++ b/libsanitizer/tsan/tsan_rtl_thread.cpp @@ -21,20 +21,14 @@ namespace __tsan { // ThreadContext implementation. -ThreadContext::ThreadContext(Tid tid) - : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {} +ThreadContext::ThreadContext(Tid tid) : ThreadContextBase(tid), thr(), sync() {} #if !SANITIZER_GO ThreadContext::~ThreadContext() { } #endif -void ThreadContext::OnReset() { - CHECK_EQ(sync.size(), 0); - uptr trace_p = GetThreadTrace(tid); - ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event)); - //!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace)); -} +void ThreadContext::OnReset() { CHECK(!sync); } #if !SANITIZER_GO struct ThreadLeak { @@ -57,7 +51,9 @@ static void CollectThreadLeaks(ThreadContextBase *tctx_base, void *arg) { } #endif -#if !SANITIZER_GO +// Disabled on Mac because lldb test TestTsanBasic fails: +// https://reviews.llvm.org/D112603#3163158 +#if !SANITIZER_GO && !SANITIZER_MAC static void ReportIgnoresEnabled(ThreadContext *tctx, IgnoreSet *set) { if (tctx->tid == kMainTid) { Printf("ThreadSanitizer: main thread finished with ignores enabled\n"); @@ -112,30 +108,35 @@ int ThreadCount(ThreadState *thr) { } struct OnCreatedArgs { - ThreadState *thr; - uptr pc; + VectorClock *sync; + uptr sync_epoch; + StackID stack; }; Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) { - OnCreatedArgs args = { thr, pc }; - u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers. - Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args); - DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid); + // The main thread and GCD workers don't have a parent thread. 
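The print_full_thread_history loop above re-reads rep_desc->threads.Size() on every iteration while rep.AddThread appends to the same array, so creator threads are pulled in transitively until every chain reaches the main thread. The same fixpoint walk in isolation; the parent map is a stand-in for ThreadContext::parent_tid, and the explicit duplicate check here plays the role of AddThread's own dedup:

#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

using Tid = int;
constexpr Tid kMainTid = 0;
constexpr Tid kInvalidTid = -1;

int main() {
  std::map<Tid, Tid> parent = {{3, 2}, {2, 1}, {1, kMainTid}};
  std::vector<Tid> threads = {3};  // threads already in the report
  for (size_t i = 0; i < threads.size(); i++) {  // size() re-read: grows as we go
    Tid p = parent.count(threads[i]) ? parent[threads[i]] : kInvalidTid;
    if (p == kMainTid || p == kInvalidTid)
      continue;
    if (std::find(threads.begin(), threads.end(), p) == threads.end())
      threads.push_back(p);  // rep.AddThread(parent_tctx) in the diff
  }
  for (Tid t : threads) std::printf("thread %d\n", t);  // prints 3, 2, 1
  return 0;
}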
+ Tid parent = kInvalidTid; + OnCreatedArgs arg = {nullptr, 0, kInvalidStackID}; + if (thr) { + parent = thr->tid; + arg.stack = CurrentStackId(thr, pc); + if (!thr->ignore_sync) { + SlotLocker locker(thr); + thr->clock.ReleaseStore(&arg.sync); + arg.sync_epoch = ctx->global_epoch; + IncrementEpoch(thr); + } + } + Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent, &arg); + DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent, tid, uid); return tid; } void ThreadContext::OnCreated(void *arg) { - thr = 0; - if (tid == kMainTid) - return; OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg); - if (!args->thr) // GCD workers don't have a parent thread. - return; - args->thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0); - ReleaseImpl(args->thr, 0, &sync); - creation_stack_id = CurrentStackId(args->thr, args->pc); + sync = args->sync; + sync_epoch = args->sync_epoch; + creation_stack_id = args->stack; } extern "C" void __tsan_stack_initialization() {} @@ -150,6 +151,15 @@ struct OnStartedArgs { void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type) { + ctx->thread_registry.StartThread(tid, os_id, thread_type, thr); + if (!thr->ignore_sync) { + SlotAttachAndLock(thr); + if (thr->tctx->sync_epoch == ctx->global_epoch) + thr->clock.Acquire(thr->tctx->sync); + SlotUnlock(thr); + } + Free(thr->tctx->sync); + uptr stk_addr = 0; uptr stk_size = 0; uptr tls_addr = 0; @@ -159,12 +169,10 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr, &tls_size); #endif - - ThreadRegistry *tr = &ctx->thread_registry; - OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size }; - tr->StartThread(tid, os_id, thread_type, &args); - - while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack(); + thr->stk_addr = stk_addr; + thr->stk_size = stk_size; + thr->tls_addr = tls_addr; + thr->tls_size = tls_size; #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { @@ -192,69 +200,80 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, } void ThreadContext::OnStarted(void *arg) { - OnStartedArgs *args = static_cast<OnStartedArgs *>(arg); - thr = args->thr; - // RoundUp so that one trace part does not contain events - // from different threads. - epoch0 = RoundUp(epoch1 + 1, kTracePartSize); - epoch1 = (u64)-1; - new (thr) - ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr, - args->stk_size, args->tls_addr, args->tls_size); + thr = static_cast<ThreadState *>(arg); + DPrintf("#%d: ThreadStart\n", tid); + new (thr) ThreadState(tid); if (common_flags()->detect_deadlocks) - thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id); - thr->fast_state.SetHistorySize(flags()->history_size); - // Commit switch to the new part of the trace. - // TraceAddEvent will reset stack0/mset0 in the new part for us. 
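ThreadCreate and ThreadStart above replace the old trace-based handshake with a heap snapshot: the parent ReleaseStores its clock into args->sync and stamps it with ctx->global_epoch, and the child acquires the snapshot only if the global epoch is still the same, presumably because a DoReset in between recycles slots and makes every (sid, epoch) pair in the snapshot meaningless. A condensed sketch of that handshake; the clock type and the global_epoch variable are stand-ins:

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

constexpr int kSlotCnt = 256;
using Clock = std::array<uint64_t, kSlotCnt>;

struct CreateArgs {  // mirrors OnCreatedArgs in the diff
  Clock *sync = nullptr;
  uint64_t sync_epoch = 0;
};

uint64_t global_epoch = 1;  // stand-in for ctx->global_epoch

void OnCreate(const Clock &parent_clock, CreateArgs &arg) {
  arg.sync = new Clock(parent_clock);  // thr->clock.ReleaseStore(&arg.sync)
  arg.sync_epoch = global_epoch;
}

void OnStart(Clock &child_clock, CreateArgs &arg) {
  if (arg.sync_epoch == global_epoch)  // snapshot still meaningful?
    for (int i = 0; i < kSlotCnt; i++)
      child_clock[i] = std::max(child_clock[i], (*arg.sync)[i]);
  delete arg.sync;  // Free(thr->tctx->sync) in ThreadStart
  arg.sync = nullptr;
}

int main() {
  Clock parent{};
  parent[1] = 42;
  CreateArgs arg;
  OnCreate(parent, arg);
  // global_epoch++;  // simulate a reset in between: the acquire is skipped
  Clock child{};
  OnStart(child, arg);
  std::printf("child[1] = %llu\n", (unsigned long long)child[1]);
  return 0;
}

ThreadJoin later in this file uses the identical stamp check on the clock handed over by the dying thread.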
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - - thr->fast_synch_epoch = epoch0; - AcquireImpl(thr, 0, &sync); - sync.Reset(&thr->proc()->clock_cache); + thr->dd_lt = ctx->dd->CreateLogicalThread(tid); thr->tctx = this; +#if !SANITIZER_GO thr->is_inited = true; - DPrintf( - "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx " - "tls_addr=%zx tls_size=%zx\n", - tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr, - args->tls_size); +#endif } void ThreadFinish(ThreadState *thr) { + DPrintf("#%d: ThreadFinish\n", thr->tid); ThreadCheckIgnore(thr); if (thr->stk_addr && thr->stk_size) DontNeedShadowFor(thr->stk_addr, thr->stk_size); if (thr->tls_addr && thr->tls_size) DontNeedShadowFor(thr->tls_addr, thr->tls_size); thr->is_dead = true; - ctx->thread_registry.FinishThread(thr->tid); -} - -void ThreadContext::OnFinished() { -#if SANITIZER_GO +#if !SANITIZER_GO + thr->is_inited = false; + thr->ignore_interceptors++; + PlatformCleanUpThreadState(thr); +#endif + if (!thr->ignore_sync) { + SlotLocker locker(thr); + ThreadRegistryLock lock(&ctx->thread_registry); + // Note: detached is protected by the thread registry mutex, + // the thread may be detaching concurrently in another thread. + if (!thr->tctx->detached) { + thr->clock.ReleaseStore(&thr->tctx->sync); + thr->tctx->sync_epoch = ctx->global_epoch; + IncrementEpoch(thr); + } + } +#if !SANITIZER_GO + UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr)); +#else Free(thr->shadow_stack); +#endif + thr->shadow_stack = nullptr; thr->shadow_stack_pos = nullptr; thr->shadow_stack_end = nullptr; -#endif - if (!detached) { - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseImpl(thr, 0, &sync); - } - epoch1 = thr->fast_state.epoch(); - if (common_flags()->detect_deadlocks) ctx->dd->DestroyLogicalThread(thr->dd_lt); - thr->clock.ResetCached(&thr->proc()->clock_cache); -#if !SANITIZER_GO - thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); -#endif -#if !SANITIZER_GO - PlatformCleanUpThreadState(thr); -#endif + SlotDetach(thr); + ctx->thread_registry.FinishThread(thr->tid); thr->~ThreadState(); - thr = 0; +} + +void ThreadContext::OnFinished() { + Lock lock(&ctx->slot_mtx); + Lock lock1(&trace.mtx); + // Queue all trace parts into the global recycle queue. + auto parts = &trace.parts; + while (trace.local_head) { + CHECK(parts->Queued(trace.local_head)); + ctx->trace_part_recycle.PushBack(trace.local_head); + trace.local_head = parts->Next(trace.local_head); + } + ctx->trace_part_recycle_finished += parts->Size(); + if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadHi) { + ctx->trace_part_finished_excess += parts->Size(); + trace.parts_allocated = 0; + } else if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadLo && + parts->Size() > 1) { + ctx->trace_part_finished_excess += parts->Size() - 1; + trace.parts_allocated = 1; + } + // From now on replay will use trace->final_pos. 
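OnFinished above queues a dead thread's trace parts into the global recycle list and then throttles how much history finished threads may pin, via two watermarks on the total number of recycled parts: above kFinishedThreadHi the thread keeps no parts allocated, above kFinishedThreadLo it keeps at most one. The decision in isolation; the watermark values here are assumptions, only their ordering matters:

#include <cassert>
#include <cstdint>

// Assumed values for the sketch; the real constants live in Trace.
constexpr uint64_t kFinishedThreadLo = 16;
constexpr uint64_t kFinishedThreadHi = 64;

// How many trace parts a finished thread may keep allocated, mirroring
// the branch in ThreadContext::OnFinished.
uint64_t PartsAllocated(uint64_t recycle_finished, uint64_t parts) {
  if (recycle_finished > kFinishedThreadHi)
    return 0;  // heavy pressure: everything goes to the recycle queue
  if (recycle_finished > kFinishedThreadLo && parts > 1)
    return 1;  // moderate pressure: keep a single part
  return parts;  // light pressure: keep what the thread had
}

int main() {
  assert(PartsAllocated(100, 4) == 0);
  assert(PartsAllocated(20, 4) == 1);
  assert(PartsAllocated(5, 4) == 4);
  return 0;
}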
+ trace.final_pos = (Event *)atomic_load_relaxed(&thr->trace_pos); + atomic_store_relaxed(&thr->trace_pos, 0); + thr->tctx = nullptr; + thr = nullptr; } struct ConsumeThreadContext { @@ -262,60 +281,47 @@ struct ConsumeThreadContext { ThreadContextBase *tctx; }; -static bool ConsumeThreadByUid(ThreadContextBase *tctx, void *arg) { - ConsumeThreadContext *findCtx = (ConsumeThreadContext *)arg; - if (tctx->user_id == findCtx->uid && tctx->status != ThreadStatusInvalid) { - if (findCtx->tctx) { - // Ensure that user_id is unique. If it's not the case we are screwed. - // Something went wrong before, but now there is no way to recover. - // Returning a wrong thread is not an option, it may lead to very hard - // to debug false positives (e.g. if we join a wrong thread). - Report("ThreadSanitizer: dup thread with used id 0x%zx\n", findCtx->uid); - Die(); - } - findCtx->tctx = tctx; - tctx->user_id = 0; - } - return false; -} - Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) { - ConsumeThreadContext findCtx = {uid, nullptr}; - ctx->thread_registry.FindThread(ConsumeThreadByUid, &findCtx); - Tid tid = findCtx.tctx ? findCtx.tctx->tid : kInvalidTid; - DPrintf("#%d: ThreadTid uid=%zu tid=%d\n", thr->tid, uid, tid); - return tid; + return ctx->thread_registry.ConsumeThreadUserId(uid); } +struct JoinArg { + VectorClock *sync; + uptr sync_epoch; +}; + void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid); - ctx->thread_registry.JoinThread(tid, thr); + JoinArg arg = {}; + ctx->thread_registry.JoinThread(tid, &arg); + if (!thr->ignore_sync) { + SlotLocker locker(thr); + if (arg.sync_epoch == ctx->global_epoch) + thr->clock.Acquire(arg.sync); + } + Free(arg.sync); } -void ThreadContext::OnJoined(void *arg) { - ThreadState *caller_thr = static_cast<ThreadState *>(arg); - AcquireImpl(caller_thr, 0, &sync); - sync.Reset(&caller_thr->proc()->clock_cache); +void ThreadContext::OnJoined(void *ptr) { + auto arg = static_cast<JoinArg *>(ptr); + arg->sync = sync; + arg->sync_epoch = sync_epoch; + sync = nullptr; + sync_epoch = 0; } -void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); } +void ThreadContext::OnDead() { CHECK_EQ(sync, nullptr); } void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); ctx->thread_registry.DetachThread(tid, thr); } -void ThreadContext::OnDetached(void *arg) { - ThreadState *thr1 = static_cast<ThreadState *>(arg); - sync.Reset(&thr1->proc()->clock_cache); -} +void ThreadContext::OnDetached(void *arg) { Free(sync); } void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); ctx->thread_registry.SetThreadUserId(tid, uid); } diff --git a/libsanitizer/tsan/tsan_shadow.h b/libsanitizer/tsan/tsan_shadow.h index 8b7bc34..843573e 100644 --- a/libsanitizer/tsan/tsan_shadow.h +++ b/libsanitizer/tsan/tsan_shadow.h @@ -10,223 +10,170 @@ #define TSAN_SHADOW_H #include "tsan_defs.h" -#include "tsan_trace.h" namespace __tsan { -// FastState (from most significant bit): -// ignore : 1 -// tid : kTidBits -// unused : - -// history_size : 3 -// epoch : kClkBits class FastState { public: - FastState(u64 tid, u64 epoch) { - x_ = tid << kTidShift; - x_ |= epoch; - DCHECK_EQ(tid, this->tid()); - DCHECK_EQ(epoch, this->epoch()); - DCHECK_EQ(GetIgnoreBit(), false); - } - - explicit FastState(u64 x) : x_(x) {} - - u64 raw() const { return x_; } - - u64 tid() const { - u64 res = (x_ & ~kIgnoreBit) >> 
kTidShift; - return res; - } - - u64 TidWithIgnore() const { - u64 res = x_ >> kTidShift; - return res; - } - - u64 epoch() const { - u64 res = x_ & ((1ull << kClkBits) - 1); - return res; - } + FastState() { Reset(); } - void IncrementEpoch() { - u64 old_epoch = epoch(); - x_ += 1; - DCHECK_EQ(old_epoch + 1, epoch()); - (void)old_epoch; + void Reset() { + part_.unused0_ = 0; + part_.sid_ = static_cast<u8>(kFreeSid); + part_.epoch_ = static_cast<u16>(kEpochLast); + part_.unused1_ = 0; + part_.ignore_accesses_ = false; } - void SetIgnoreBit() { x_ |= kIgnoreBit; } - void ClearIgnoreBit() { x_ &= ~kIgnoreBit; } - bool GetIgnoreBit() const { return (s64)x_ < 0; } + void SetSid(Sid sid) { part_.sid_ = static_cast<u8>(sid); } - void SetHistorySize(int hs) { - CHECK_GE(hs, 0); - CHECK_LE(hs, 7); - x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift); - } + Sid sid() const { return static_cast<Sid>(part_.sid_); } - ALWAYS_INLINE - int GetHistorySize() const { - return (int)((x_ >> kHistoryShift) & kHistoryMask); - } + Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); } - void ClearHistorySize() { SetHistorySize(0); } + void SetEpoch(Epoch epoch) { part_.epoch_ = static_cast<u16>(epoch); } - ALWAYS_INLINE - u64 GetTracePos() const { - const int hs = GetHistorySize(); - // When hs == 0, the trace consists of 2 parts. - const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1; - return epoch() & mask; - } + void SetIgnoreBit() { part_.ignore_accesses_ = 1; } + void ClearIgnoreBit() { part_.ignore_accesses_ = 0; } + bool GetIgnoreBit() const { return part_.ignore_accesses_; } private: friend class Shadow; - static const int kTidShift = 64 - kTidBits - 1; - static const u64 kIgnoreBit = 1ull << 63; - static const u64 kFreedBit = 1ull << 63; - static const u64 kHistoryShift = kClkBits; - static const u64 kHistoryMask = 7; - u64 x_; + struct Parts { + u32 unused0_ : 8; + u32 sid_ : 8; + u32 epoch_ : kEpochBits; + u32 unused1_ : 1; + u32 ignore_accesses_ : 1; + }; + union { + Parts part_; + u32 raw_; + }; }; -// Shadow (from most significant bit): -// freed : 1 -// tid : kTidBits -// is_atomic : 1 -// is_read : 1 -// size_log : 2 -// addr0 : 3 -// epoch : kClkBits -class Shadow : public FastState { - public: - explicit Shadow(u64 x) : FastState(x) {} +static_assert(sizeof(FastState) == kShadowSize, "bad FastState size"); - explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); } - - void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) { - DCHECK_EQ((x_ >> kClkBits) & 31, 0); - DCHECK_LE(addr0, 7); - DCHECK_LE(kAccessSizeLog, 3); - x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits; - DCHECK_EQ(kAccessSizeLog, size_log()); - DCHECK_EQ(addr0, this->addr0()); - } - - void SetWrite(unsigned kAccessIsWrite) { - DCHECK_EQ(x_ & kReadBit, 0); - if (!kAccessIsWrite) - x_ |= kReadBit; - DCHECK_EQ(kAccessIsWrite, IsWrite()); - } - - void SetAtomic(bool kIsAtomic) { - DCHECK(!IsAtomic()); - if (kIsAtomic) - x_ |= kAtomicBit; - DCHECK_EQ(IsAtomic(), kIsAtomic); - } - - bool IsAtomic() const { return x_ & kAtomicBit; } - - bool IsZero() const { return x_ == 0; } - - static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) { - u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift; - DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore()); - return shifted_xor == 0; - } - - static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1, - const Shadow s2) { - u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31; - return masked_xor == 0; 
+class Shadow { + public: + static constexpr RawShadow kEmpty = static_cast<RawShadow>(0); + + Shadow(FastState state, u32 addr, u32 size, AccessType typ) { + raw_ = state.raw_; + DCHECK_GT(size, 0); + DCHECK_LE(size, 8); + UNUSED Sid sid0 = part_.sid_; + UNUSED u16 epoch0 = part_.epoch_; + raw_ |= (!!(typ & kAccessAtomic) << kIsAtomicShift) | + (!!(typ & kAccessRead) << kIsReadShift) | + (((((1u << size) - 1) << (addr & 0x7)) & 0xff) << kAccessShift); + // Note: we don't check kAccessAtomic because it overlaps with + // FastState::ignore_accesses_ and it may be set spuriously. + DCHECK_EQ(part_.is_read_, !!(typ & kAccessRead)); + DCHECK_EQ(sid(), sid0); + DCHECK_EQ(epoch(), epoch0); + } + + explicit Shadow(RawShadow x = Shadow::kEmpty) { raw_ = static_cast<u32>(x); } + + RawShadow raw() const { return static_cast<RawShadow>(raw_); } + Sid sid() const { return part_.sid_; } + Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); } + u8 access() const { return part_.access_; } + + void GetAccess(uptr *addr, uptr *size, AccessType *typ) const { + DCHECK(part_.access_ != 0 || raw_ == static_cast<u32>(Shadow::kRodata)); + if (addr) + *addr = part_.access_ ? __builtin_ffs(part_.access_) - 1 : 0; + if (size) + *size = part_.access_ == kFreeAccess ? kShadowCell + : __builtin_popcount(part_.access_); + if (typ) + *typ = (part_.is_read_ ? kAccessRead : kAccessWrite) | + (part_.is_atomic_ ? kAccessAtomic : 0) | + (part_.access_ == kFreeAccess ? kAccessFree : 0); } - static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2, - unsigned kS2AccessSize) { - bool res = false; - u64 diff = s1.addr0() - s2.addr0(); - if ((s64)diff < 0) { // s1.addr0 < s2.addr0 - // if (s1.addr0() + size1) > s2.addr0()) return true; - if (s1.size() > -diff) - res = true; - } else { - // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true; - if (kS2AccessSize > diff) - res = true; - } - DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2)); - DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1)); + ALWAYS_INLINE + bool IsBothReadsOrAtomic(AccessType typ) const { + u32 is_read = !!(typ & kAccessRead); + u32 is_atomic = !!(typ & kAccessAtomic); + bool res = + raw_ & ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); + DCHECK_EQ(res, + (part_.is_read_ && is_read) || (part_.is_atomic_ && is_atomic)); return res; } - u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; } - u64 ALWAYS_INLINE size() const { return 1ull << size_log(); } - bool ALWAYS_INLINE IsWrite() const { return !IsRead(); } - bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; } - - // The idea behind the freed bit is as follows. - // When the memory is freed (or otherwise unaccessible) we write to the shadow - // values with tid/epoch related to the free and the freed bit set. - // During memory accesses processing the freed bit is considered - // as msb of tid. So any access races with shadow with freed bit set - // (it is as if write from a thread with which we never synchronized before). - // This allows us to detect accesses to freed memory w/o additional - // overheads in memory access processing and at the same time restore - // tid/epoch of free. 
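The new Shadow above compresses the accessed byte range into a single 8-bit mask, one bit per byte of the 8-byte shadow cell: the constructor computes ((1 << size) - 1) << (addr & 0x7) and GetAccess inverts it with ffs and popcount. A standalone round trip of just that encoding; kFreeAccess = 0x81 is the one constant taken from the diff, deliberately non-contiguous so no genuine 1-8 byte access can collide with it:

#include <cassert>
#include <cstdint>

constexpr uint8_t kFreeAccess = 0x81;  // matches the constant in the diff

uint8_t EncodeAccess(unsigned addr_off, unsigned size) {
  // size in [1,8], addr_off in [0,7]; bits past the cell end are dropped.
  return static_cast<uint8_t>((((1u << size) - 1) << (addr_off & 0x7)) & 0xff);
}

void DecodeAccess(uint8_t access, unsigned *addr_off, unsigned *size) {
  *addr_off = access ? __builtin_ffs(access) - 1 : 0;  // lowest set bit
  *size = __builtin_popcount(access);                  // length of the run
}

int main() {
  unsigned off, size;
  DecodeAccess(EncodeAccess(2, 4), &off, &size);
  assert(off == 2 && size == 4);  // mask is 0b00111100
  // kFreeAccess is 0b10000001: the gap means no contiguous access encodes
  // to it, so GetAccess can map it to a whole-cell kAccessFree instead.
  assert(__builtin_popcount(kFreeAccess) == 2);
  return 0;
}

A side benefit of the mask form is that two shadow values can be checked for byte overlap with a bitwise AND of their access bytes, without unpacking addr0 and size.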
- void MarkAsFreed() { x_ |= kFreedBit; } - - bool IsFreed() const { return x_ & kFreedBit; } - - bool GetFreedAndReset() { - bool res = x_ & kFreedBit; - x_ &= ~kFreedBit; + ALWAYS_INLINE + bool IsRWWeakerOrEqual(AccessType typ) const { + u32 is_read = !!(typ & kAccessRead); + u32 is_atomic = !!(typ & kAccessAtomic); + UNUSED u32 res0 = + (part_.is_atomic_ > is_atomic) || + (part_.is_atomic_ == is_atomic && part_.is_read_ >= is_read); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const u32 kAtomicReadMask = (1 << kIsAtomicShift) | (1 << kIsReadShift); + bool res = (raw_ & kAtomicReadMask) >= + ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); + + DCHECK_EQ(res, res0); return res; +#else + return res0; +#endif } - bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const { - bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) | - (u64(kIsAtomic) << kAtomicShift)); - DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic)); - return v; - } - - bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const { - bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); - DCHECK_EQ(v, (IsAtomic() < kIsAtomic) || - (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite)); - return v; + // The FreedMarker must not pass "the same access check" so that we don't + // return from the race detection algorithm early. + static RawShadow FreedMarker() { + FastState fs; + fs.SetSid(kFreeSid); + fs.SetEpoch(kEpochLast); + Shadow s(fs, 0, 8, kAccessWrite); + return s.raw(); } - bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const { - bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); - DCHECK_EQ(v, (IsAtomic() > kIsAtomic) || - (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite)); - return v; + static RawShadow FreedInfo(Sid sid, Epoch epoch) { + Shadow s; + s.part_.sid_ = sid; + s.part_.epoch_ = static_cast<u16>(epoch); + s.part_.access_ = kFreeAccess; + return s.raw(); } private: - static const u64 kReadShift = 5 + kClkBits; - static const u64 kReadBit = 1ull << kReadShift; - static const u64 kAtomicShift = 6 + kClkBits; - static const u64 kAtomicBit = 1ull << kAtomicShift; - - u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; } - - static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) { - if (s1.addr0() == s2.addr0()) - return true; - if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0()) - return true; - if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0()) - return true; - return false; - } + struct Parts { + u8 access_; + Sid sid_; + u16 epoch_ : kEpochBits; + u16 is_read_ : 1; + u16 is_atomic_ : 1; + }; + union { + Parts part_; + u32 raw_; + }; + + static constexpr u8 kFreeAccess = 0x81; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + static constexpr uptr kAccessShift = 0; + static constexpr uptr kIsReadShift = 30; + static constexpr uptr kIsAtomicShift = 31; +#else + static constexpr uptr kAccessShift = 24; + static constexpr uptr kIsReadShift = 1; + static constexpr uptr kIsAtomicShift = 0; +#endif + + public: + // .rodata shadow marker, see MapRodata and ContainsSameAccessFast. 
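IsRWWeakerOrEqual above leans on the field layout: with is_atomic in bit 31 and is_read in bit 30 (the little-endian arrangement), the (atomic, read) pair of a shadow value orders lexicographically as an unsigned integer, so one masked compare replaces the two-branch predicate. A brute-force check that the trick and the spelled-out predicate (res0 in the diff) agree on all combinations:

#include <cassert>
#include <cstdint>

constexpr int kIsReadShift = 30;    // little-endian layout from the diff
constexpr int kIsAtomicShift = 31;

int main() {
  const uint32_t mask = (1u << kIsAtomicShift) | (1u << kIsReadShift);
  for (uint32_t sa = 0; sa <= 1; sa++)        // shadow is_atomic
    for (uint32_t sr = 0; sr <= 1; sr++)      // shadow is_read
      for (uint32_t a = 0; a <= 1; a++)       // current access: atomic?
        for (uint32_t r = 0; r <= 1; r++) {   // current access: read?
          uint32_t raw = (sa << kIsAtomicShift) | (sr << kIsReadShift);
          bool fast = (raw & mask) >=
                      ((a << kIsAtomicShift) | (r << kIsReadShift));
          bool slow = (sa > a) || (sa == a && sr >= r);
          assert(fast == slow);
        }
  return 0;
}

On big-endian targets the two bits sit at the other end of the word in the opposite order, which is why the diff falls back to res0 there.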
+ static constexpr RawShadow kRodata = + static_cast<RawShadow>(1 << kIsReadShift); }; -const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker +static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size"); } // namespace __tsan diff --git a/libsanitizer/tsan/tsan_sync.cpp b/libsanitizer/tsan/tsan_sync.cpp index f042aba..09d4178 100644 --- a/libsanitizer/tsan/tsan_sync.cpp +++ b/libsanitizer/tsan/tsan_sync.cpp @@ -18,43 +18,31 @@ namespace __tsan { void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s); -SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); } +SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(); } -void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, - bool save_stack) { +void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack) { + Reset(); this->addr = addr; - this->uid = uid; - this->next = 0; - - creation_stack_id = kInvalidStackID; + next = 0; if (save_stack && !SANITIZER_GO) // Go does not use them creation_stack_id = CurrentStackId(thr, pc); if (common_flags()->detect_deadlocks) DDMutexInit(thr, pc, this); } -void SyncVar::Reset(Processor *proc) { - uid = 0; +void SyncVar::Reset() { + CHECK(!ctx->resetting); creation_stack_id = kInvalidStackID; owner_tid = kInvalidTid; - last_lock = 0; + last_lock.Reset(); recursion = 0; atomic_store_relaxed(&flags, 0); - - if (proc == 0) { - CHECK_EQ(clock.size(), 0); - CHECK_EQ(read_clock.size(), 0); - } else { - clock.Reset(&proc->clock_cache); - read_clock.Reset(&proc->clock_cache); - } + Free(clock); + Free(read_clock); } MetaMap::MetaMap() - : block_alloc_(LINKER_INITIALIZED, "heap block allocator"), - sync_alloc_(LINKER_INITIALIZED, "sync allocator") { - atomic_store(&uid_gen_, 0, memory_order_relaxed); -} + : block_alloc_("heap block allocator"), sync_alloc_("sync allocator") {} void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) { u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache); @@ -68,16 +56,16 @@ void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) { *meta = idx | kFlagBlock; } -uptr MetaMap::FreeBlock(Processor *proc, uptr p) { +uptr MetaMap::FreeBlock(Processor *proc, uptr p, bool reset) { MBlock* b = GetBlock(p); if (b == 0) return 0; uptr sz = RoundUpTo(b->siz, kMetaShadowCell); - FreeRange(proc, p, sz); + FreeRange(proc, p, sz, reset); return sz; } -bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) { +bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) { bool has_something = false; u32 *meta = MemToMeta(p); u32 *end = MemToMeta(p + sz); @@ -99,7 +87,8 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) { DCHECK(idx & kFlagSync); SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask); u32 next = s->next; - s->Reset(proc); + if (reset) + s->Reset(); sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask); idx = next; } else { @@ -116,30 +105,30 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) { // which can be huge. The function probes pages one-by-one until it finds a page // without meta objects, at this point it stops freeing meta objects. Because // thread stacks grow top-down, we do the same starting from end as well. -void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) { +void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { if (SANITIZER_GO) { // UnmapOrDie/MmapFixedNoReserve does not work on Windows, // so we do the optimization only for C/C++. 
-    FreeRange(proc, p, sz);
+    FreeRange(proc, p, sz, reset);
     return;
   }
   const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
   const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
   if (sz <= 4 * kPageSize) {
     // If the range is small, just do the normal free procedure.
-    FreeRange(proc, p, sz);
+    FreeRange(proc, p, sz, reset);
     return;
   }
   // First, round both ends of the range to page size.
   uptr diff = RoundUp(p, kPageSize) - p;
   if (diff != 0) {
-    FreeRange(proc, p, diff);
+    FreeRange(proc, p, diff, reset);
     p += diff;
     sz -= diff;
   }
   diff = p + sz - RoundDown(p + sz, kPageSize);
   if (diff != 0) {
-    FreeRange(proc, p + sz - diff, diff);
+    FreeRange(proc, p + sz - diff, diff, reset);
     sz -= diff;
   }
   // Now we must have a non-empty page-aligned range.
@@ -150,7 +139,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
   const uptr sz0 = sz;
   // Probe start of the range.
   for (uptr checked = 0; sz > 0; checked += kPageSize) {
-    bool has_something = FreeRange(proc, p, kPageSize);
+    bool has_something = FreeRange(proc, p, kPageSize, reset);
     p += kPageSize;
     sz -= kPageSize;
     if (!has_something && checked > (128 << 10))
@@ -158,7 +147,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
   }
   // Probe end of the range.
   for (uptr checked = 0; sz > 0; checked += kPageSize) {
-    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
+    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize, reset);
     sz -= kPageSize;
     // Stacks grow down, so sync objects are most likely at the end of the region
     // (if it is a stack). The very end of the stack is TLS and tsan increases
@@ -177,6 +166,27 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
   Die();
 }
 
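The probing loops above reward a compact restatement. The following hedged sketch mirrors only the strategy; kPage, kStopAfter, FreeOnePage, and the flat meta array are stand-ins rather than the real shadow machinery, and the real code allows a longer empty run when probing the stack end (TLS lives there):

    #include <cstdint>
    #include <cstring>

    constexpr uintptr_t kPage = 4096;            // stand-in for kPageSize above
    constexpr uintptr_t kStopAfter = 128 << 10;  // threshold from the
                                                 // start-probe loop above

    // Hypothetical helper: frees metadata in one page, reports if any existed.
    static bool FreeOnePage(uint8_t *meta, uintptr_t off) {
      bool has_something = false;
      for (uintptr_t i = 0; i < kPage; i++)
        has_something |= meta[off + i] != 0;
      std::memset(meta + off, 0, kPage);
      return has_something;
    }

    // p and sz are assumed page-aligned, as after the rounding steps above.
    void ResetRangeSketch(uint8_t *meta, uintptr_t p, uintptr_t sz) {
      // Probe from the start until a long run of empty pages.
      for (uintptr_t checked = 0; sz > 0; checked += kPage) {
        bool has_something = FreeOnePage(meta, p);
        p += kPage;
        sz -= kPage;
        if (!has_something && checked > kStopAfter)
          break;
      }
      // Probe from the end: stacks grow down, so live sync objects cluster
      // there.
      for (uintptr_t checked = 0; sz > 0; checked += kPage) {
        bool has_something = FreeOnePage(meta, p + sz - kPage);
        sz -= kPage;
        if (!has_something && checked > kStopAfter)
          break;
      }
      // The real code then remaps the untouched middle [p, p+sz) in one
      // operation (per the UnmapOrDie/MmapFixedNoReserve comment above)
      // instead of touching every page.
    }

+void MetaMap::ResetClocks() {
+  // This can be called from the background thread
+  // which does not have proc/cache.
+  // The cache is too large for the stack.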
+  static InternalAllocatorCache cache;
+  internal_memset(&cache, 0, sizeof(cache));
+  internal_allocator()->InitCache(&cache);
+  sync_alloc_.ForEach([&](SyncVar *s) {
+    if (s->clock) {
+      InternalFree(s->clock, &cache);
+      s->clock = nullptr;
+    }
+    if (s->read_clock) {
+      InternalFree(s->read_clock, &cache);
+      s->read_clock = nullptr;
+    }
+    s->last_lock.Reset();
+  });
+  internal_allocator()->DestroyCache(&cache);
+}
+
 MBlock* MetaMap::GetBlock(uptr p) {
   u32 *meta = MemToMeta(p);
   u32 idx = *meta;
@@ -193,6 +203,7 @@ MBlock* MetaMap::GetBlock(uptr p) {
 
 SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
                           bool save_stack) {
+  DCHECK(!create || thr->slot_locked);
   u32 *meta = MemToMeta(addr);
   u32 idx0 = *meta;
   u32 myidx = 0;
@@ -203,7 +214,7 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
     SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
     if (LIKELY(s->addr == addr)) {
       if (UNLIKELY(myidx != 0)) {
-        mys->Reset(thr->proc());
+        mys->Reset();
         sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
       }
       return s;
@@ -218,10 +229,9 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
     }
     if (LIKELY(myidx == 0)) {
-      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
       myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
       mys = sync_alloc_.Map(myidx);
-      mys->Init(thr, pc, addr, uid, save_stack);
+      mys->Init(thr, pc, addr, save_stack);
     }
     mys->next = idx0;
     if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
diff --git a/libsanitizer/tsan/tsan_sync.h b/libsanitizer/tsan/tsan_sync.h
index fc8fa28..67d3c0b 100644
--- a/libsanitizer/tsan/tsan_sync.h
+++ b/libsanitizer/tsan/tsan_sync.h
@@ -16,8 +16,9 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
 #include "tsan_defs.h"
-#include "tsan_clock.h"
 #include "tsan_dense_alloc.h"
+#include "tsan_shadow.h"
+#include "tsan_vector_clock.h"
 
 namespace __tsan {
 
@@ -53,34 +54,18 @@ struct SyncVar {
   uptr addr;  // overwritten by DenseSlabAlloc freelist
   Mutex mtx;
-  u64 uid;  // Globally unique id.
   StackID creation_stack_id;
   Tid owner_tid;  // Set only by exclusive owners.
-  u64 last_lock;
+  FastState last_lock;
   int recursion;
   atomic_uint32_t flags;
   u32 next;  // in MetaMap
   DDMutex dd;
-  SyncClock read_clock;  // Used for rw mutexes only.
-  // The clock is placed last, so that it is situated on a different cache line
-  // with the mtx. This reduces contention for hot sync objects.
-  SyncClock clock;
+  VectorClock *read_clock;  // Used for rw mutexes only.
+  VectorClock *clock;
 
-  void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack);
-  void Reset(Processor *proc);
-
-  u64 GetId() const {
-    // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits.
-    return GetLsb((u64)addr | (uid << 48), 60);
-  }
-  bool CheckId(u64 uid) const {
-    CHECK_EQ(uid, GetLsb(uid, 14));
-    return GetLsb(this->uid, 14) == uid;
-  }
-  static uptr SplitId(u64 id, u64 *uid) {
-    *uid = id >> 48;
-    return (uptr)GetLsb(id, 48);
-  }
+  void Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack);
+  void Reset();
 
   bool IsFlagSet(u32 f) const {
     return atomic_load_relaxed(&flags) & f;
@@ -110,9 +95,20 @@ class MetaMap {
   MetaMap();
 
   void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);
-  uptr FreeBlock(Processor *proc, uptr p);
-  bool FreeRange(Processor *proc, uptr p, uptr sz);
-  void ResetRange(Processor *proc, uptr p, uptr sz);
+
+  // FreeBlock resets all sync objects in the range if reset=true and must not
+  // run concurrently with ResetClocks which resets all sync objects
+  // w/o any synchronization (as part of DoReset).
+  // If we don't have a thread slot (very early/late in thread lifetime or
+  // Go/Java callbacks) or the slot is not locked, then reset must be set to
+  // false. In such a case sync object clocks will be reset later (when the
+  // object is reused or during the next ResetClocks).
+  uptr FreeBlock(Processor *proc, uptr p, bool reset);
+  bool FreeRange(Processor *proc, uptr p, uptr sz, bool reset);
+  void ResetRange(Processor *proc, uptr p, uptr sz, bool reset);
+  // Reset vector clocks of all sync objects.
+  // Must be called when no other threads access sync objects.
+  void ResetClocks();
   MBlock* GetBlock(uptr p);
 
   SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr,
@@ -142,7 +138,6 @@ class MetaMap {
   typedef DenseSlabAlloc<SyncVar, 1 << 20, 1 << 10, kFlagMask> SyncAlloc;
   BlockAlloc block_alloc_;
   SyncAlloc sync_alloc_;
-  atomic_uint64_t uid_gen_;
 
   SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
                    bool save_stack);
diff --git a/libsanitizer/tsan/tsan_trace.h b/libsanitizer/tsan/tsan_trace.h
index a771ad9..01bb7b3 100644
--- a/libsanitizer/tsan/tsan_trace.h
+++ b/libsanitizer/tsan/tsan_trace.h
@@ -19,57 +19,6 @@
 
 namespace __tsan {
 
-const int kTracePartSizeBits = 13;
-const int kTracePartSize = 1 << kTracePartSizeBits;
-const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize;
-const int kTraceSize = kTracePartSize * kTraceParts;
-
-// Must fit into 3 bits.
-enum EventType {
-  EventTypeMop,
-  EventTypeFuncEnter,
-  EventTypeFuncExit,
-  EventTypeLock,
-  EventTypeUnlock,
-  EventTypeRLock,
-  EventTypeRUnlock
-};
-
-// Represents a thread event (from most significant bit):
-//   u64 typ  : 3;   // EventType.
-//   u64 addr : 61;  // Associated pc.
-typedef u64 Event;
-
-const uptr kEventPCBits = 61;
-
-struct TraceHeader {
-#if !SANITIZER_GO
-  BufferedStackTrace stack0;  // Start stack for the trace.
-#else
-  VarSizeStackTrace stack0;
-#endif
-  u64 epoch0;  // Start epoch for the trace.
-  MutexSet mset0;
-
-  TraceHeader() : stack0(), epoch0() {}
-};
-
-struct Trace {
-  Mutex mtx;
-#if !SANITIZER_GO
-  // Must be last to catch overflow as paging fault.
-  // Go shadow stack is dynamically allocated.
-  uptr shadow_stack[kShadowStackSize];
-#endif
-  // Must be the last field, because we unmap the unused part in
-  // CreateThreadContext.
-  TraceHeader headers[kTraceParts];
-
-  Trace() : mtx(MutexTypeTrace) {}
-};
-
-namespace v3 {
-
 enum class EventType : u64 {
   kAccessExt,
   kAccessRange,
@@ -99,6 +48,8 @@ static constexpr Event NopEvent = {1, 0, EventType::kAccessExt, 0};
 // close enough to each other. Otherwise we fall back to EventAccessExt.
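To make that fallback rule concrete: EventAccess only has room for a kPCBits-wide PC value relative to the previous event, so the tracer needs a cheap fits-check before choosing between the 8-byte EventAccess and the 16-byte EventAccessExt. A minimal sketch follows; the biased-delta encoding is an assumption for illustration, not lifted from the runtime:

    #include <cstdint>

    constexpr uint64_t kPCBits = 15;  // from EventAccess below

    // True if pc can be stored as a biased signed delta from prev_pc:
    // deltas in [-2^14, 2^14) map onto [0, 2^15) after biasing.
    bool FitsInEventAccess(uint64_t prev_pc, uint64_t pc) {
      uint64_t biased = pc - prev_pc + (1ull << (kPCBits - 1));
      return biased < (1ull << kPCBits);
    }

    // A tracer would emit EventAccess when this holds and fall back to
    // EventAccessExt (which carries the full 64-bit pc) otherwise.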
 struct EventAccess {
   static constexpr uptr kPCBits = 15;
+  static_assert(kPCBits + kCompressedAddrBits + 5 == 64,
+                "unused bits in EventAccess");
+
   u64 is_access : 1;  // = 1
   u64 is_read : 1;
@@ -119,13 +70,23 @@ static_assert(sizeof(EventFunc) == 8, "bad EventFunc size");
 
 // Extended memory access with full PC.
 struct EventAccessExt {
+  // Note: precisely specifying the unused parts of the bitfield is critical
+  // for performance. If we don't specify them, the compiler will generate
+  // code to load the old value and shuffle it to extract the unused bits to
+  // apply to the new value. If we specify the unused part and store 0 in
+  // there, all that unnecessary code goes away (the store of the 0 constant
+  // is combined with the other constant parts).
+  static constexpr uptr kUnusedBits = 11;
+  static_assert(kCompressedAddrBits + kUnusedBits + 9 == 64,
+                "unused bits in EventAccessExt");
+
   u64 is_access : 1;   // = 0
   u64 is_func : 1;     // = 0
   EventType type : 3;  // = EventType::kAccessExt
   u64 is_read : 1;
   u64 is_atomic : 1;
   u64 size_log : 2;
-  u64 _ : 11;
+  u64 _ : kUnusedBits;
   u64 addr : kCompressedAddrBits;
   u64 pc;
 };
@@ -134,6 +95,8 @@ static_assert(sizeof(EventAccessExt) == 16, "bad EventAccessExt size");
 
 // Access to a memory range.
 struct EventAccessRange {
   static constexpr uptr kSizeLoBits = 13;
+  static_assert(kCompressedAddrBits + kSizeLoBits + 7 == 64,
+                "unused bits in EventAccessRange");
 
   u64 is_access : 1;  // = 0
   u64 is_func : 1;    // = 0
@@ -150,6 +113,13 @@ static_assert(sizeof(EventAccessRange) == 16, "bad EventAccessRange size");
 
 // Mutex lock.
 struct EventLock {
   static constexpr uptr kStackIDLoBits = 15;
+  static constexpr uptr kStackIDHiBits =
+      sizeof(StackID) * kByteBits - kStackIDLoBits;
+  static constexpr uptr kUnusedBits = 3;
+  static_assert(kCompressedAddrBits + kStackIDLoBits + 5 == 64,
+                "unused bits in EventLock");
+  static_assert(kCompressedAddrBits + kStackIDHiBits + kUnusedBits == 64,
+                "unused bits in EventLock");
 
   u64 is_access : 1;  // = 0
   u64 is_func : 1;    // = 0
@@ -157,29 +127,37 @@ struct EventLock {
   u64 pc : kCompressedAddrBits;
   u64 stack_lo : kStackIDLoBits;
   u64 stack_hi : sizeof(StackID) * kByteBits - kStackIDLoBits;
-  u64 _ : 3;
+  u64 _ : kUnusedBits;
   u64 addr : kCompressedAddrBits;
 };
 static_assert(sizeof(EventLock) == 16, "bad EventLock size");
 
 // Mutex unlock.
 struct EventUnlock {
+  static constexpr uptr kUnusedBits = 15;
+  static_assert(kCompressedAddrBits + kUnusedBits + 5 == 64,
+                "unused bits in EventUnlock");
+
   u64 is_access : 1;   // = 0
   u64 is_func : 1;     // = 0
   EventType type : 3;  // = EventType::kUnlock
-  u64 _ : 15;
+  u64 _ : kUnusedBits;
   u64 addr : kCompressedAddrBits;
 };
 static_assert(sizeof(EventUnlock) == 8, "bad EventUnlock size");
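The note on unused bitfield parts deserves a minimal illustration. In this hedged sketch (not TSan code), naming the filler field lets the writer store an explicit 0 into it; once every bit of the word is assigned, the compiler no longer has to preserve the old filler bits, so the constant fields typically fold into a single immediate store:

    #include <cstdint>

    struct PackedEvent {
      uint64_t type : 5;
      uint64_t size_log : 2;
      uint64_t unused : 11;  // named (instead of anonymous) so it can be written
      uint64_t addr : 46;
    };

    void StoreEvent(PackedEvent *slot, uint64_t addr) {
      slot->type = 3;    // constant parts...
      slot->size_log = 1;
      slot->unused = 0;  // ...with an explicit 0 for the filler: now all 64
                         // bits are written, so the compiler may emit one
                         // plain store instead of load + mask + or to keep
                         // the old filler bits.
      slot->addr = addr;
    }

On a typical little-endian ABI the four assignments above compile down to one store of (addr << 18) | constants, which is exactly the effect the comment describes.

 
 // Time change event.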
 struct EventTime {
+  static constexpr uptr kUnusedBits = 37;
+  static_assert(kUnusedBits + sizeof(Sid) * kByteBits + kEpochBits + 5 == 64,
+                "unused bits in EventTime");
+
   u64 is_access : 1;   // = 0
   u64 is_func : 1;     // = 0
   EventType type : 3;  // = EventType::kTime
   u64 sid : sizeof(Sid) * kByteBits;
   u64 epoch : kEpochBits;
-  u64 _ : 64 - 5 - sizeof(Sid) * kByteBits - kEpochBits;
+  u64 _ : kUnusedBits;
 };
 static_assert(sizeof(EventTime) == 8, "bad EventTime size");
 
@@ -188,10 +166,12 @@ struct Trace;
 
 struct TraceHeader {
   Trace* trace = nullptr;  // back-pointer to Trace containing this part
   INode trace_parts;       // in Trace::parts
+  INode global;            // in Context::trace_part_recycle
 };
 
 struct TracePart : TraceHeader {
-  static constexpr uptr kByteSize = 256 << 10;
+  // There are a lot of goroutines in Go, so we use smaller parts.
+  static constexpr uptr kByteSize = (SANITIZER_GO ? 128 : 256) << 10;
   static constexpr uptr kSize =
       (kByteSize - sizeof(TraceHeader)) / sizeof(Event);
   // TraceAcquire does a fast event pointer overflow check by comparing
@@ -209,13 +189,26 @@ static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size");
 
 struct Trace {
   Mutex mtx;
   IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
-  Event* final_pos =
-      nullptr;  // final position in the last part for finished threads
+  // First node non-queued into ctx->trace_part_recycle.
+  TracePart* local_head;
+  // Final position in the last part for finished threads.
+  Event* final_pos = nullptr;
+  // Number of trace parts allocated on behalf of this trace specifically.
+  // Total number of parts in this trace can be larger if we retake some
+  // parts from other traces.
+  uptr parts_allocated = 0;
 
   Trace() : mtx(MutexTypeTrace) {}
-};
-
-}  // namespace v3
+
+  // We need at least 3 parts per thread, because we want to keep at least
+  // 2 parts per thread that are not queued into ctx->trace_part_recycle
+  // (the current one being filled and one full part that ensures that
+  // we always have at least one part worth of previous memory accesses).
+  static constexpr uptr kMinParts = 3;
+
+  static constexpr uptr kFinishedThreadLo = 16;
+  static constexpr uptr kFinishedThreadHi = 64;
+};
 
 }  // namespace __tsan
diff --git a/libsanitizer/tsan/tsan_update_shadow_word.inc b/libsanitizer/tsan/tsan_update_shadow_word.inc
deleted file mode 100644
index a58ef0f..0000000
--- a/libsanitizer/tsan/tsan_update_shadow_word.inc
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-// Body of the hottest inner loop.
-// If we wrap this body into a function, compilers (both gcc and clang)
-// produce slightly less efficient code.
-//===----------------------------------------------------------------------===//
-do {
-  const unsigned kAccessSize = 1 << kAccessSizeLog;
-  u64 *sp = &shadow_mem[idx];
-  old = LoadShadow(sp);
-  if (LIKELY(old.IsZero())) {
-    if (!stored) {
-      StoreIfNotYetStored(sp, &store_word);
-      stored = true;
-    }
-    break;
-  }
-  // is the memory access equal to the previous?
-  if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) {
-    // same thread?
-    if (LIKELY(Shadow::TidsAreEqual(old, cur))) {
-      if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) {
-        StoreIfNotYetStored(sp, &store_word);
-        stored = true;
-      }
-      break;
-    }
-    if (HappensBefore(old, thr)) {
-      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) {
-        StoreIfNotYetStored(sp, &store_word);
-        stored = true;
-      }
-      break;
-    }
-    if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)))
-      break;
-    goto RACE;
-  }
-  // Do the memory accesses intersect?
-  if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
-    if (Shadow::TidsAreEqual(old, cur))
-      break;
-    if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
-      break;
-    if (LIKELY(HappensBefore(old, thr)))
-      break;
-    goto RACE;
-  }
-  // The accesses do not intersect.
-  break;
-} while (0);
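For readers comparing the old and new detectors, the deleted inner loop is easier to audit as straight-line logic. Below is a hedged editorial restatement over a mock shadow cell; the real HappensBefore consults vector clocks, which the happens_before flag here merely mimics:

    #include <cstdint>

    // Mock shadow cell, just rich enough to restate the control flow above.
    struct Cell {
      uint64_t addr = 0, size = 0, tid = 0;
      bool is_write = false, is_atomic = false, happens_before = false;
      bool IsZero() const { return size == 0; }
    };

    enum class Action { Store, Skip, ReportRace };

    static bool SameRange(const Cell &a, const Cell &b) {
      return a.addr == b.addr && a.size == b.size;
    }
    static bool Intersect(const Cell &a, const Cell &b) {
      return a.addr < b.addr + b.size && b.addr < a.addr + a.size;
    }
    static bool BothReadsOrAtomic(const Cell &old, const Cell &cur) {
      return (!old.is_write && !cur.is_write) ||
             (old.is_atomic && cur.is_atomic);
    }
    static bool WeakerOrEqual(const Cell &old, const Cell &cur) {
      // atomic is weaker than non-atomic; read is weaker than write.
      return old.is_atomic > cur.is_atomic ||
             (old.is_atomic == cur.is_atomic && old.is_write <= cur.is_write);
    }

    Action UpdateShadowWord(const Cell &old, const Cell &cur) {
      if (old.IsZero()) return Action::Store;  // empty slot: claim it
      if (SameRange(old, cur)) {
        if (old.tid == cur.tid || old.happens_before)
          return WeakerOrEqual(old, cur) ? Action::Store : Action::Skip;
        return BothReadsOrAtomic(old, cur) ? Action::Skip
                                           : Action::ReportRace;
      }
      if (!Intersect(old, cur)) return Action::Skip;  // disjoint accesses
      if (old.tid == cur.tid || BothReadsOrAtomic(old, cur) ||
          old.happens_before)
        return Action::Skip;
      return Action::ReportRace;
    }

In the new runtime this per-cell loop is gone: the same-access fast path compares whole shadow words (ContainsSameAccessFast, referenced in the tsan_shadow.h hunk above), which is also why FreedMarker is constructed so that it can never satisfy that check.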