//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H /* cxa_guard_impl.h - Implements the C++ runtime support for function local * static guards. * The layout of the guard object is the same across ARM and Itanium. * * The first "guard byte" (which is checked by the compiler) is set only upon * the completion of cxa release. * * The second "init byte" does the rest of the bookkeeping. It tracks if * initialization is complete or pending, and if there are waiting threads. * * If the guard variable is 64-bits and the platforms supplies a 32-bit thread * identifier, it is used to detect recursive initialization. The thread ID of * the thread currently performing initialization is stored in the second word. * * Guard Object Layout: * ------------------------------------------------------------------------- * |a: guard byte | a+1: init byte | a+2 : unused ... | a+4: thread-id ... | * ------------------------------------------------------------------------ * * Access Protocol: * For each implementation the guard byte is checked and set before accessing * the init byte. * * Overall Design: * The implementation was designed to allow each implementation to be tested * independent of the C++ runtime or platform support. * */ #include "__cxxabi_config.h" #include "include/atomic_support.h" #include #include #if defined(__has_include) # if __has_include() # include # endif #endif #include #include <__threading_support> #ifndef _LIBCXXABI_HAS_NO_THREADS #if defined(__unix__) && defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA) #pragma comment(lib, "pthread") #endif #endif // To make testing possible, this header is included from both cxa_guard.cpp // and a number of tests. // // For this reason we place everything in an anonymous namespace -- even though // we're in a header. We want the actual implementation and the tests to have // unique definitions of the types in this header (since the tests may depend // on function local statics). // // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be // defined when including this file. Only `src/cxa_guard.cpp` should define // the former. #ifdef BUILDING_CXA_GUARD # include "abort_message.h" # define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__) #elif defined(TESTING_CXA_GUARD) # define ABORT_WITH_MESSAGE(...) ::abort() #else # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined" #endif #if __has_feature(thread_sanitizer) extern "C" void __tsan_acquire(void*); extern "C" void __tsan_release(void*); #else #define __tsan_acquire(addr) ((void)0) #define __tsan_release(addr) ((void)0) #endif namespace __cxxabiv1 { // Use an anonymous namespace to ensure that the tests and actual implementation // have unique definitions of these symbols. namespace { //===----------------------------------------------------------------------===// // Misc Utilities //===----------------------------------------------------------------------===// template struct LazyValue { LazyValue() : is_init(false) {} T& get() { if (!is_init) { value = Init(); is_init = true; } return value; } private: T value; bool is_init = false; }; //===----------------------------------------------------------------------===// // PlatformGetThreadID //===----------------------------------------------------------------------===// #if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) uint32_t PlatformThreadID() { static_assert(sizeof(mach_port_t) == sizeof(uint32_t), ""); return static_cast( pthread_mach_thread_np(std::__libcpp_thread_get_current_id())); } #elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) uint32_t PlatformThreadID() { static_assert(sizeof(pid_t) == sizeof(uint32_t), ""); return static_cast(syscall(SYS_gettid)); } #else constexpr uint32_t (*PlatformThreadID)() = nullptr; #endif constexpr bool PlatformSupportsThreadID() { #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wtautological-pointer-compare" #endif return +PlatformThreadID != nullptr; #ifdef __clang__ #pragma clang diagnostic pop #endif } //===----------------------------------------------------------------------===// // GuardBase //===----------------------------------------------------------------------===// enum class AcquireResult { INIT_IS_DONE, INIT_IS_PENDING, }; constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; static constexpr uint8_t UNSET = 0; static constexpr uint8_t COMPLETE_BIT = (1 << 0); static constexpr uint8_t PENDING_BIT = (1 << 1); static constexpr uint8_t WAITING_BIT = (1 << 2); template struct GuardObject { GuardObject() = delete; GuardObject(GuardObject const&) = delete; GuardObject& operator=(GuardObject const&) = delete; explicit GuardObject(uint32_t* g) : base_address(g), guard_byte_address(reinterpret_cast(g)), init_byte_address(reinterpret_cast(g) + 1), thread_id_address(nullptr) {} explicit GuardObject(uint64_t* g) : base_address(g), guard_byte_address(reinterpret_cast(g)), init_byte_address(reinterpret_cast(g) + 1), thread_id_address(reinterpret_cast(g) + 1) {} public: /// Implements __cxa_guard_acquire AcquireResult cxa_guard_acquire() { AtomicInt guard_byte(guard_byte_address); if (guard_byte.load(std::_AO_Acquire) == COMPLETE_BIT) return INIT_IS_DONE; return derived()->acquire_init_byte(); } /// Implements __cxa_guard_release void cxa_guard_release() { AtomicInt guard_byte(guard_byte_address); // Store complete first, so that when release wakes other folks, they see // it as having been completed. guard_byte.store(COMPLETE_BIT, std::_AO_Release); derived()->release_init_byte(); } /// Implements __cxa_guard_abort void cxa_guard_abort() { derived()->abort_init_byte(); } public: /// base_address - the address of the original guard object. void* const base_address; /// The address of the guord byte at offset 0. uint8_t* const guard_byte_address; /// The address of the byte used by the implementation during initialization. uint8_t* const init_byte_address; /// An optional address storing an identifier for the thread performing initialization. /// It's used to detect recursive initialization. uint32_t* const thread_id_address; private: Derived* derived() { return static_cast(this); } }; //===----------------------------------------------------------------------===// // Single Threaded Implementation //===----------------------------------------------------------------------===// struct InitByteNoThreads : GuardObject { using GuardObject::GuardObject; AcquireResult acquire_init_byte() { if (*init_byte_address == COMPLETE_BIT) return INIT_IS_DONE; if (*init_byte_address & PENDING_BIT) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization"); *init_byte_address = PENDING_BIT; return INIT_IS_PENDING; } void release_init_byte() { *init_byte_address = COMPLETE_BIT; } void abort_init_byte() { *init_byte_address = UNSET; } }; //===----------------------------------------------------------------------===// // Global Mutex Implementation //===----------------------------------------------------------------------===// struct LibcppMutex; struct LibcppCondVar; #ifndef _LIBCXXABI_HAS_NO_THREADS struct LibcppMutex { LibcppMutex() = default; LibcppMutex(LibcppMutex const&) = delete; LibcppMutex& operator=(LibcppMutex const&) = delete; bool lock() { return std::__libcpp_mutex_lock(&mutex); } bool unlock() { return std::__libcpp_mutex_unlock(&mutex); } private: friend struct LibcppCondVar; std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER; }; struct LibcppCondVar { LibcppCondVar() = default; LibcppCondVar(LibcppCondVar const&) = delete; LibcppCondVar& operator=(LibcppCondVar const&) = delete; bool wait(LibcppMutex& mut) { return std::__libcpp_condvar_wait(&cond, &mut.mutex); } bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); } private: std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER; }; #else struct LibcppMutex {}; struct LibcppCondVar {}; #endif // !defined(_LIBCXXABI_HAS_NO_THREADS) template struct InitByteGlobalMutex : GuardObject> { using BaseT = typename InitByteGlobalMutex::GuardObject; using BaseT::BaseT; explicit InitByteGlobalMutex(uint32_t *g) : BaseT(g), has_thread_id_support(false) {} explicit InitByteGlobalMutex(uint64_t *g) : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {} public: AcquireResult acquire_init_byte() { LockGuard g("__cxa_guard_acquire"); // Check for possible recursive initialization. if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) { if (*thread_id_address == current_thread_id.get()) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization"); } // Wait until the pending bit is not set. while (*init_byte_address & PENDING_BIT) { *init_byte_address |= WAITING_BIT; global_cond.wait(global_mutex); } if (*init_byte_address == COMPLETE_BIT) return INIT_IS_DONE; if (has_thread_id_support) *thread_id_address = current_thread_id.get(); *init_byte_address = PENDING_BIT; return INIT_IS_PENDING; } void release_init_byte() { bool has_waiting; { LockGuard g("__cxa_guard_release"); has_waiting = *init_byte_address & WAITING_BIT; *init_byte_address = COMPLETE_BIT; } if (has_waiting) { if (global_cond.broadcast()) { ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release"); } } } void abort_init_byte() { bool has_waiting; { LockGuard g("__cxa_guard_abort"); if (has_thread_id_support) *thread_id_address = 0; has_waiting = *init_byte_address & WAITING_BIT; *init_byte_address = UNSET; } if (has_waiting) { if (global_cond.broadcast()) { ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort"); } } } private: using BaseT::init_byte_address; using BaseT::thread_id_address; const bool has_thread_id_support; LazyValue current_thread_id; private: struct LockGuard { LockGuard() = delete; LockGuard(LockGuard const&) = delete; LockGuard& operator=(LockGuard const&) = delete; explicit LockGuard(const char* calling_func) : calling_func(calling_func) { if (global_mutex.lock()) ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func); } ~LockGuard() { if (global_mutex.unlock()) ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func); } private: const char* const calling_func; }; }; //===----------------------------------------------------------------------===// // Futex Implementation //===----------------------------------------------------------------------===// #if defined(SYS_futex) void PlatformFutexWait(int* addr, int expect) { constexpr int WAIT = 0; syscall(SYS_futex, addr, WAIT, expect, 0); __tsan_acquire(addr); } void PlatformFutexWake(int* addr) { constexpr int WAKE = 1; __tsan_release(addr); syscall(SYS_futex, addr, WAKE, INT_MAX); } #else constexpr void (*PlatformFutexWait)(int*, int) = nullptr; constexpr void (*PlatformFutexWake)(int*) = nullptr; #endif constexpr bool PlatformSupportsFutex() { #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wtautological-pointer-compare" #endif return +PlatformFutexWait != nullptr; #ifdef __clang__ #pragma clang diagnostic pop #endif } /// InitByteFutex - Manages initialization using atomics and the futex syscall /// for waiting and waking. template struct InitByteFutex : GuardObject> { using BaseT = typename InitByteFutex::GuardObject; /// ARM Constructor explicit InitByteFutex(uint32_t *g) : BaseT(g), init_byte(this->init_byte_address), has_thread_id_support(this->thread_id_address && GetThreadIDArg), thread_id(this->thread_id_address) {} /// Itanium Constructor explicit InitByteFutex(uint64_t *g) : BaseT(g), init_byte(this->init_byte_address), has_thread_id_support(this->thread_id_address && GetThreadIDArg), thread_id(this->thread_id_address) {} public: AcquireResult acquire_init_byte() { while (true) { uint8_t last_val = UNSET; if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel, std::_AO_Acquire)) { if (has_thread_id_support) { thread_id.store(current_thread_id.get(), std::_AO_Relaxed); } return INIT_IS_PENDING; } if (last_val == COMPLETE_BIT) return INIT_IS_DONE; if (last_val & PENDING_BIT) { // Check for recursive initialization if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) { ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization"); } if ((last_val & WAITING_BIT) == 0) { // This compare exchange can fail for several reasons // (1) another thread finished the whole thing before we got here // (2) another thread set the waiting bit we were trying to thread // (3) another thread had an exception and failed to finish if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT, std::_AO_Acq_Rel, std::_AO_Release)) { // (1) success, via someone else's work! if (last_val == COMPLETE_BIT) return INIT_IS_DONE; // (3) someone else, bailed on doing the work, retry from the start! if (last_val == UNSET) continue; // (2) the waiting bit got set, so we are happy to keep waiting } } wait_on_initialization(); } } } void release_init_byte() { uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } void abort_init_byte() { if (has_thread_id_support) thread_id.store(0, std::_AO_Relaxed); uint8_t old = init_byte.exchange(0, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } private: /// Use the futex to wait on the current guard variable. Futex expects a /// 32-bit 4-byte aligned address as the first argument, so we have to use use /// the base address of the guard variable (not the init byte). void wait_on_initialization() { Wait(static_cast(this->base_address), expected_value_for_futex(PENDING_BIT | WAITING_BIT)); } void wake_all() { Wake(static_cast(this->base_address)); } private: AtomicInt init_byte; const bool has_thread_id_support; // Unsafe to use unless has_thread_id_support AtomicInt thread_id; LazyValue current_thread_id; /// Create the expected integer value for futex `wait(int* addr, int expected)`. /// We pass the base address as the first argument, So this function creates /// an zero-initialized integer with `b` copied at the correct offset. static int expected_value_for_futex(uint8_t b) { int dest_val = 0; std::memcpy(reinterpret_cast(&dest_val) + 1, &b, 1); return dest_val; } static_assert(Wait != nullptr && Wake != nullptr, ""); }; //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// template struct GlobalStatic { static T instance; }; template _LIBCPP_SAFE_STATIC T GlobalStatic::instance = {}; enum class Implementation { NoThreads, GlobalLock, Futex }; template struct SelectImplementation; template <> struct SelectImplementation { using type = InitByteNoThreads; }; template <> struct SelectImplementation { using type = InitByteGlobalMutex< LibcppMutex, LibcppCondVar, GlobalStatic::instance, GlobalStatic::instance, PlatformThreadID>; }; template <> struct SelectImplementation { using type = InitByteFutex; }; // TODO(EricWF): We should prefer the futex implementation when available. But // it should be done in a separate step from adding the implementation. constexpr Implementation CurrentImplementation = #if defined(_LIBCXXABI_HAS_NO_THREADS) Implementation::NoThreads; #elif defined(_LIBCXXABI_USE_FUTEX) Implementation::Futex; #else Implementation::GlobalLock; #endif static_assert(CurrentImplementation != Implementation::Futex || PlatformSupportsFutex(), "Futex selected but not supported"); using SelectedImplementation = SelectImplementation::type; } // end namespace } // end namespace __cxxabiv1 #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H