From f838fa820f9271008617c345c477122d9e29a05c Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Mon, 5 Aug 2024 17:26:39 -0700 Subject: New ThreadPlanSingleThreadTimeout to resolve potential deadlock in single thread stepping (#90930) This PR introduces a new `ThreadPlanSingleThreadTimeout` that will be used to address potential deadlock during single-thread stepping. While debugging a target with a non-trivial number of threads (around 5000 threads in one example target), we noticed that a simple step over can take as long as 10 seconds. Enabling single-thread stepping mode significantly reduces the stepping time to around 3 seconds. However, this can introduce deadlock if we try to step over a method that depends on other threads to release a lock. To address this issue, we introduce a new `ThreadPlanSingleThreadTimeout` that can be controlled by the `target.process.thread.single-thread-plan-timeout` setting during single-thread stepping mode. The concept involves counting the elapsed time since the last internal stop to detect overall stepping progress. Once a timeout occurs, we assume the target is not making progress due to a potential deadlock, as mentioned above. We then send a new async interrupt, resume all threads, and `ThreadPlanSingleThreadTimeout` completes its task. To support this design, the major changes made in this PR are: 1. `ThreadPlanSingleThreadTimeout` is popped during every internal stop and reset (re-pushed) to the top of the stack (as a leaf node) during resume. This is achieved by always returning `true` from `ThreadPlanSingleThreadTimeout::DoPlanExplainsStop()` and `ThreadPlanSingleThreadTimeout::MischiefManaged()`. 2. A new thread-specific async interrupt stop is introduced, which can be detected/consumed by `ThreadPlanSingleThreadTimeout`. 3. The clearing of branch breakpoints in the range thread plan has been moved from `DoPlanExplainsStop()` to `ShouldStop()`, as it is not guaranteed that it will be called. The detailed design is discussed in the RFC below: [https://discourse.llvm.org/t/improve-single-thread-stepping/74599](https://discourse.llvm.org/t/improve-single-thread-stepping/74599) --------- Co-authored-by: jeffreytan81 --- .../Process/gdb-remote/ProcessGDBRemote.cpp | 61 ++++++++++++++++++---- 1 file changed, 50 insertions(+), 11 deletions(-) (limited to 'lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp') diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 604c923..6f9c2cc 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -1730,14 +1730,24 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo( thread_sp = memory_thread_sp; if (exc_type != 0) { - const size_t exc_data_size = exc_data.size(); - - thread_sp->SetStopInfo( - StopInfoMachException::CreateStopReasonWithMachException( - *thread_sp, exc_type, exc_data_size, - exc_data_size >= 1 ? exc_data[0] : 0, - exc_data_size >= 2 ? exc_data[1] : 0, - exc_data_size >= 3 ? exc_data[2] : 0)); + // For thread plan async interrupt, creating stop info on the + // original async interrupt request thread instead. If interrupt thread + // does not exist anymore we fallback to current signal receiving thread + // instead. + ThreadSP interrupt_thread; + if (m_interrupt_tid != LLDB_INVALID_THREAD_ID) + interrupt_thread = HandleThreadAsyncInterrupt(signo, description); + if (interrupt_thread) + thread_sp = interrupt_thread; + else { + const size_t exc_data_size = exc_data.size(); + thread_sp->SetStopInfo( + StopInfoMachException::CreateStopReasonWithMachException( + *thread_sp, exc_type, exc_data_size, + exc_data_size >= 1 ? exc_data[0] : 0, + exc_data_size >= 2 ? exc_data[1] : 0, + exc_data_size >= 3 ? exc_data[2] : 0)); + } } else { bool handled = false; bool did_exec = false; @@ -1936,9 +1946,20 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo( *thread_sp, signo, description.c_str())); } } - if (!handled) - thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithSignal( - *thread_sp, signo, description.c_str())); + if (!handled) { + // For thread plan async interrupt, creating stop info on the + // original async interrupt request thread instead. If interrupt + // thread does not exist anymore we fallback to current signal + // receiving thread instead. + ThreadSP interrupt_thread; + if (m_interrupt_tid != LLDB_INVALID_THREAD_ID) + interrupt_thread = HandleThreadAsyncInterrupt(signo, description); + if (interrupt_thread) + thread_sp = interrupt_thread; + else + thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithSignal( + *thread_sp, signo, description.c_str())); + } } if (!description.empty()) { @@ -1957,6 +1978,24 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo( return thread_sp; } +ThreadSP +ProcessGDBRemote::HandleThreadAsyncInterrupt(uint8_t signo, + const std::string &description) { + ThreadSP thread_sp; + { + std::lock_guard guard(m_thread_list_real.GetMutex()); + thread_sp = m_thread_list_real.FindThreadByProtocolID(m_interrupt_tid, + /*can_update=*/false); + } + if (thread_sp) + thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithInterrupt( + *thread_sp, signo, description.c_str())); + // Clear m_interrupt_tid regardless we can find original interrupt thread or + // not. + m_interrupt_tid = LLDB_INVALID_THREAD_ID; + return thread_sp; +} + lldb::ThreadSP ProcessGDBRemote::SetThreadStopInfo(StructuredData::Dictionary *thread_dict) { static constexpr llvm::StringLiteral g_key_tid("tid"); -- cgit v1.1