aboutsummaryrefslogtreecommitdiff
path: root/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
diff options
context:
space:
mode:
authorjeffreytan81 <jeffreytan@meta.com>2024-08-05 17:26:39 -0700
committerGitHub <noreply@github.com>2024-08-05 17:26:39 -0700
commitf838fa820f9271008617c345c477122d9e29a05c (patch)
tree17e83bb61d8354b9502fa2e0935d7a9360bf7892 /lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
parent84cc1865ef9202af39404ff4524a9b13df80cfc1 (diff)
downloadllvm-f838fa820f9271008617c345c477122d9e29a05c.zip
llvm-f838fa820f9271008617c345c477122d9e29a05c.tar.gz
llvm-f838fa820f9271008617c345c477122d9e29a05c.tar.bz2
New ThreadPlanSingleThreadTimeout to resolve potential deadlock in single thread stepping (#90930)
This PR introduces a new `ThreadPlanSingleThreadTimeout` that will be used to address potential deadlock during single-thread stepping. While debugging a target with a non-trivial number of threads (around 5000 threads in one example target), we noticed that a simple step over can take as long as 10 seconds. Enabling single-thread stepping mode significantly reduces the stepping time to around 3 seconds. However, this can introduce deadlock if we try to step over a method that depends on other threads to release a lock. To address this issue, we introduce a new `ThreadPlanSingleThreadTimeout` that can be controlled by the `target.process.thread.single-thread-plan-timeout` setting during single-thread stepping mode. The concept involves counting the elapsed time since the last internal stop to detect overall stepping progress. Once a timeout occurs, we assume the target is not making progress due to a potential deadlock, as mentioned above. We then send a new async interrupt, resume all threads, and `ThreadPlanSingleThreadTimeout` completes its task. To support this design, the major changes made in this PR are: 1. `ThreadPlanSingleThreadTimeout` is popped during every internal stop and reset (re-pushed) to the top of the stack (as a leaf node) during resume. This is achieved by always returning `true` from `ThreadPlanSingleThreadTimeout::DoPlanExplainsStop()` and `ThreadPlanSingleThreadTimeout::MischiefManaged()`. 2. A new thread-specific async interrupt stop is introduced, which can be detected/consumed by `ThreadPlanSingleThreadTimeout`. 3. The clearing of branch breakpoints in the range thread plan has been moved from `DoPlanExplainsStop()` to `ShouldStop()`, as it is not guaranteed that it will be called. The detailed design is discussed in the RFC below: [https://discourse.llvm.org/t/improve-single-thread-stepping/74599](https://discourse.llvm.org/t/improve-single-thread-stepping/74599) --------- Co-authored-by: jeffreytan81 <jeffreytan@fb.com>
Diffstat (limited to 'lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp')
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp61
1 files changed, 50 insertions, 11 deletions
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 604c923..6f9c2cc 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -1730,14 +1730,24 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
thread_sp = memory_thread_sp;
if (exc_type != 0) {
- const size_t exc_data_size = exc_data.size();
-
- thread_sp->SetStopInfo(
- StopInfoMachException::CreateStopReasonWithMachException(
- *thread_sp, exc_type, exc_data_size,
- exc_data_size >= 1 ? exc_data[0] : 0,
- exc_data_size >= 2 ? exc_data[1] : 0,
- exc_data_size >= 3 ? exc_data[2] : 0));
+ // For thread plan async interrupt, creating stop info on the
+ // original async interrupt request thread instead. If interrupt thread
+ // does not exist anymore we fallback to current signal receiving thread
+ // instead.
+ ThreadSP interrupt_thread;
+ if (m_interrupt_tid != LLDB_INVALID_THREAD_ID)
+ interrupt_thread = HandleThreadAsyncInterrupt(signo, description);
+ if (interrupt_thread)
+ thread_sp = interrupt_thread;
+ else {
+ const size_t exc_data_size = exc_data.size();
+ thread_sp->SetStopInfo(
+ StopInfoMachException::CreateStopReasonWithMachException(
+ *thread_sp, exc_type, exc_data_size,
+ exc_data_size >= 1 ? exc_data[0] : 0,
+ exc_data_size >= 2 ? exc_data[1] : 0,
+ exc_data_size >= 3 ? exc_data[2] : 0));
+ }
} else {
bool handled = false;
bool did_exec = false;
@@ -1936,9 +1946,20 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
*thread_sp, signo, description.c_str()));
}
}
- if (!handled)
- thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithSignal(
- *thread_sp, signo, description.c_str()));
+ if (!handled) {
+ // For thread plan async interrupt, creating stop info on the
+ // original async interrupt request thread instead. If interrupt
+ // thread does not exist anymore we fallback to current signal
+ // receiving thread instead.
+ ThreadSP interrupt_thread;
+ if (m_interrupt_tid != LLDB_INVALID_THREAD_ID)
+ interrupt_thread = HandleThreadAsyncInterrupt(signo, description);
+ if (interrupt_thread)
+ thread_sp = interrupt_thread;
+ else
+ thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithSignal(
+ *thread_sp, signo, description.c_str()));
+ }
}
if (!description.empty()) {
@@ -1957,6 +1978,24 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
return thread_sp;
}
+ThreadSP
+ProcessGDBRemote::HandleThreadAsyncInterrupt(uint8_t signo,
+ const std::string &description) {
+ ThreadSP thread_sp;
+ {
+ std::lock_guard<std::recursive_mutex> guard(m_thread_list_real.GetMutex());
+ thread_sp = m_thread_list_real.FindThreadByProtocolID(m_interrupt_tid,
+ /*can_update=*/false);
+ }
+ if (thread_sp)
+ thread_sp->SetStopInfo(StopInfo::CreateStopReasonWithInterrupt(
+ *thread_sp, signo, description.c_str()));
+ // Clear m_interrupt_tid regardless we can find original interrupt thread or
+ // not.
+ m_interrupt_tid = LLDB_INVALID_THREAD_ID;
+ return thread_sp;
+}
+
lldb::ThreadSP
ProcessGDBRemote::SetThreadStopInfo(StructuredData::Dictionary *thread_dict) {
static constexpr llvm::StringLiteral g_key_tid("tid");