diff options
author | jeffreytan81 <jeffreytan@meta.com> | 2024-08-05 17:26:39 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-05 17:26:39 -0700 |
commit | f838fa820f9271008617c345c477122d9e29a05c (patch) | |
tree | 17e83bb61d8354b9502fa2e0935d7a9360bf7892 /lldb/source/Target/ThreadPlanStepRange.cpp | |
parent | 84cc1865ef9202af39404ff4524a9b13df80cfc1 (diff) | |
download | llvm-f838fa820f9271008617c345c477122d9e29a05c.zip llvm-f838fa820f9271008617c345c477122d9e29a05c.tar.gz llvm-f838fa820f9271008617c345c477122d9e29a05c.tar.bz2 |
New ThreadPlanSingleThreadTimeout to resolve potential deadlock in single thread stepping (#90930)
This PR introduces a new `ThreadPlanSingleThreadTimeout` that will be
used to address potential deadlock during single-thread stepping.
While debugging a target with a non-trivial number of threads (around
5000 threads in one example target), we noticed that a simple step over
can take as long as 10 seconds. Enabling single-thread stepping mode
significantly reduces the stepping time to around 3 seconds. However,
this can introduce deadlock if we try to step over a method that depends
on other threads to release a lock.
To address this issue, we introduce a new
`ThreadPlanSingleThreadTimeout` that can be controlled by the
`target.process.thread.single-thread-plan-timeout` setting during
single-thread stepping mode. The concept involves counting the elapsed
time since the last internal stop to detect overall stepping progress.
Once a timeout occurs, we assume the target is not making progress due
to a potential deadlock, as mentioned above. We then send a new async
interrupt, resume all threads, and `ThreadPlanSingleThreadTimeout`
completes its task.
To support this design, the major changes made in this PR are:
1. `ThreadPlanSingleThreadTimeout` is popped during every internal stop
and reset (re-pushed) to the top of the stack (as a leaf node) during
resume. This is achieved by always returning `true` from
`ThreadPlanSingleThreadTimeout::DoPlanExplainsStop()` and
`ThreadPlanSingleThreadTimeout::MischiefManaged()`.
2. A new thread-specific async interrupt stop is introduced, which can
be detected/consumed by `ThreadPlanSingleThreadTimeout`.
3. The clearing of branch breakpoints in the range thread plan has been
moved from `DoPlanExplainsStop()` to `ShouldStop()`, as it is not
guaranteed that it will be called.
The detailed design is discussed in the RFC below:
[https://discourse.llvm.org/t/improve-single-thread-stepping/74599](https://discourse.llvm.org/t/improve-single-thread-stepping/74599)
---------
Co-authored-by: jeffreytan81 <jeffreytan@fb.com>
Diffstat (limited to 'lldb/source/Target/ThreadPlanStepRange.cpp')
-rw-r--r-- | lldb/source/Target/ThreadPlanStepRange.cpp | 71 |
1 files changed, 45 insertions, 26 deletions
diff --git a/lldb/source/Target/ThreadPlanStepRange.cpp b/lldb/source/Target/ThreadPlanStepRange.cpp index 801856b..3c82505 100644 --- a/lldb/source/Target/ThreadPlanStepRange.cpp +++ b/lldb/source/Target/ThreadPlanStepRange.cpp @@ -293,6 +293,20 @@ InstructionList *ThreadPlanStepRange::GetInstructionsForAddress( return nullptr; } +bool ThreadPlanStepRange::IsNextBranchBreakpointStop(StopInfoSP stop_info_sp) { + if (!m_next_branch_bp_sp) + return false; + + break_id_t bp_site_id = stop_info_sp->GetValue(); + BreakpointSiteSP bp_site_sp = + m_process.GetBreakpointSiteList().FindByID(bp_site_id); + if (!bp_site_sp) + return false; + else if (!bp_site_sp->IsBreakpointAtThisSite(m_next_branch_bp_sp->GetID())) + return false; + return true; +} + void ThreadPlanStepRange::ClearNextBranchBreakpoint() { if (m_next_branch_bp_sp) { Log *log = GetLog(LLDBLog::Step); @@ -305,6 +319,11 @@ void ThreadPlanStepRange::ClearNextBranchBreakpoint() { } } +void ThreadPlanStepRange::ClearNextBranchBreakpointExplainedStop() { + if (IsNextBranchBreakpointStop(GetPrivateStopInfo())) + ClearNextBranchBreakpoint(); +} + bool ThreadPlanStepRange::SetNextBranchBreakpoint() { if (m_next_branch_bp_sp) return true; @@ -347,7 +366,9 @@ bool ThreadPlanStepRange::SetNextBranchBreakpoint() { run_to_address = instructions->GetInstructionAtIndex(branch_index)->GetAddress(); } - + if (branch_index == pc_index) + LLDB_LOGF(log, "ThreadPlanStepRange::SetNextBranchBreakpoint - skipping " + "because current is branch instruction"); if (run_to_address.IsValid()) { const bool is_internal = true; m_next_branch_bp_sp = @@ -381,15 +402,16 @@ bool ThreadPlanStepRange::SetNextBranchBreakpoint() { return true; } else return false; - } + } else + LLDB_LOGF(log, "ThreadPlanStepRange::SetNextBranchBreakpoint - skipping " + "invalid run_to_address"); } return false; } bool ThreadPlanStepRange::NextRangeBreakpointExplainsStop( lldb::StopInfoSP stop_info_sp) { - Log *log = GetLog(LLDBLog::Step); - if (!m_next_branch_bp_sp) + if (!IsNextBranchBreakpointStop(stop_info_sp)) return false; break_id_t bp_site_id = stop_info_sp->GetValue(); @@ -397,30 +419,27 @@ bool ThreadPlanStepRange::NextRangeBreakpointExplainsStop( m_process.GetBreakpointSiteList().FindByID(bp_site_id); if (!bp_site_sp) return false; - else if (!bp_site_sp->IsBreakpointAtThisSite(m_next_branch_bp_sp->GetID())) - return false; - else { - // If we've hit the next branch breakpoint, then clear it. - size_t num_constituents = bp_site_sp->GetNumberOfConstituents(); - bool explains_stop = true; - // If all the constituents are internal, then we are probably just stepping - // over this range from multiple threads, or multiple frames, so we want to - // continue. If one is not internal, then we should not explain the stop, - // and let the user breakpoint handle the stop. - for (size_t i = 0; i < num_constituents; i++) { - if (!bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint().IsInternal()) { - explains_stop = false; - break; - } + + // If we've hit the next branch breakpoint, then clear it. + size_t num_constituents = bp_site_sp->GetNumberOfConstituents(); + bool explains_stop = true; + // If all the constituents are internal, then we are probably just stepping + // over this range from multiple threads, or multiple frames, so we want to + // continue. If one is not internal, then we should not explain the stop, + // and let the user breakpoint handle the stop. + for (size_t i = 0; i < num_constituents; i++) { + if (!bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint().IsInternal()) { + explains_stop = false; + break; } - LLDB_LOGF(log, - "ThreadPlanStepRange::NextRangeBreakpointExplainsStop - Hit " - "next range breakpoint which has %" PRIu64 - " constituents - explains stop: %u.", - (uint64_t)num_constituents, explains_stop); - ClearNextBranchBreakpoint(); - return explains_stop; } + Log *log = GetLog(LLDBLog::Step); + LLDB_LOGF(log, + "ThreadPlanStepRange::NextRangeBreakpointExplainsStop - Hit " + "next range breakpoint which has %" PRIu64 + " constituents - explains stop: %u.", + (uint64_t)num_constituents, explains_stop); + return explains_stop; } bool ThreadPlanStepRange::WillStop() { return true; } |