diff options
author | Pedro Alves <palves@redhat.com> | 2014-05-29 12:50:48 +0100 |
---|---|---|
committer | Pedro Alves <palves@redhat.com> | 2014-05-29 12:50:48 +0100 |
commit | 8817a6f225766029787b5e2c1300a342b328909e (patch) | |
tree | ddd4b202fdfc37f17c2619576b1b71592c622daa /gdb/linux-nat.c | |
parent | 251bde03baf93dbb44d3785e09e03179916143e3 (diff) | |
download | gdb-8817a6f225766029787b5e2c1300a342b328909e.zip gdb-8817a6f225766029787b5e2c1300a342b328909e.tar.gz gdb-8817a6f225766029787b5e2c1300a342b328909e.tar.bz2 |
PR gdb/15713 - errors from i386_linux_resume lead to lock-up
linux_nat_resume is not considering that linux_ops->to_resume may throw:
/* Mark LWP as not stopped to prevent it from being continued by
linux_nat_resume_callback. */
lp->stopped = 0;
if (resume_many)
iterate_over_lwps (ptid, linux_nat_resume_callback, NULL);
If something within linux_nat_resume_callback throws, GDB leaves the
lwp_info as if the inferior was resumed, while it actually wasn't.
A couple of examples (there are possibly others):
- i386_linux_resume calls target_read which calls QUIT.
- if the actual ptrace resumption fails in inf_ptrace_resume,
perror_with_name is called.
If the user tries to kill the inferior at this point (or quit, which
offers to kill), GDB locks up trying to stop the lwp -- if it is
already stopped no new waitpid event gets generated for it.
Fix this by setting the stopped flag earlier, as soon as we collect a
stop event with waitpid, and clearing it always only after resuming
the lwp successfully.
Tested on x86_64 Fedora 20. Confirmed the lock-up disappears using a
local hack that forces an error in inf_ptrace_resume.
Also fixes a little "set debug lin-lwp" annoyance. Currently we always see:
Continuing.
LLR: Preparing to resume process 6802, 0, inferior_ptid Thread 0x7ffff7fc7740 (LWP 6802)
                                                                               ^^^^^^^^
RC: Resuming sibling Thread 0x7ffff77c5700 (LWP 6807), 0, resume
RC: Resuming sibling Thread 0x7ffff7fc6700 (LWP 6806), 0, resume
RC: Not resuming sibling Thread 0x7ffff7fc7740 (LWP 6802) (not stopped)
                                                ^^^^^^^^^^^^^^^^^^^^^^^
LLR: PTRACE_CONT process 6802, 0 (resume event thread)
This patch gets rid of the "Not resuming sibling" line.
2014-05-29 Pedro Alves <palves@redhat.com>
PR gdb/15713
* linux-nat.c (linux_nat_resume_callback): Rename the second
parameter to 'except'. Skip LP if it points to EXCEPT.
(linux_nat_resume): Don't mark the event lwp as not stopped
before resuming sibling lwps. Instead ask
linux_nat_resume_callback to skip the event lwp. Mark it as not
stopped after actually resuming it.
(linux_handle_syscall_trap): Mark the lwp as not stopped after
resuming it.
(wait_lwp): Mark the lwp as stopped here.
(stop_wait_callback): Mark the lwp as not stopped right after
resuming it. Don't mark lwps as stopped here.
(linux_nat_filter_event): Mark the lwp as stopped earlier.
(linux_nat_wait_1): Don't mark dead lwps as stopped here.
Diffstat (limited to 'gdb/linux-nat.c')
-rw-r--r-- | gdb/linux-nat.c | 41 |
1 files changed, 18 insertions, 23 deletions
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c index 767dcc9..3f6e5ea 100644 --- a/gdb/linux-nat.c +++ b/gdb/linux-nat.c @@ -1647,13 +1647,17 @@ resume_lwp (struct lwp_info *lp, int step, enum gdb_signal signo) } } -/* Resume LWP, with the last stop signal, if it is in pass state. */ +/* Callback for iterate_over_lwps. If LWP is EXCEPT, do nothing. + Resume LWP with the last stop signal, if it is in pass state. */ static int -linux_nat_resume_callback (struct lwp_info *lp, void *data) +linux_nat_resume_callback (struct lwp_info *lp, void *except) { enum gdb_signal signo = GDB_SIGNAL_0; + if (lp == except) + return 0; + if (lp->stopped) { struct thread_info *thread; @@ -1769,12 +1773,8 @@ linux_nat_resume (struct target_ops *ops, return; } - /* Mark LWP as not stopped to prevent it from being continued by - linux_nat_resume_callback. */ - lp->stopped = 0; - if (resume_many) - iterate_over_lwps (ptid, linux_nat_resume_callback, NULL); + iterate_over_lwps (ptid, linux_nat_resume_callback, lp); /* Convert to something the lower layer understands. 
*/ ptid = pid_to_ptid (ptid_get_lwp (lp->ptid)); @@ -1783,6 +1783,7 @@ linux_nat_resume (struct target_ops *ops, linux_nat_prepare_to_resume (lp); linux_ops->to_resume (linux_ops, ptid, step, signo); lp->stopped_by_watchpoint = 0; + lp->stopped = 0; if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, @@ -1869,6 +1870,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping) lp->syscall_state = TARGET_WAITKIND_IGNORE; ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0); + lp->stopped = 0; return 1; } @@ -1952,6 +1954,7 @@ linux_handle_syscall_trap (struct lwp_info *lp, int stopping) linux_nat_prepare_to_resume (lp); linux_ops->to_resume (linux_ops, pid_to_ptid (ptid_get_lwp (lp->ptid)), lp->step, GDB_SIGNAL_0); + lp->stopped = 0; return 1; } @@ -2161,7 +2164,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status, linux_ops->to_resume (linux_ops, pid_to_ptid (ptid_get_lwp (lp->ptid)), 0, GDB_SIGNAL_0); - + lp->stopped = 0; return 1; } @@ -2316,6 +2319,7 @@ wait_lwp (struct lwp_info *lp) } gdb_assert (WIFSTOPPED (status)); + lp->stopped = 1; /* Handle GNU/Linux's syscall SIGTRAPs. */ if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP) @@ -2569,6 +2573,7 @@ stop_wait_callback (struct lwp_info *lp, void *data) errno = 0; ptrace (PTRACE_CONT, ptid_get_lwp (lp->ptid), 0, 0); + lp->stopped = 0; if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, "PTRACE_CONT %s, 0, 0 (%s) " @@ -2595,9 +2600,7 @@ stop_wait_callback (struct lwp_info *lp, void *data) /* Save the sigtrap event. */ lp->status = status; - gdb_assert (!lp->stopped); gdb_assert (lp->signalled); - lp->stopped = 1; } else { @@ -2609,8 +2612,6 @@ stop_wait_callback (struct lwp_info *lp, void *data) "SWC: Delayed SIGSTOP caught for %s.\n", target_pid_to_str (lp->ptid)); - lp->stopped = 1; - /* Reset SIGNALLED only after the stop_wait_callback call above as it does gdb_assert on SIGNALLED. 
*/ lp->signalled = 0; @@ -2938,6 +2939,10 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p) if (!WIFSTOPPED (status) && !lp) return NULL; + /* This LWP is stopped now. (And if dead, this prevents it from + ever being continued.) */ + lp->stopped = 1; + /* Handle GNU/Linux's syscall SIGTRAPs. */ if (WIFSTOPPED (status) && WSTOPSIG (status) == SYSCALL_SIGTRAP) { @@ -2980,7 +2985,6 @@ linux_nat_filter_event (int lwpid, int status, int *new_pending_p) used. */ if (ptid_get_pid (lp->ptid) == ptid_get_lwp (lp->ptid)) { - lp->stopped = 1; iterate_over_lwps (pid_to_ptid (ptid_get_pid (lp->ptid)), stop_and_resume_callback, new_pending_p); } @@ -3325,13 +3329,9 @@ retry: " cancelled it\n", ptid_get_lwp (lp->ptid)); } - lp->stopped = 1; } else - { - lp->stopped = 1; - lp->signalled = 0; - } + lp->signalled = 0; } else if (WIFEXITED (lp->status) || WIFSIGNALED (lp->status)) { @@ -3348,11 +3348,6 @@ retry: pending for the next time we're able to report it. */ - /* Prevent trying to stop this thread again. We'll - never try to resume it because it has a pending - status. */ - lp->stopped = 1; - /* Dead LWP's aren't expected to reported a pending sigstop. */ lp->signalled = 0; |