diff options
Diffstat (limited to 'gdb/linux-nat.c')
-rw-r--r-- | gdb/linux-nat.c | 450 |
1 files changed, 92 insertions, 358 deletions
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c index 95192f6..af1b764 100644 --- a/gdb/linux-nat.c +++ b/gdb/linux-nat.c @@ -24,10 +24,8 @@ #include "nat/linux-nat.h" #include "nat/linux-waitpid.h" #include "gdb_wait.h" -#ifdef HAVE_TKILL_SYSCALL #include <unistd.h> #include <sys/syscall.h> -#endif #include "nat/gdb_ptrace.h" #include "linux-nat.h" #include "nat/linux-ptrace.h" @@ -78,29 +76,35 @@ Waiting for events in sync mode =============================== -When waiting for an event in a specific thread, we just use waitpid, passing -the specific pid, and not passing WNOHANG. - -When waiting for an event in all threads, waitpid is not quite good. Prior to -version 2.4, Linux can either wait for event in main thread, or in secondary -threads. (2.4 has the __WALL flag). So, if we use blocking waitpid, we might -miss an event. The solution is to use non-blocking waitpid, together with -sigsuspend. First, we use non-blocking waitpid to get an event in the main -process, if any. Second, we use non-blocking waitpid with the __WCLONED -flag to check for events in cloned processes. If nothing is found, we use -sigsuspend to wait for SIGCHLD. When SIGCHLD arrives, it means something -happened to a child process -- and SIGCHLD will be delivered both for events -in main debugged process and in cloned processes. As soon as we know there's -an event, we get back to calling nonblocking waitpid with and without -__WCLONED. - -Note that SIGCHLD should be blocked between waitpid and sigsuspend calls, -so that we don't miss a signal. If SIGCHLD arrives in between, when it's -blocked, the signal becomes pending and sigsuspend immediately -notices it and returns. - -Waiting for events in async mode -================================ +When waiting for an event in a specific thread, we just use waitpid, +passing the specific pid, and not passing WNOHANG. 
+ +When waiting for an event in all threads, waitpid is not quite good: + +- If the thread group leader exits while other threads in the thread + group still exist, waitpid(TGID, ...) hangs. That waitpid won't + return an exit status until the other threads in the group are + reaped. + +- When a non-leader thread execs, that thread just vanishes without + reporting an exit (so we'd hang if we waited for it explicitly in + that case). The exec event is instead reported to the TGID pid. + +The solution is to always use -1 and WNOHANG, together with +sigsuspend. + +First, we use non-blocking waitpid to check for events. If nothing is +found, we use sigsuspend to wait for SIGCHLD. When SIGCHLD arrives, +it means something happened to a child process. As soon as we know +there's an event, we get back to calling nonblocking waitpid. + +Note that SIGCHLD should be blocked between waitpid and sigsuspend +calls, so that we don't miss a signal. If SIGCHLD arrives in between, +when it's blocked, the signal becomes pending and sigsuspend +immediately notices it and returns. + +Waiting for events in async mode (TARGET_WNOHANG) +================================================= In async mode, GDB should always be ready to handle both user input and target events, so neither blocking waitpid nor sigsuspend are @@ -159,7 +163,24 @@ We could use a real-time signal instead. This would solve those problems; we could use PTRACE_GETSIGINFO to locate the specific stop signals sent by GDB. But we would still have to have some support for SIGSTOP, since PTRACE_ATTACH generates it, and there are races with trying to find a signal that is not -blocked. */ +blocked. + +Exec events +=========== + +The case of a thread group (process) with 3 or more threads, and a +thread other than the leader execs is worth detailing: + +On an exec, the Linux kernel destroys all threads except the execing +one in the thread group, and resets the execing thread's tid to the +tgid. 
No exit notification is sent for the execing thread -- from the +ptracer's perspective, it appears as though the execing thread just +vanishes. Until we reap all other threads except the leader and the +execing thread, the leader will be zombie, and the execing thread will +be in `D (disc sleep)' state. As soon as all other threads are +reaped, the execing thread changes its tid to the tgid, and the +previous (zombie) leader vanishes, giving place to the "new" +leader. */ #ifndef O_LARGEFILE #define O_LARGEFILE 0 @@ -652,39 +673,6 @@ linux_child_set_syscall_catchpoint (struct target_ops *self, return 0; } -/* On GNU/Linux there are no real LWP's. The closest thing to LWP's - are processes sharing the same VM space. A multi-threaded process - is basically a group of such processes. However, such a grouping - is almost entirely a user-space issue; the kernel doesn't enforce - such a grouping at all (this might change in the future). In - general, we'll rely on the threads library (i.e. the GNU/Linux - Threads library) to provide such a grouping. - - It is perfectly well possible to write a multi-threaded application - without the assistance of a threads library, by using the clone - system call directly. This module should be able to give some - rudimentary support for debugging such applications if developers - specify the CLONE_PTRACE flag in the clone system call, and are - using the Linux kernel 2.4 or above. - - Note that there are some peculiarities in GNU/Linux that affect - this code: - - - In general one should specify the __WCLONE flag to waitpid in - order to make it report events for any of the cloned processes - (and leave it out for the initial process). However, if a cloned - process has exited the exit status is only reported if the - __WCLONE flag is absent. Linux kernel 2.4 has a __WALL flag, but - we cannot use it since GDB must work on older systems too. 
- - - When a traced, cloned process exits and is waited for by the - debugger, the kernel reassigns it to the original parent and - keeps it around as a "zombie". Somehow, the GNU/Linux Threads - library doesn't notice this, which leads to the "zombie problem": - When debugged a multi-threaded process that spawns a lot of - threads will run out of processes, even if the threads exit, - because the "zombies" stay around. */ - /* List of known LWPs. */ struct lwp_info *lwp_list; @@ -748,7 +736,6 @@ linux_nat_pass_signals (struct target_ops *self, /* Prototypes for local functions. */ static int stop_wait_callback (struct lwp_info *lp, void *data); -static int linux_thread_alive (ptid_t ptid); static char *linux_child_pid_to_exec_file (struct target_ops *self, int pid); static int resume_stopped_resumed_lwps (struct lwp_info *lp, void *data); @@ -964,8 +951,7 @@ exit_lwp (struct lwp_info *lp) Returns a wait status for that LWP, to cache. */ static int -linux_nat_post_attach_wait (ptid_t ptid, int first, int *cloned, - int *signalled) +linux_nat_post_attach_wait (ptid_t ptid, int first, int *signalled) { pid_t new_pid, pid = ptid_get_lwp (ptid); int status; @@ -999,17 +985,7 @@ linux_nat_post_attach_wait (ptid_t ptid, int first, int *cloned, /* Make sure the initial process is stopped. The user-level threads layer might want to poke around in the inferior, and that won't work if things haven't stabilized yet. */ - new_pid = my_waitpid (pid, &status, 0); - if (new_pid == -1 && errno == ECHILD) - { - if (first) - warning (_("%s is a cloned process"), target_pid_to_str (ptid)); - - /* Try again with __WCLONE to check cloned processes. 
*/ - new_pid = my_waitpid (pid, &status, __WCLONE); - *cloned = 1; - } - + new_pid = my_waitpid (pid, &status, __WALL); gdb_assert (pid == new_pid); if (!WIFSTOPPED (status)) @@ -1103,7 +1079,6 @@ attach_proc_task_lwp_callback (ptid_t ptid) target_pid_to_str (ptid)); lp = add_lwp (ptid); - lp->cloned = 1; /* The next time we wait for this LWP we'll see a SIGSTOP as PTRACE_ATTACH brings it to a halt. */ @@ -1166,8 +1141,7 @@ linux_nat_attach (struct target_ops *ops, const char *args, int from_tty) /* Add the initial process as the first LWP to the list. */ lp = add_initial_lwp (ptid); - status = linux_nat_post_attach_wait (lp->ptid, 1, &lp->cloned, - &lp->signalled); + status = linux_nat_post_attach_wait (lp->ptid, 1, &lp->signalled); if (!WIFSTOPPED (status)) { if (WIFEXITED (status)) @@ -1700,27 +1674,17 @@ linux_nat_resume (struct target_ops *ops, static int kill_lwp (int lwpid, int signo) { - /* Use tkill, if possible, in case we are using nptl threads. If tkill - fails, then we are not using nptl threads and we should be using kill. */ - -#ifdef HAVE_TKILL_SYSCALL - { - static int tkill_failed; - - if (!tkill_failed) - { - int ret; - - errno = 0; - ret = syscall (__NR_tkill, lwpid, signo); - if (errno != ENOSYS) - return ret; - tkill_failed = 1; - } - } -#endif + int ret; - return kill (lwpid, signo); + errno = 0; + ret = syscall (__NR_tkill, lwpid, signo); + if (errno == ENOSYS) + { + /* If tkill fails, then we are not using nptl threads, a + configuration we no longer support. */ + perror_with_name (("tkill")); + } + return ret; } /* Handle a GNU/Linux syscall trap wait response. If we see a syscall @@ -1880,8 +1844,7 @@ linux_handle_extended_wait (struct lwp_info *lp, int status) { /* The new child has a pending SIGSTOP. We can't affect it until it hits the SIGSTOP, but we're already attached. */ - ret = my_waitpid (new_pid, &status, - (event == PTRACE_EVENT_CLONE) ? 
__WCLONE : 0); + ret = my_waitpid (new_pid, &status, __WALL); if (ret == -1) perror_with_name (_("waiting for new child")); else if (ret != new_pid) @@ -1944,7 +1907,6 @@ linux_handle_extended_wait (struct lwp_info *lp, int status) pid, new_pid); new_lp = add_lwp (ptid_build (ptid_get_pid (lp->ptid), new_pid, 0)); - new_lp->cloned = 1; new_lp->stopped = 1; new_lp->resumed = 1; @@ -2059,19 +2021,13 @@ wait_lwp (struct lwp_info *lp) for (;;) { - /* If my_waitpid returns 0 it means the __WCLONE vs. non-__WCLONE kind - was right and we should just call sigsuspend. */ - - pid = my_waitpid (ptid_get_lwp (lp->ptid), &status, WNOHANG); - if (pid == -1 && errno == ECHILD) - pid = my_waitpid (ptid_get_lwp (lp->ptid), &status, __WCLONE | WNOHANG); + pid = my_waitpid (ptid_get_lwp (lp->ptid), &status, __WALL | WNOHANG); if (pid == -1 && errno == ECHILD) { /* The thread has previously exited. We need to delete it - now because, for some vendor 2.4 kernels with NPTL - support backported, there won't be an exit event unless - it is the main thread. 2.6 kernels will report an exit - event for each thread that exits, as expected. */ + now because if this was a non-leader thread execing, we + won't get an exit event. See comments on exec events at + the top of the file. */ thread_dead = 1; if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, "WL: %s vanished.\n", @@ -2084,9 +2040,8 @@ wait_lwp (struct lwp_info *lp) Thread group leader may have exited in which case we'll lock up in waitpid if there are other threads, even if they are all zombies too. Basically, we're not supposed to use waitpid this way. - __WCLONE is not applicable for the leader so we can't use that. - LINUX_NAT_THREAD_ALIVE cannot be used here as it requires a STOPPED - process; it gets ESRCH both for the zombie and for running processes. + tkill(pid,0) cannot be used here as it gets ESRCH for both + zombie and running processes. 
As a workaround, check if we're waiting for the thread group leader and if it's a zombie, and avoid calling waitpid if it is. @@ -2572,15 +2527,6 @@ status_callback (struct lwp_info *lp, void *data) return 1; } -/* Return non-zero if LP isn't stopped. */ - -static int -running_callback (struct lwp_info *lp, void *data) -{ - return (!lp->stopped - || (lwp_status_pending_p (lp) && lp->resumed)); -} - /* Count the LWP's that have had events. */ static int @@ -2860,59 +2806,6 @@ resumed_callback (struct lwp_info *lp, void *data) return lp->resumed; } -/* Stop an active thread, verify it still exists, then resume it. If - the thread ends up with a pending status, then it is not resumed, - and *DATA (really a pointer to int), is set. */ - -static int -stop_and_resume_callback (struct lwp_info *lp, void *data) -{ - if (!lp->stopped) - { - ptid_t ptid = lp->ptid; - - stop_callback (lp, NULL); - stop_wait_callback (lp, NULL); - - /* Resume if the lwp still exists, and the core wanted it - running. */ - lp = find_lwp_pid (ptid); - if (lp != NULL) - { - if (lp->last_resume_kind == resume_stop - && !lwp_status_pending_p (lp)) - { - /* The core wanted the LWP to stop. Even if it stopped - cleanly (with SIGSTOP), leave the event pending. */ - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "SARC: core wanted LWP %ld stopped " - "(leaving SIGSTOP pending)\n", - ptid_get_lwp (lp->ptid)); - lp->status = W_STOPCODE (SIGSTOP); - } - - if (!lwp_status_pending_p (lp)) - { - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "SARC: re-resuming LWP %ld\n", - ptid_get_lwp (lp->ptid)); - resume_lwp (lp, lp->step, GDB_SIGNAL_0); - } - else - { - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "SARC: not re-resuming LWP %ld " - "(has pending)\n", - ptid_get_lwp (lp->ptid)); - } - } - } - return 0; -} - /* Check if we should go on and pass this event to common code. Return the affected lwp if we are, or NULL otherwise. 
*/ @@ -3017,35 +2910,16 @@ linux_nat_filter_event (int lwpid, int status) { if (num_lwps (ptid_get_pid (lp->ptid)) > 1) { - /* If this is the main thread, we must stop all threads and - verify if they are still alive. This is because in the - nptl thread model on Linux 2.4, there is no signal issued - for exiting LWPs other than the main thread. We only get - the main thread exit signal once all child threads have - already exited. If we stop all the threads and use the - stop_wait_callback to check if they have exited we can - determine whether this signal should be ignored or - whether it means the end of the debugged application, - regardless of which threading model is being used. */ - if (ptid_get_pid (lp->ptid) == ptid_get_lwp (lp->ptid)) - { - iterate_over_lwps (pid_to_ptid (ptid_get_pid (lp->ptid)), - stop_and_resume_callback, NULL); - } - if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, "LLW: %s exited.\n", target_pid_to_str (lp->ptid)); - if (num_lwps (ptid_get_pid (lp->ptid)) > 1) - { - /* If there is at least one more LWP, then the exit signal - was not the end of the debugged application and should be - ignored. */ - exit_lwp (lp); - return NULL; - } + /* If there is at least one more LWP, then the exit signal + was not the end of the debugged application and should be + ignored. */ + exit_lwp (lp); + return NULL; } /* Note that even if the leader was ptrace-stopped, it can still @@ -3072,28 +2946,6 @@ linux_nat_filter_event (int lwpid, int status) return lp; } - /* Check if the current LWP has previously exited. In the nptl - thread model, LWPs other than the main thread do not issue - signals when they exit so we must check whenever the thread has - stopped. A similar check is made in stop_wait_callback(). 
*/ - if (num_lwps (ptid_get_pid (lp->ptid)) > 1 && !linux_thread_alive (lp->ptid)) - { - ptid_t ptid = pid_to_ptid (ptid_get_pid (lp->ptid)); - - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "LLW: %s exited.\n", - target_pid_to_str (lp->ptid)); - - exit_lwp (lp); - - /* Make sure there is at least one thread running. */ - gdb_assert (iterate_over_lwps (ptid, running_callback, NULL)); - - /* Discard the event. */ - return NULL; - } - /* Make sure we don't report a SIGSTOP that we sent ourselves in an attempt to stop an LWP. */ if (lp->signalled @@ -3245,18 +3097,8 @@ check_zombie_leaders (void) leader's exit status until all other threads are gone. - There are 3 or more threads in the group, and a thread - other than the leader exec'd. On an exec, the Linux - kernel destroys all other threads (except the execing - one) in the thread group, and resets the execing thread's - tid to the tgid. No exit notification is sent for the - execing thread -- from the ptracer's perspective, it - appears as though the execing thread just vanishes. - Until we reap all other threads except the leader and the - execing thread, the leader will be zombie, and the - execing thread will be in `D (disc sleep)'. As soon as - all other threads are reaped, the execing thread changes - it's tid to the tgid, and the previous (zombie) leader - vanishes, giving place to the "new" leader. We could try + other than the leader exec'd. See comments on exec + events at the top of the file. We could try distinguishing the exit and exec cases, by waiting once more, and seeing if something comes out, but it doesn't sound useful. The previous leader _does_ go away, and @@ -3337,9 +3179,7 @@ linux_nat_wait_1 (struct target_ops *ops, the TGID pid. 
*/ errno = 0; - lwpid = my_waitpid (-1, &status, __WCLONE | WNOHANG); - if (lwpid == 0 || (lwpid == -1 && errno == ECHILD)) - lwpid = my_waitpid (-1, &status, WNOHANG); + lwpid = my_waitpid (-1, &status, __WALL | WNOHANG); if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, @@ -3669,43 +3509,19 @@ kill_wait_callback (struct lwp_info *lp, void *data) SIGSTOPs, pending SIGTRAPs, etc.) to make sure the current program doesn't interfere with any following debugging session. */ - /* For cloned processes we must check both with __WCLONE and - without, since the exit status of a cloned process isn't reported - with __WCLONE. */ - if (lp->cloned) - { - do - { - pid = my_waitpid (ptid_get_lwp (lp->ptid), NULL, __WCLONE); - if (pid != (pid_t) -1) - { - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "KWC: wait %s received unknown.\n", - target_pid_to_str (lp->ptid)); - /* The Linux kernel sometimes fails to kill a thread - completely after PTRACE_KILL; that goes from the stop - point in do_fork out to the one in - get_signal_to_deliever and waits again. So kill it - again. */ - kill_callback (lp, NULL); - } - } - while (pid == ptid_get_lwp (lp->ptid)); - - gdb_assert (pid == -1 && errno == ECHILD); - } - do { - pid = my_waitpid (ptid_get_lwp (lp->ptid), NULL, 0); + pid = my_waitpid (ptid_get_lwp (lp->ptid), NULL, __WALL); if (pid != (pid_t) -1) { if (debug_linux_nat) fprintf_unfiltered (gdb_stdlog, - "KWC: wait %s received unk.\n", + "KWC: wait %s received unknown.\n", target_pid_to_str (lp->ptid)); - /* See the call to kill_callback above. */ + /* The Linux kernel sometimes fails to kill a thread + completely after PTRACE_KILL; that goes from the stop + point in do_fork out to the one in get_signal_to_deliver + and waits again. So kill it again. 
*/ kill_callback (lp, NULL); } } @@ -3891,33 +3707,10 @@ linux_nat_xfer_partial (struct target_ops *ops, enum target_object object, } static int -linux_thread_alive (ptid_t ptid) -{ - int err, tmp_errno; - - gdb_assert (ptid_lwp_p (ptid)); - - /* Send signal 0 instead of anything ptrace, because ptracing a - running thread errors out claiming that the thread doesn't - exist. */ - err = kill_lwp (ptid_get_lwp (ptid), 0); - tmp_errno = errno; - if (debug_linux_nat) - fprintf_unfiltered (gdb_stdlog, - "LLTA: KILL(SIG0) %s (%s)\n", - target_pid_to_str (ptid), - err ? safe_strerror (tmp_errno) : "OK"); - - if (err != 0) - return 0; - - return 1; -} - -static int linux_nat_thread_alive (struct target_ops *ops, ptid_t ptid) { - return linux_thread_alive (ptid); + /* As long as a PTID is in lwp list, consider it alive. */ + return find_lwp_pid (ptid) != NULL; } /* Implement the to_update_thread_list target method for this @@ -3926,15 +3719,10 @@ linux_nat_thread_alive (struct target_ops *ops, ptid_t ptid) static void linux_nat_update_thread_list (struct target_ops *ops) { - if (linux_supports_traceclone ()) - { - /* With support for clone events, we add/delete threads from the - list as clone/exit events are processed, so just try deleting - exited threads still in the thread list. */ - delete_exited_threads (); - } - else - prune_threads (); + /* We add/delete threads from the list as clone/exit events are + processed, so just try deleting exited threads still in the + thread list. */ + delete_exited_threads (); } static char * @@ -5011,70 +4799,16 @@ Enables printf debugging output."), the GNU/Linux Threads library and therefore doesn't really belong here. */ -/* Read variable NAME in the target and return its value if found. - Otherwise return zero. It is assumed that the type of the variable - is `int'. 
*/ - -static int -get_signo (const char *name) -{ - struct bound_minimal_symbol ms; - int signo; - - ms = lookup_minimal_symbol (name, NULL, NULL); - if (ms.minsym == NULL) - return 0; - - if (target_read_memory (BMSYMBOL_VALUE_ADDRESS (ms), (gdb_byte *) &signo, - sizeof (signo)) != 0) - return 0; - - return signo; -} - /* Return the set of signals used by the threads library in *SET. */ void lin_thread_get_thread_signals (sigset_t *set) { - struct sigaction action; - int restart, cancel; - - sigemptyset (&blocked_mask); sigemptyset (set); - restart = get_signo ("__pthread_sig_restart"); - cancel = get_signo ("__pthread_sig_cancel"); - - /* LinuxThreads normally uses the first two RT signals, but in some legacy - cases may use SIGUSR1/SIGUSR2. NPTL always uses RT signals, but does - not provide any way for the debugger to query the signal numbers - - fortunately they don't change! */ - - if (restart == 0) - restart = __SIGRTMIN; - - if (cancel == 0) - cancel = __SIGRTMIN + 1; - - sigaddset (set, restart); - sigaddset (set, cancel); - - /* The GNU/Linux Threads library makes terminating threads send a - special "cancel" signal instead of SIGCHLD. Make sure we catch - those (to prevent them from terminating GDB itself, which is - likely to be their default action) and treat them the same way as - SIGCHLD. */ - - action.sa_handler = sigchld_handler; - sigemptyset (&action.sa_mask); - action.sa_flags = SA_RESTART; - sigaction (cancel, &action, NULL); - - /* We block the "cancel" signal throughout this code ... */ - sigaddset (&blocked_mask, cancel); - sigprocmask (SIG_BLOCK, &blocked_mask, NULL); - - /* ... except during a sigsuspend. */ - sigdelset (&suspend_mask, cancel); + /* NPTL reserves the first two RT signals, but does not provide any + way for the debugger to query the signal numbers - fortunately + they don't change. */ + sigaddset (set, __SIGRTMIN); + sigaddset (set, __SIGRTMIN + 1); } |