diff options
Diffstat (limited to 'libgo/go/os/signal/signal_test.go')
-rw-r--r-- | libgo/go/os/signal/signal_test.go | 427 |
1 files changed, 260 insertions, 167 deletions
diff --git a/libgo/go/os/signal/signal_test.go b/libgo/go/os/signal/signal_test.go index 0708d4c..98a1cc1 100644 --- a/libgo/go/os/signal/signal_test.go +++ b/libgo/go/os/signal/signal_test.go @@ -22,51 +22,87 @@ import ( "time" ) -var testDeadline time.Time - -func TestMain(m *testing.M) { - flag.Parse() - - // TODO(golang.org/issue/28135): Remove this setup and use t.Deadline instead. - timeoutFlag := flag.Lookup("test.timeout") - if timeoutFlag != nil { - if d := timeoutFlag.Value.(flag.Getter).Get().(time.Duration); d != 0 { - testDeadline = time.Now().Add(d) +// settleTime is an upper bound on how long we expect signals to take to be +// delivered. Lower values make the test faster, but also flakier — especially +// on heavily loaded systems. +// +// The current value is set based on flakes observed in the Go builders. +var settleTime = 100 * time.Millisecond + +func init() { + if testenv.Builder() == "solaris-amd64-oraclerel" { + // The solaris-amd64-oraclerel builder has been observed to time out in + // TestNohup even with a 250ms settle time. + // + // Use a much longer settle time on that builder to try to suss out whether + // the test is flaky due to builder slowness (which may mean we need a + // longer GO_TEST_TIMEOUT_SCALE) or due to a dropped signal (which may + // instead need a test-skip and upstream bug filed against the Solaris + // kernel). + // + // This constant is chosen so as to make the test as generous as possible + // while still reliably completing within 3 minutes in non-short mode. + // + // See https://golang.org/issue/33174. + settleTime = 11 * time.Second + } else if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" { + if scale, err := strconv.Atoi(s); err == nil { + settleTime *= time.Duration(scale) } } - - os.Exit(m.Run()) } func waitSig(t *testing.T, c <-chan os.Signal, sig os.Signal) { + t.Helper() waitSig1(t, c, sig, false) } func waitSigAll(t *testing.T, c <-chan os.Signal, sig os.Signal) { + t.Helper() waitSig1(t, c, sig, true) } func waitSig1(t *testing.T, c <-chan os.Signal, sig os.Signal, all bool) { + t.Helper() + // Sleep multiple times to give the kernel more tries to // deliver the signal. - for i := 0; i < 10; i++ { + start := time.Now() + timer := time.NewTimer(settleTime / 10) + defer timer.Stop() + // If the caller notified for all signals on c, filter out SIGURG, + // which is used for runtime preemption and can come at unpredictable times. + // General user code should filter out all unexpected signals instead of just + // SIGURG, but since os/signal is tightly coupled to the runtime it seems + // appropriate to be stricter here. + for time.Since(start) < settleTime { select { case s := <-c: - // If the caller notified for all signals on - // c, filter out SIGURG, which is used for - // runtime preemption and can come at - // unpredictable times. - if all && s == syscall.SIGURG { - continue + if s == sig { + return } - if s != sig { + if !all || s != syscall.SIGURG { t.Fatalf("signal was %v, want %v", s, sig) } - return - - case <-time.After(100 * time.Millisecond): + case <-timer.C: + timer.Reset(settleTime / 10) } } - t.Fatalf("timeout waiting for %v", sig) + t.Fatalf("timeout after %v waiting for %v", settleTime, sig) +} + +// quiesce waits until we can be reasonably confident that all pending signals +// have been delivered by the OS. +func quiesce() { + // The kernel will deliver a signal as a thread returns + // from a syscall. If the only active thread is sleeping, + // and the system is busy, the kernel may not get around + // to waking up a thread to catch the signal. + // We try splitting up the sleep to give the kernel + // many chances to deliver the signal. + start := time.Now() + for time.Since(start) < settleTime { + time.Sleep(settleTime / 10) + } } // Test that basic signal handling works. @@ -112,50 +148,39 @@ func TestStress(t *testing.T) { dur = 100 * time.Millisecond } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4)) - done := make(chan bool) - finished := make(chan bool) - go func() { - sig := make(chan os.Signal, 1) - Notify(sig, syscall.SIGUSR1) - defer Stop(sig) - Loop: - for { - select { - case <-sig: - case <-done: - break Loop - } - } - finished <- true - }() + + sig := make(chan os.Signal, 1) + Notify(sig, syscall.SIGUSR1) + go func() { - Loop: + stop := time.After(dur) for { select { - case <-done: - break Loop + case <-stop: + // Allow enough time for all signals to be delivered before we stop + // listening for them. + quiesce() + Stop(sig) + // According to its documentation, “[w]hen Stop returns, it in + // guaranteed that c will receive no more signals.” So we can safely + // close sig here: if there is a send-after-close race here, that is a + // bug in Stop and we would like to detect it. + close(sig) + return + default: syscall.Kill(syscall.Getpid(), syscall.SIGUSR1) runtime.Gosched() } } - finished <- true }() - time.Sleep(dur) - close(done) - <-finished - <-finished - // When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip - // into subsequent TestSignal() causing failure. - // Sleep for a while to reduce the possibility of the failure. - time.Sleep(10 * time.Millisecond) + + for range sig { + // Receive signals until the sender closes sig. + } } func testCancel(t *testing.T, ignore bool) { - // Send SIGWINCH. By default this signal should be ignored. - syscall.Kill(syscall.Getpid(), syscall.SIGWINCH) - time.Sleep(100 * time.Millisecond) - // Ask to be notified on c1 when a SIGWINCH is received. c1 := make(chan os.Signal, 1) Notify(c1, syscall.SIGWINCH) @@ -175,25 +200,16 @@ func testCancel(t *testing.T, ignore bool) { waitSig(t, c2, syscall.SIGHUP) // Ignore, or reset the signal handlers for, SIGWINCH and SIGHUP. + // Either way, this should undo both calls to Notify above. if ignore { Ignore(syscall.SIGWINCH, syscall.SIGHUP) + // Don't bother deferring a call to Reset: it is documented to undo Notify, + // but its documentation says nothing about Ignore, and (as of the time of + // writing) it empirically does not undo an Ignore. } else { Reset(syscall.SIGWINCH, syscall.SIGHUP) } - // At this point we do not expect any further signals on c1. - // However, it is just barely possible that the initial SIGWINCH - // at the start of this function was delivered after we called - // Notify on c1. In that case the waitSig for SIGWINCH may have - // picked up that initial SIGWINCH, and the second SIGWINCH may - // then have been delivered on the channel. This sequence of events - // may have caused issue 15661. - // So, read any possible signal from the channel now. - select { - case <-c1: - default: - } - // Send this process a SIGWINCH. It should be ignored. syscall.Kill(syscall.Getpid(), syscall.SIGWINCH) @@ -202,22 +218,28 @@ func testCancel(t *testing.T, ignore bool) { syscall.Kill(syscall.Getpid(), syscall.SIGHUP) } + quiesce() + select { case s := <-c1: - t.Fatalf("unexpected signal %v", s) - case <-time.After(100 * time.Millisecond): + t.Errorf("unexpected signal %v", s) + default: // nothing to read - good } select { case s := <-c2: - t.Fatalf("unexpected signal %v", s) - case <-time.After(100 * time.Millisecond): + t.Errorf("unexpected signal %v", s) + default: // nothing to read - good } - // Reset the signal handlers for all signals. - Reset() + // One or both of the signals may have been blocked for this process + // by the calling process. + // Discard any queued signals now to avoid interfering with other tests. + Notify(c1, syscall.SIGWINCH) + Notify(c2, syscall.SIGHUP) + quiesce() } // Test that Reset cancels registration for listed signals on all channels. @@ -289,7 +311,10 @@ func TestDetectNohup(t *testing.T) { } } -var sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop") +var ( + sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop") + dieFromSighup = flag.Bool("die_from_sighup", false, "wait to die from uncaught SIGHUP") +) // Test that Stop cancels the channel's registrations. func TestStop(t *testing.T) { @@ -300,59 +325,74 @@ func TestStop(t *testing.T) { } for _, sig := range sigs { - // Send the signal. - // If it's SIGWINCH, we should not see it. - // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. - if sig == syscall.SIGWINCH || (sig == syscall.SIGHUP && *sendUncaughtSighup == 1) { - syscall.Kill(syscall.Getpid(), sig) - } - - // The kernel will deliver a signal as a thread returns - // from a syscall. If the only active thread is sleeping, - // and the system is busy, the kernel may not get around - // to waking up a thread to catch the signal. - // We try splitting up the sleep to give the kernel - // another chance to deliver the signal. - time.Sleep(50 * time.Millisecond) - time.Sleep(50 * time.Millisecond) - - // Ask for signal - c := make(chan os.Signal, 1) - Notify(c, sig) - defer Stop(c) - - // Send this process that signal - syscall.Kill(syscall.Getpid(), sig) - waitSig(t, c, sig) + sig := sig + t.Run(fmt.Sprint(sig), func(t *testing.T) { + // When calling Notify with a specific signal, + // independent signals should not interfere with each other, + // and we end up needing to wait for signals to quiesce a lot. + // Test the three different signals concurrently. + t.Parallel() + + // If the signal is not ignored, send the signal before registering a + // channel to verify the behavior of the default Go handler. + // If it's SIGWINCH or SIGUSR1 we should not see it. + // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. + mayHaveBlockedSignal := false + if !Ignored(sig) && (sig != syscall.SIGHUP || *sendUncaughtSighup == 1) { + syscall.Kill(syscall.Getpid(), sig) + quiesce() + + // We don't know whether sig is blocked for this process; see + // https://golang.org/issue/38165. Assume that it could be. + mayHaveBlockedSignal = true + } - Stop(c) - time.Sleep(50 * time.Millisecond) - select { - case s := <-c: - t.Fatalf("unexpected signal %v", s) - case <-time.After(50 * time.Millisecond): - // nothing to read - good - } + // Ask for signal + c := make(chan os.Signal, 1) + Notify(c, sig) - // Send the signal. - // If it's SIGWINCH, we should not see it. - // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. - if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 { + // Send this process the signal again. syscall.Kill(syscall.Getpid(), sig) - } + waitSig(t, c, sig) + + if mayHaveBlockedSignal { + // We may have received a queued initial signal in addition to the one + // that we sent after Notify. If so, waitSig may have observed that + // initial signal instead of the second one, and we may need to wait for + // the second signal to clear. Do that now. + quiesce() + select { + case <-c: + default: + } + } - time.Sleep(50 * time.Millisecond) - select { - case s := <-c: - t.Fatalf("unexpected signal %v", s) - case <-time.After(50 * time.Millisecond): - // nothing to read - good - } + // Stop watching for the signal and send it again. + // If it's SIGHUP, maybe we'll die. Let the flag tell us what to do. + Stop(c) + if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 { + syscall.Kill(syscall.Getpid(), sig) + quiesce() + + select { + case s := <-c: + t.Errorf("unexpected signal %v", s) + default: + // nothing to read - good + } + + // If we're going to receive a signal, it has almost certainly been + // received by now. However, it may have been blocked for this process — + // we don't know. Explicitly unblock it and wait for it to clear now. + Notify(c, sig) + quiesce() + Stop(c) + } + }) } } -// Test that when run under nohup, an uncaught SIGHUP does not kill the program, -// but a +// Test that when run under nohup, an uncaught SIGHUP does not kill the program. func TestNohup(t *testing.T) { // Ugly: ask for SIGHUP so that child will not have no-hup set // even if test is running under nohup environment. @@ -371,12 +411,38 @@ func TestNohup(t *testing.T) { // // Both should fail without nohup and succeed with nohup. + var subTimeout time.Duration + + var wg sync.WaitGroup + wg.Add(2) + if deadline, ok := t.Deadline(); ok { + subTimeout = time.Until(deadline) + subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output. + } for i := 1; i <= 2; i++ { - out, err := exec.Command(os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput() - if err == nil { - t.Fatalf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out) - } + i := i + go t.Run(fmt.Sprintf("uncaught-%d", i), func(t *testing.T) { + defer wg.Done() + + args := []string{ + "-test.v", + "-test.run=TestStop", + "-send_uncaught_sighup=" + strconv.Itoa(i), + "-die_from_sighup", + } + if subTimeout != 0 { + args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout)) + } + out, err := exec.Command(os.Args[0], args...).CombinedOutput() + + if err == nil { + t.Errorf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out) + } else { + t.Logf("test with -send_uncaught_sighup=%d failed as expected.\nError: %v\nOutput:\n%s", i, err, out) + } + }) } + wg.Wait() Stop(c) @@ -387,21 +453,46 @@ func TestNohup(t *testing.T) { } // Again, this time with nohup, assuming we can find it. - _, err := os.Stat("/usr/bin/nohup") + _, err := exec.LookPath("nohup") if err != nil { t.Skip("cannot find nohup; skipping second half of test") } + wg.Add(2) + if deadline, ok := t.Deadline(); ok { + subTimeout = time.Until(deadline) + subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output. + } for i := 1; i <= 2; i++ { - os.Remove("nohup.out") - out, err := exec.Command("/usr/bin/nohup", os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput() + i := i + go t.Run(fmt.Sprintf("nohup-%d", i), func(t *testing.T) { + defer wg.Done() - data, _ := ioutil.ReadFile("nohup.out") - os.Remove("nohup.out") - if err != nil { - t.Fatalf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s%s", i, err, out, data) - } + // POSIX specifies that nohup writes to a file named nohup.out if standard + // output is a terminal. However, for an exec.Command, standard output is + // not a terminal — so we don't need to read or remove that file (and, + // indeed, cannot even create it if the current user is unable to write to + // GOROOT/src, such as when GOROOT is installed and owned by root). + + args := []string{ + os.Args[0], + "-test.v", + "-test.run=TestStop", + "-send_uncaught_sighup=" + strconv.Itoa(i), + } + if subTimeout != 0 { + args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout)) + } + out, err := exec.Command("nohup", args...).CombinedOutput() + + if err != nil { + t.Errorf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s", i, err, out) + } else { + t.Logf("ran test with -send_uncaught_sighup=%d under nohup.\nOutput:\n%s", i, out) + } + }) } + wg.Wait() } // Test that SIGCONT works (issue 8953). @@ -416,7 +507,7 @@ func TestSIGCONT(t *testing.T) { // Test race between stopping and receiving a signal (issue 14571). func TestAtomicStop(t *testing.T) { if os.Getenv("GO_TEST_ATOMIC_STOP") != "" { - atomicStopTestProgram() + atomicStopTestProgram(t) t.Fatal("atomicStopTestProgram returned") } @@ -438,8 +529,8 @@ func TestAtomicStop(t *testing.T) { const execs = 10 for i := 0; i < execs; i++ { timeout := "0" - if !testDeadline.IsZero() { - timeout = testDeadline.Sub(time.Now()).String() + if deadline, ok := t.Deadline(); ok { + timeout = time.Until(deadline).String() } cmd := exec.Command(os.Args[0], "-test.run=TestAtomicStop", "-test.timeout="+timeout) cmd.Env = append(os.Environ(), "GO_TEST_ATOMIC_STOP=1") @@ -478,7 +569,7 @@ func TestAtomicStop(t *testing.T) { // atomicStopTestProgram is run in a subprocess by TestAtomicStop. // It tries to trigger a signal delivery race. This function should // either catch a signal or die from it. -func atomicStopTestProgram() { +func atomicStopTestProgram(t *testing.T) { // This test won't work if SIGINT is ignored here. if Ignored(syscall.SIGINT) { fmt.Println("SIGINT is ignored") @@ -488,10 +579,10 @@ func atomicStopTestProgram() { const tries = 10 timeout := 2 * time.Second - if !testDeadline.IsZero() { + if deadline, ok := t.Deadline(); ok { // Give each try an equal slice of the deadline, with one slice to spare for // cleanup. - timeout = testDeadline.Sub(time.Now()) / (tries + 1) + timeout = time.Until(deadline) / (tries + 1) } pid := syscall.Getpid() @@ -541,43 +632,45 @@ func TestTime(t *testing.T) { dur = 100 * time.Millisecond } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4)) - done := make(chan bool) - finished := make(chan bool) - go func() { - sig := make(chan os.Signal, 1) - Notify(sig, syscall.SIGUSR1) - defer Stop(sig) - Loop: - for { - select { - case <-sig: - case <-done: - break Loop - } - } - finished <- true - }() + + sig := make(chan os.Signal, 1) + Notify(sig, syscall.SIGUSR1) + + stop := make(chan struct{}) go func() { - Loop: for { select { - case <-done: - break Loop + case <-stop: + // Allow enough time for all signals to be delivered before we stop + // listening for them. + quiesce() + Stop(sig) + // According to its documentation, “[w]hen Stop returns, it in + // guaranteed that c will receive no more signals.” So we can safely + // close sig here: if there is a send-after-close race, that is a bug in + // Stop and we would like to detect it. + close(sig) + return + default: syscall.Kill(syscall.Getpid(), syscall.SIGUSR1) runtime.Gosched() } } - finished <- true }() + + done := make(chan struct{}) + go func() { + for range sig { + // Receive signals until the sender closes sig. + } + close(done) + }() + t0 := time.Now() for t1 := t0; t1.Sub(t0) < dur; t1 = time.Now() { } // hammering on getting time - close(done) - <-finished - <-finished - // When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip - // into subsequent TestSignal() causing failure. - // Sleep for a while to reduce the possibility of the failure. - time.Sleep(10 * time.Millisecond) + + close(stop) + <-done } |