1 files changed, 260 insertions, 167 deletions
diff --git a/libgo/go/os/signal/signal_test.go b/libgo/go/os/signal/signal_test.go
index 0708d4c..98a1cc1 100644
--- a/libgo/go/os/signal/signal_test.go
+++ b/libgo/go/os/signal/signal_test.go
@@ -22,51 +22,87 @@ import (
 	"time"
 )
 
-var testDeadline time.Time
-
-func TestMain(m *testing.M) {
-	flag.Parse()
-
-	// TODO(golang.org/issue/28135): Remove this setup and use t.Deadline instead.
-	timeoutFlag := flag.Lookup("test.timeout")
-	if timeoutFlag != nil {
-		if d := timeoutFlag.Value.(flag.Getter).Get().(time.Duration); d != 0 {
-			testDeadline = time.Now().Add(d)
+// settleTime is an upper bound on how long we expect signals to take to be
+// delivered. Lower values make the test faster, but also flakier — especially
+// on heavily loaded systems.
+//
+// The current value is set based on flakes observed in the Go builders.
+var settleTime = 100 * time.Millisecond
+
+func init() {
+	if testenv.Builder() == "solaris-amd64-oraclerel" {
+		// The solaris-amd64-oraclerel builder has been observed to time out in
+		// TestNohup even with a 250ms settle time.
+		//
+		// Use a much longer settle time on that builder to try to suss out whether
+		// the test is flaky due to builder slowness (which may mean we need a
+		// longer GO_TEST_TIMEOUT_SCALE) or due to a dropped signal (which may
+		// instead need a test-skip and upstream bug filed against the Solaris
+		// kernel).
+		//
+		// This constant is chosen so as to make the test as generous as possible
+		// while still reliably completing within 3 minutes in non-short mode.
+		//
+		// See https://golang.org/issue/33174.
+		settleTime = 11 * time.Second
+	} else if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
+		if scale, err := strconv.Atoi(s); err == nil {
+			settleTime *= time.Duration(scale)
 		}
 	}
-
-	os.Exit(m.Run())
 }
 
 func waitSig(t *testing.T, c <-chan os.Signal, sig os.Signal) {
+	t.Helper()
 	waitSig1(t, c, sig, false)
 }
 func waitSigAll(t *testing.T, c <-chan os.Signal, sig os.Signal) {
+	t.Helper()
 	waitSig1(t, c, sig, true)
 }
 
 func waitSig1(t *testing.T, c <-chan os.Signal, sig os.Signal, all bool) {
+	t.Helper()
+
 	// Sleep multiple times to give the kernel more tries to
 	// deliver the signal.
-	for i := 0; i < 10; i++ {
+	start := time.Now()
+	timer := time.NewTimer(settleTime / 10)
+	defer timer.Stop()
+	// If the caller notified for all signals on c, filter out SIGURG,
+	// which is used for runtime preemption and can come at unpredictable times.
+	// General user code should filter out all unexpected signals instead of just
+	// SIGURG, but since os/signal is tightly coupled to the runtime it seems
+	// appropriate to be stricter here.
+	for time.Since(start) < settleTime {
 		select {
 		case s := <-c:
-			// If the caller notified for all signals on
-			// c, filter out SIGURG, which is used for
-			// runtime preemption and can come at
-			// unpredictable times.
-			if all && s == syscall.SIGURG {
-				continue
+			if s == sig {
+				return
 			}
-			if s != sig {
+			if !all || s != syscall.SIGURG {
 				t.Fatalf("signal was %v, want %v", s, sig)
 			}
-			return
-
-		case <-time.After(100 * time.Millisecond):
+		case <-timer.C:
+			timer.Reset(settleTime / 10)
 		}
 	}
-	t.Fatalf("timeout waiting for %v", sig)
+	t.Fatalf("timeout after %v waiting for %v", settleTime, sig)
+}
+
+// quiesce waits until we can be reasonably confident that all pending signals
+// have been delivered by the OS.
+func quiesce() {
+	// The kernel will deliver a signal as a thread returns
+	// from a syscall. If the only active thread is sleeping,
+	// and the system is busy, the kernel may not get around
+	// to waking up a thread to catch the signal.
+	// We try splitting up the sleep to give the kernel
+	// many chances to deliver the signal.
+	start := time.Now()
+	for time.Since(start) < settleTime {
+		time.Sleep(settleTime / 10)
+	}
 }
 
 // Test that basic signal handling works.
@@ -112,50 +148,39 @@ func TestStress(t *testing.T) {
 		dur = 100 * time.Millisecond
 	}
 	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
-	done := make(chan bool)
-	finished := make(chan bool)
-	go func() {
-		sig := make(chan os.Signal, 1)
-		Notify(sig, syscall.SIGUSR1)
-		defer Stop(sig)
-	Loop:
-		for {
-			select {
-			case <-sig:
-			case <-done:
-				break Loop
-			}
-		}
-		finished <- true
-	}()
+
+	sig := make(chan os.Signal, 1)
+	Notify(sig, syscall.SIGUSR1)
+
 	go func() {
-	Loop:
+		stop := time.After(dur)
 		for {
 			select {
-			case <-done:
-				break Loop
+			case <-stop:
+				// Allow enough time for all signals to be delivered before we stop
+				// listening for them.
+				quiesce()
+				Stop(sig)
+				// According to its documentation, “[w]hen Stop returns, it in
+				// guaranteed that c will receive no more signals.” So we can safely
+				// close sig here: if there is a send-after-close race here, that is a
+				// bug in Stop and we would like to detect it.
+				close(sig)
+				return
+
 			default:
 				syscall.Kill(syscall.Getpid(), syscall.SIGUSR1)
 				runtime.Gosched()
 			}
 		}
-		finished <- true
 	}()
-	time.Sleep(dur)
-	close(done)
-	<-finished
-	<-finished
-	// When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip
-	// into subsequent TestSignal() causing failure.
-	// Sleep for a while to reduce the possibility of the failure.
-	time.Sleep(10 * time.Millisecond)
+
+	for range sig {
+		// Receive signals until the sender closes sig.
+	}
 }
 
 func testCancel(t *testing.T, ignore bool) {
-	// Send SIGWINCH. By default this signal should be ignored.
-	syscall.Kill(syscall.Getpid(), syscall.SIGWINCH)
-	time.Sleep(100 * time.Millisecond)
-
 	// Ask to be notified on c1 when a SIGWINCH is received.
 	c1 := make(chan os.Signal, 1)
 	Notify(c1, syscall.SIGWINCH)
@@ -175,25 +200,16 @@ func testCancel(t *testing.T, ignore bool) {
 	waitSig(t, c2, syscall.SIGHUP)
 
 	// Ignore, or reset the signal handlers for, SIGWINCH and SIGHUP.
+	// Either way, this should undo both calls to Notify above.
 	if ignore {
 		Ignore(syscall.SIGWINCH, syscall.SIGHUP)
+		// Don't bother deferring a call to Reset: it is documented to undo Notify,
+		// but its documentation says nothing about Ignore, and (as of the time of
+		// writing) it empirically does not undo an Ignore.
 	} else {
 		Reset(syscall.SIGWINCH, syscall.SIGHUP)
 	}
 
-	// At this point we do not expect any further signals on c1.
-	// However, it is just barely possible that the initial SIGWINCH
-	// at the start of this function was delivered after we called
-	// Notify on c1. In that case the waitSig for SIGWINCH may have
-	// picked up that initial SIGWINCH, and the second SIGWINCH may
-	// then have been delivered on the channel. This sequence of events
-	// may have caused issue 15661.
-	// So, read any possible signal from the channel now.
-	select {
-	case <-c1:
-	default:
-	}
-
 	// Send this process a SIGWINCH. It should be ignored.
 	syscall.Kill(syscall.Getpid(), syscall.SIGWINCH)
 
@@ -202,22 +218,28 @@ func testCancel(t *testing.T, ignore bool) {
 		syscall.Kill(syscall.Getpid(), syscall.SIGHUP)
 	}
 
+	quiesce()
+
 	select {
 	case s := <-c1:
-		t.Fatalf("unexpected signal %v", s)
-	case <-time.After(100 * time.Millisecond):
+		t.Errorf("unexpected signal %v", s)
+	default:
 		// nothing to read - good
 	}
 
 	select {
 	case s := <-c2:
-		t.Fatalf("unexpected signal %v", s)
-	case <-time.After(100 * time.Millisecond):
+		t.Errorf("unexpected signal %v", s)
+	default:
 		// nothing to read - good
 	}
 
-	// Reset the signal handlers for all signals.
-	Reset()
+	// One or both of the signals may have been blocked for this process
+	// by the calling process.
+	// Discard any queued signals now to avoid interfering with other tests.
+	Notify(c1, syscall.SIGWINCH)
+	Notify(c2, syscall.SIGHUP)
+	quiesce()
 }
 
 // Test that Reset cancels registration for listed signals on all channels.
@@ -289,7 +311,10 @@ func TestDetectNohup(t *testing.T) {
 	}
 }
 
-var sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop")
+var (
+	sendUncaughtSighup = flag.Int("send_uncaught_sighup", 0, "send uncaught SIGHUP during TestStop")
+	dieFromSighup      = flag.Bool("die_from_sighup", false, "wait to die from uncaught SIGHUP")
+)
 
 // Test that Stop cancels the channel's registrations.
 func TestStop(t *testing.T) {
@@ -300,59 +325,74 @@ func TestStop(t *testing.T) {
 	}
 
 	for _, sig := range sigs {
-		// Send the signal.
-		// If it's SIGWINCH, we should not see it.
-		// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
-		if sig == syscall.SIGWINCH || (sig == syscall.SIGHUP && *sendUncaughtSighup == 1) {
-			syscall.Kill(syscall.Getpid(), sig)
-		}
-
-		// The kernel will deliver a signal as a thread returns
-		// from a syscall. If the only active thread is sleeping,
-		// and the system is busy, the kernel may not get around
-		// to waking up a thread to catch the signal.
-		// We try splitting up the sleep to give the kernel
-		// another chance to deliver the signal.
-		time.Sleep(50 * time.Millisecond)
-		time.Sleep(50 * time.Millisecond)
-
-		// Ask for signal
-		c := make(chan os.Signal, 1)
-		Notify(c, sig)
-		defer Stop(c)
-
-		// Send this process that signal
-		syscall.Kill(syscall.Getpid(), sig)
-		waitSig(t, c, sig)
+		sig := sig
+		t.Run(fmt.Sprint(sig), func(t *testing.T) {
+			// When calling Notify with a specific signal,
+			// independent signals should not interfere with each other,
+			// and we end up needing to wait for signals to quiesce a lot.
+			// Test the three different signals concurrently.
+			t.Parallel()
+
+			// If the signal is not ignored, send the signal before registering a
+			// channel to verify the behavior of the default Go handler.
+			// If it's SIGWINCH or SIGUSR1 we should not see it.
+			// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
+			mayHaveBlockedSignal := false
+			if !Ignored(sig) && (sig != syscall.SIGHUP || *sendUncaughtSighup == 1) {
+				syscall.Kill(syscall.Getpid(), sig)
+				quiesce()
+
+				// We don't know whether sig is blocked for this process; see
+				// https://golang.org/issue/38165. Assume that it could be.
+				mayHaveBlockedSignal = true
+			}
 
-		Stop(c)
-		time.Sleep(50 * time.Millisecond)
-		select {
-		case s := <-c:
-			t.Fatalf("unexpected signal %v", s)
-		case <-time.After(50 * time.Millisecond):
-			// nothing to read - good
-		}
+			// Ask for signal
+			c := make(chan os.Signal, 1)
+			Notify(c, sig)
 
-		// Send the signal.
-		// If it's SIGWINCH, we should not see it.
-		// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
-		if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 {
+			// Send this process the signal again.
 			syscall.Kill(syscall.Getpid(), sig)
-		}
+			waitSig(t, c, sig)
+
+			if mayHaveBlockedSignal {
+				// We may have received a queued initial signal in addition to the one
+				// that we sent after Notify. If so, waitSig may have observed that
+				// initial signal instead of the second one, and we may need to wait for
+				// the second signal to clear. Do that now.
+				quiesce()
+				select {
+				case <-c:
+				default:
+				}
+			}
 
-		time.Sleep(50 * time.Millisecond)
-		select {
-		case s := <-c:
-			t.Fatalf("unexpected signal %v", s)
-		case <-time.After(50 * time.Millisecond):
-			// nothing to read - good
-		}
+			// Stop watching for the signal and send it again.
+			// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
+			Stop(c)
+			if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 {
+				syscall.Kill(syscall.Getpid(), sig)
+				quiesce()
+
+				select {
+				case s := <-c:
+					t.Errorf("unexpected signal %v", s)
+				default:
+					// nothing to read - good
+				}
+
+				// If we're going to receive a signal, it has almost certainly been
+				// received by now. However, it may have been blocked for this process —
+				// we don't know. Explicitly unblock it and wait for it to clear now.
+				Notify(c, sig)
+				quiesce()
+				Stop(c)
+			}
+		})
 	}
 }
 
-// Test that when run under nohup, an uncaught SIGHUP does not kill the program,
-// but a
+// Test that when run under nohup, an uncaught SIGHUP does not kill the program.
 func TestNohup(t *testing.T) {
 	// Ugly: ask for SIGHUP so that child will not have no-hup set
 	// even if test is running under nohup environment.
@@ -371,12 +411,38 @@ func TestNohup(t *testing.T) {
 	//
 	// Both should fail without nohup and succeed with nohup.
 
+	var subTimeout time.Duration
+
+	var wg sync.WaitGroup
+	wg.Add(2)
+	if deadline, ok := t.Deadline(); ok {
+		subTimeout = time.Until(deadline)
+		subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output.
+	}
 	for i := 1; i <= 2; i++ {
-		out, err := exec.Command(os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput()
-		if err == nil {
-			t.Fatalf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out)
-		}
+		i := i
+		go t.Run(fmt.Sprintf("uncaught-%d", i), func(t *testing.T) {
+			defer wg.Done()
+
+			args := []string{
+				"-test.v",
+				"-test.run=TestStop",
+				"-send_uncaught_sighup=" + strconv.Itoa(i),
+				"-die_from_sighup",
+			}
+			if subTimeout != 0 {
+				args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout))
+			}
+			out, err := exec.Command(os.Args[0], args...).CombinedOutput()
+
+			if err == nil {
+				t.Errorf("ran test with -send_uncaught_sighup=%d and it succeeded: expected failure.\nOutput:\n%s", i, out)
+			} else {
+				t.Logf("test with -send_uncaught_sighup=%d failed as expected.\nError: %v\nOutput:\n%s", i, err, out)
+			}
+		})
 	}
+	wg.Wait()
 
 	Stop(c)
 
@@ -387,21 +453,46 @@ func TestNohup(t *testing.T) {
 	}
 
 	// Again, this time with nohup, assuming we can find it.
-	_, err := os.Stat("/usr/bin/nohup")
+	_, err := exec.LookPath("nohup")
 	if err != nil {
 		t.Skip("cannot find nohup; skipping second half of test")
 	}
 
+	wg.Add(2)
+	if deadline, ok := t.Deadline(); ok {
+		subTimeout = time.Until(deadline)
+		subTimeout -= subTimeout / 10 // Leave 10% headroom for propagating output.
+	}
 	for i := 1; i <= 2; i++ {
-		os.Remove("nohup.out")
-		out, err := exec.Command("/usr/bin/nohup", os.Args[0], "-test.run=TestStop", "-send_uncaught_sighup="+strconv.Itoa(i)).CombinedOutput()
+		i := i
+		go t.Run(fmt.Sprintf("nohup-%d", i), func(t *testing.T) {
+			defer wg.Done()
 
-		data, _ := ioutil.ReadFile("nohup.out")
-		os.Remove("nohup.out")
-		if err != nil {
-			t.Fatalf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s%s", i, err, out, data)
-		}
+			// POSIX specifies that nohup writes to a file named nohup.out if standard
+			// output is a terminal. However, for an exec.Command, standard output is
+			// not a terminal — so we don't need to read or remove that file (and,
+			// indeed, cannot even create it if the current user is unable to write to
+			// GOROOT/src, such as when GOROOT is installed and owned by root).
+
+			args := []string{
+				os.Args[0],
+				"-test.v",
+				"-test.run=TestStop",
+				"-send_uncaught_sighup=" + strconv.Itoa(i),
+			}
+			if subTimeout != 0 {
+				args = append(args, fmt.Sprintf("-test.timeout=%v", subTimeout))
+			}
+			out, err := exec.Command("nohup", args...).CombinedOutput()
+
+			if err != nil {
+				t.Errorf("ran test with -send_uncaught_sighup=%d under nohup and it failed: expected success.\nError: %v\nOutput:\n%s", i, err, out)
+			} else {
+				t.Logf("ran test with -send_uncaught_sighup=%d under nohup.\nOutput:\n%s", i, out)
+			}
+		})
 	}
+	wg.Wait()
 }
 
 // Test that SIGCONT works (issue 8953).
@@ -416,7 +507,7 @@ func TestSIGCONT(t *testing.T) {
 // Test race between stopping and receiving a signal (issue 14571).
 func TestAtomicStop(t *testing.T) {
 	if os.Getenv("GO_TEST_ATOMIC_STOP") != "" {
-		atomicStopTestProgram()
+		atomicStopTestProgram(t)
 		t.Fatal("atomicStopTestProgram returned")
 	}
 
@@ -438,8 +529,8 @@ func TestAtomicStop(t *testing.T) {
 	const execs = 10
 	for i := 0; i < execs; i++ {
 		timeout := "0"
-		if !testDeadline.IsZero() {
-			timeout = testDeadline.Sub(time.Now()).String()
+		if deadline, ok := t.Deadline(); ok {
+			timeout = time.Until(deadline).String()
 		}
 		cmd := exec.Command(os.Args[0], "-test.run=TestAtomicStop", "-test.timeout="+timeout)
 		cmd.Env = append(os.Environ(), "GO_TEST_ATOMIC_STOP=1")
@@ -478,7 +569,7 @@ func TestAtomicStop(t *testing.T) {
 // atomicStopTestProgram is run in a subprocess by TestAtomicStop.
 // It tries to trigger a signal delivery race. This function should
 // either catch a signal or die from it.
-func atomicStopTestProgram() {
+func atomicStopTestProgram(t *testing.T) {
 	// This test won't work if SIGINT is ignored here.
 	if Ignored(syscall.SIGINT) {
 		fmt.Println("SIGINT is ignored")
@@ -488,10 +579,10 @@ func atomicStopTestProgram() {
 	const tries = 10
 
 	timeout := 2 * time.Second
-	if !testDeadline.IsZero() {
+	if deadline, ok := t.Deadline(); ok {
 		// Give each try an equal slice of the deadline, with one slice to spare for
 		// cleanup.
-		timeout = testDeadline.Sub(time.Now()) / (tries + 1)
+		timeout = time.Until(deadline) / (tries + 1)
 	}
 
 	pid := syscall.Getpid()
@@ -541,43 +632,45 @@ func TestTime(t *testing.T) {
 		dur = 100 * time.Millisecond
 	}
 	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
-	done := make(chan bool)
-	finished := make(chan bool)
-	go func() {
-		sig := make(chan os.Signal, 1)
-		Notify(sig, syscall.SIGUSR1)
-		defer Stop(sig)
-	Loop:
-		for {
-			select {
-			case <-sig:
-			case <-done:
-				break Loop
-			}
-		}
-		finished <- true
-	}()
+
+	sig := make(chan os.Signal, 1)
+	Notify(sig, syscall.SIGUSR1)
+
+	stop := make(chan struct{})
 	go func() {
-	Loop:
 		for {
 			select {
-			case <-done:
-				break Loop
+			case <-stop:
+				// Allow enough time for all signals to be delivered before we stop
+				// listening for them.
+				quiesce()
+				Stop(sig)
+				// According to its documentation, “[w]hen Stop returns, it in
+				// guaranteed that c will receive no more signals.” So we can safely
+				// close sig here: if there is a send-after-close race, that is a bug in
+				// Stop and we would like to detect it.
+				close(sig)
+				return
+
 			default:
 				syscall.Kill(syscall.Getpid(), syscall.SIGUSR1)
 				runtime.Gosched()
 			}
 		}
-		finished <- true
 	}()
+
+	done := make(chan struct{})
+	go func() {
+		for range sig {
+			// Receive signals until the sender closes sig.
+		}
+		close(done)
+	}()
+
 	t0 := time.Now()
 	for t1 := t0; t1.Sub(t0) < dur; t1 = time.Now() {
 	} // hammering on getting time
-	close(done)
-	<-finished
-	<-finished
-	// When run with 'go test -cpu=1,2,4' SIGUSR1 from this test can slip
-	// into subsequent TestSignal() causing failure.
-	// Sleep for a while to reduce the possibility of the failure.
-	time.Sleep(10 * time.Millisecond)
+
+	close(stop)
+	<-done
 }