author    | Ian Lance Taylor <iant@golang.org> | 2020-02-05 14:33:27 -0800
committer | Ian Lance Taylor <iant@golang.org> | 2020-02-15 09:14:10 -0800
commit    | 0b3c2eed35d608d6541ecf004a9576b4eae0b4ef (patch)
tree      | c92c05d53eb054d8085d069800f4e9b586fef5a3 /libgo/go/runtime
parent    | 17edb3310d8ce9d5f6c9e53f6c1f7d611c2a5a41 (diff)
libgo: update to Go1.14rc1 release
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/218017
Diffstat (limited to 'libgo/go/runtime')
35 files changed, 664 insertions, 319 deletions
diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index 549e566..ec8252b 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -133,21 +133,6 @@ func chanbuf(c *hchan, i uint) unsafe.Pointer { return add(c.buf, uintptr(i)*uintptr(c.elemsize)) } -// full reports whether a send on c would block (that is, the channel is full). -// It uses a single word-sized read of mutable state, so although -// the answer is instantaneously true, the correct answer may have changed -// by the time the calling function receives the return value. -func full(c *hchan) bool { - // c.dataqsiz is immutable (never written after the channel is created) - // so it is safe to read at any time during channel operation. - if c.dataqsiz == 0 { - // Assumes that a pointer read is relaxed-atomic. - return c.recvq.first == nil - } - // Assumes that a uint read is relaxed-atomic. - return c.qcount == c.dataqsiz -} - // entry point for c <- x from compiled code //go:nosplit func chansend1(c *hchan, elem unsafe.Pointer) { @@ -192,7 +177,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { // // After observing that the channel is not closed, we observe that the channel is // not ready for sending. Each of these observations is a single word-sized read - // (first c.closed and second full()). + // (first c.closed and second c.recvq.first or c.qcount depending on kind of channel). // Because a closed channel cannot transition from 'ready for sending' to // 'not ready for sending', even if the channel is closed between the two observations, // they imply a moment between the two when the channel was both not yet closed @@ -201,10 +186,9 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { // // It is okay if the reads are reordered here: if we observe that the channel is not // ready for sending and then observe that it is not closed, that implies that the - // channel wasn't closed during the first observation. However, nothing here - // guarantees forward progress. We rely on the side effects of lock release in - // chanrecv() and closechan() to update this thread's view of c.closed and full(). - if !block && c.closed == 0 && full(c) { + // channel wasn't closed during the first observation. + if !block && c.closed == 0 && ((c.dataqsiz == 0 && c.recvq.first == nil) || + (c.dataqsiz > 0 && c.qcount == c.dataqsiz)) { return false } @@ -434,16 +418,6 @@ func closechan(c *hchan) { } } -// empty reports whether a read from c would block (that is, the channel is -// empty). It uses a single atomic read of mutable state. -func empty(c *hchan) bool { - // c.dataqsiz is immutable. - if c.dataqsiz == 0 { - return atomic.Loadp(unsafe.Pointer(&c.sendq.first)) == nil - } - return atomic.Loaduint(&c.qcount) == 0 -} - // entry points for <- c from compiled code //go:nosplit func chanrecv1(c *hchan, elem unsafe.Pointer) { @@ -484,33 +458,21 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } // Fast path: check for failed non-blocking operation without acquiring the lock. - if !block && empty(c) { - // After observing that the channel is not ready for receiving, we observe whether the - // channel is closed. - // - // Reordering of these checks could lead to incorrect behavior when racing with a close. - // For example, if the channel was open and not empty, was closed, and then drained, - // reordered reads could incorrectly indicate "open and empty". 
To prevent reordering, - // we use atomic loads for both checks, and rely on emptying and closing to happen in - // separate critical sections under the same lock. This assumption fails when closing - // an unbuffered channel with a blocked send, but that is an error condition anyway. - if atomic.Load(&c.closed) == 0 { - // Because a channel cannot be reopened, the later observation of the channel - // being not closed implies that it was also not closed at the moment of the - // first observation. We behave as if we observed the channel at that moment - // and report that the receive cannot proceed. - return - } - // The channel is irreversibly closed. Re-check whether the channel has any pending data - // to receive, which could have arrived between the empty and closed checks above. - // Sequential consistency is also required here, when racing with such a send. - if empty(c) { - // The channel is irreversibly closed and empty. - if ep != nil { - typedmemclr(c.elemtype, ep) - } - return true, false - } + // + // After observing that the channel is not ready for receiving, we observe that the + // channel is not closed. Each of these observations is a single word-sized read + // (first c.sendq.first or c.qcount, and second c.closed). + // Because a channel cannot be reopened, the later observation of the channel + // being not closed implies that it was also not closed at the moment of the + // first observation. We behave as if we observed the channel at that moment + // and report that the receive cannot proceed. + // + // The order of operations is important here: reversing the operations can lead to + // incorrect behavior when racing with a close. + if !block && (c.dataqsiz == 0 && c.sendq.first == nil || + c.dataqsiz > 0 && atomic.Loaduint(&c.qcount) == 0) && + atomic.Load(&c.closed) == 0 { + return } var t0 int64 diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go index ac81d40..c194781 100644 --- a/libgo/go/runtime/chan_test.go +++ b/libgo/go/runtime/chan_test.go @@ -1132,20 +1132,6 @@ func BenchmarkChanPopular(b *testing.B) { wg.Wait() } -func BenchmarkChanClosed(b *testing.B) { - c := make(chan struct{}) - close(c) - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - select { - case <-c: - default: - b.Error("Unreachable") - } - } - }) -} - var ( alwaysFalse = false workSink = 0 diff --git a/libgo/go/runtime/checkptr.go b/libgo/go/runtime/checkptr.go index f478ddd..974f0a0 100644 --- a/libgo/go/runtime/checkptr.go +++ b/libgo/go/runtime/checkptr.go @@ -8,45 +8,22 @@ package runtime import "unsafe" -type ptrAlignError struct { - ptr unsafe.Pointer - elem *_type - n uintptr -} - -func (e ptrAlignError) RuntimeError() {} - -func (e ptrAlignError) Error() string { - return "runtime error: unsafe pointer conversion" -} - func checkptrAlignment(p unsafe.Pointer, elem *_type, n uintptr) { // Check that (*[n]elem)(p) is appropriately aligned. // TODO(mdempsky): What about fieldAlign? if uintptr(p)&(uintptr(elem.align)-1) != 0 { - panic(ptrAlignError{p, elem, n}) + throw("checkptr: unsafe pointer conversion") } // Check that (*[n]elem)(p) doesn't straddle multiple heap objects. 
if size := n * elem.size; size > 1 && checkptrBase(p) != checkptrBase(add(p, size-1)) { - panic(ptrAlignError{p, elem, n}) + throw("checkptr: unsafe pointer conversion") } } -type ptrArithError struct { - ptr unsafe.Pointer - originals []unsafe.Pointer -} - -func (e ptrArithError) RuntimeError() {} - -func (e ptrArithError) Error() string { - return "runtime error: unsafe pointer arithmetic" -} - func checkptrArithmetic(p unsafe.Pointer, originals []unsafe.Pointer) { if 0 < uintptr(p) && uintptr(p) < minLegalPointer { - panic(ptrArithError{p, originals}) + throw("checkptr: unsafe pointer arithmetic") } // Check that if the computed pointer p points into a heap @@ -63,7 +40,7 @@ func checkptrArithmetic(p unsafe.Pointer, originals []unsafe.Pointer) { } } - panic(ptrArithError{p, originals}) + throw("checkptr: unsafe pointer arithmetic") } // checkptrBase returns the base address for the allocation containing diff --git a/libgo/go/runtime/checkptr_test.go b/libgo/go/runtime/checkptr_test.go new file mode 100644 index 0000000..ab3058f --- /dev/null +++ b/libgo/go/runtime/checkptr_test.go @@ -0,0 +1,50 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "internal/testenv" + "os/exec" + "runtime" + "strings" + "testing" +) + +func TestCheckPtr(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not have -d=checkptr") + } + t.Parallel() + testenv.MustHaveGoRun(t) + + exe, err := buildTestProg(t, "testprog", "-gcflags=all=-d=checkptr=1") + if err != nil { + t.Fatal(err) + } + + testCases := []struct { + cmd string + want string + }{ + {"CheckPtrAlignment", "fatal error: checkptr: unsafe pointer conversion\n"}, + {"CheckPtrArithmetic", "fatal error: checkptr: unsafe pointer arithmetic\n"}, + {"CheckPtrSize", "fatal error: checkptr: unsafe pointer conversion\n"}, + {"CheckPtrSmall", "fatal error: checkptr: unsafe pointer arithmetic\n"}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.cmd, func(t *testing.T) { + t.Parallel() + got, err := testenv.CleanCmdEnv(exec.Command(exe, tc.cmd)).CombinedOutput() + if err != nil { + t.Log(err) + } + if !strings.HasPrefix(string(got), tc.want) { + t.Errorf("output:\n%s\n\nwant output starting with: %s", got, tc.want) + } + }) + } +} diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 1202e36..e480466 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -26,12 +26,12 @@ func GOMAXPROCS(n int) int { return ret } - stopTheWorldGC("GOMAXPROCS") + stopTheWorld("GOMAXPROCS") // newprocs will be processed by startTheWorld newprocs = int32(n) - startTheWorldGC() + startTheWorld() return ret } diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 9a977d8..b60c19b 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -45,6 +45,9 @@ var NetpollGenericInit = netpollGenericInit var ParseRelease = parseRelease +var Memmove = memmove +var MemclrNoHeapPointers = memclrNoHeapPointers + const PreemptMSupported = preemptMSupported type LFNode struct { @@ -573,6 +576,7 @@ const ( PageSize = pageSize PallocChunkPages = pallocChunkPages PageAlloc64Bit = pageAlloc64Bit + PallocSumBytes = pallocSumBytes ) // Expose pallocSum for testing. 
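
The new checkptr_test.go above expects the runtime to die with a `fatal error: checkptr: ...` message, now that checkptrAlignment and checkptrArithmetic call throw instead of panicking with the deleted ptrAlignError/ptrArithError types. Below is a minimal sketch of a program that would hit the alignment check; the file name and invocation are illustrative, and the -d=checkptr flag belongs to the gc compiler (gccgo has no equivalent, which is why the test skips there).

```go
// Minimal sketch of a program that trips the reworked checkptr check.
// Build with the gc toolchain and instrumentation enabled, e.g.
//   go run -gcflags=all=-d=checkptr=1 main.go
// With this patch the runtime throws
//   fatal error: checkptr: unsafe pointer conversion
// instead of panicking with the removed ptrAlignError value.
package main

import "unsafe"

var sink *int64

func main() {
	var x [2]int64
	p := unsafe.Pointer(&x[0])
	// *int64 requires 8-byte alignment; offsetting the pointer by one byte
	// violates it, mirroring the CheckPtrAlignment case registered in
	// testdata/testprog/checkptr.go later in this patch.
	sink = (*int64)(unsafe.Pointer(uintptr(p) + 1))
}
```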
diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index e2e601f..96af606 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -78,21 +78,6 @@ It is a comma-separated list of name=val pairs setting these named variables: If the line ends with "(forced)", this GC was forced by a runtime.GC() call. - Setting gctrace to any value > 0 also causes the garbage collector - to emit a summary when memory is released back to the system. - This process of returning memory to the system is called scavenging. - The format of this summary is subject to change. - Currently it is: - scvg#: # MB released printed only if non-zero - scvg#: inuse: # idle: # sys: # released: # consumed: # (MB) - where the fields are as follows: - scvg# the scavenge cycle number, incremented at each scavenge - inuse: # MB used or partially used spans - idle: # MB spans pending scavenging - sys: # MB mapped from the system - released: # MB released to the system - consumed: # MB allocated from the system - madvdontneed: setting madvdontneed=1 will use MADV_DONTNEED instead of MADV_FREE on Linux when returning memory to the kernel. This is less efficient, but causes RSS numbers to drop @@ -112,6 +97,19 @@ It is a comma-separated list of name=val pairs setting these named variables: scavenge: scavenge=1 enables debugging mode of heap scavenger. + scavtrace: setting scavtrace=1 causes the runtime to emit a single line to standard + error, roughly once per GC cycle, summarizing the amount of work done by the + scavenger as well as the total amount of memory returned to the operating system + and an estimate of physical memory utilization. The format of this line is subject + to change, but currently it is: + scav # KiB work, # KiB total, #% util + where the fields are as follows: + # KiB work the amount of memory returned to the OS since the last scav line + # KiB total how much of the heap at this point in time has been released to the OS + #% util the fraction of all unscavenged memory which is in-use + If the line ends with "(forced)", then scavenging was forced by a + debug.FreeOSMemory() call. + scheddetail: setting schedtrace=X and scheddetail=1 causes the scheduler to emit detailed multiline info every X milliseconds, describing state of the scheduler, processors, threads and goroutines. 
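
The extern.go hunk above retires the old gctrace scavenger summary in favour of a dedicated scavtrace GODEBUG setting. A small sketch of how one might watch it, assuming a toolchain carrying this runtime; the binary name, sizes, and percentages in the comments are illustrative only.

```go
// Run as: GODEBUG=scavtrace=1 ./scavdemo
// Roughly once per GC cycle the runtime prints a line such as
//   scav 2048 KiB work, 8192 KiB total, 87% util
// and the debug.FreeOSMemory call below forces one ending in "(forced)".
package main

import (
	"runtime/debug"
	"time"
)

func main() {
	// Build up and then drop some heap so there is memory to return to the OS.
	garbage := make([][]byte, 64)
	for i := range garbage {
		garbage[i] = make([]byte, 1<<20)
	}
	garbage = nil

	debug.FreeOSMemory()        // forced scavenge: prints a "(forced)" scavtrace line
	time.Sleep(2 * time.Second) // leave time for the background scavenger as well
}
```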
diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index fc24f04..ddbe5dd 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -165,7 +165,7 @@ func infoBigStruct() []byte { typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64 typePointer, typeScalar, // i string } - case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x", "wasm": + case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "riscv64", "s390x", "wasm": return []byte{ typePointer, // q *int typeScalar, typeScalar, typeScalar, // w byte; e [17]byte diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index cff663a..704bbe6 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x wasm alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 package runtime diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index de40a00..af9e7d1 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x wasm arm64be alpha sparc64 ia64 riscv64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sparc64 ia64 package runtime diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index fda2273..35ace7f 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -513,6 +513,7 @@ func mallocinit() { // allocation at 0x40 << 32 because when using 4k pages with 3-level // translation buffers, the user address space is limited to 39 bits // On darwin/arm64, the address space is even smaller. + // // On AIX, mmaps starts at 0x0A00000000000000 for 64-bit. // processes. for i := 0x7f; i >= 0; i-- { diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index bd30bc1..45555ee 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -206,14 +206,6 @@ type acLink struct { var arenaCollisionSink []*acLink func TestArenaCollision(t *testing.T) { - if GOOS == "darwin" && race.Enabled { - // Skip this test on Darwin in race mode because Darwin 10.10 has - // issues following arena hints and runs out of them in race mode, so - // MAP_FIXED is used to ensure we keep the heap in the memory region the - // race detector expects. - // TODO(mknyszek): Delete this when Darwin 10.10 is no longer supported. - t.Skip("disabled on Darwin with race mode since MAP_FIXED is used") - } testenv.MustHaveExec(t) // Test that mheap.sysAlloc handles collisions with other diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 0b2e191..396c130 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -11,7 +11,9 @@ import ( "internal/race" "internal/testenv" . "runtime" + "sync/atomic" "testing" + "unsafe" ) func TestMemmove(t *testing.T) { @@ -206,6 +208,71 @@ func cmpb(a, b []byte) int { return l } +// Ensure that memmove writes pointers atomically, so the GC won't +// observe a partially updated pointer. 
+func TestMemmoveAtomicity(t *testing.T) { + if race.Enabled { + t.Skip("skip under the race detector -- this test is intentionally racy") + } + + var x int + + for _, backward := range []bool{true, false} { + for _, n := range []int{3, 4, 5, 6, 7, 8, 9, 10, 15, 25, 49} { + n := n + + // test copying [N]*int. + sz := uintptr(n * PtrSize) + name := fmt.Sprint(sz) + if backward { + name += "-backward" + } else { + name += "-forward" + } + t.Run(name, func(t *testing.T) { + // Use overlapping src and dst to force forward/backward copy. + var s [100]*int + src := s[n-1 : 2*n-1] + dst := s[:n] + if backward { + src, dst = dst, src + } + for i := range src { + src[i] = &x + } + for i := range dst { + dst[i] = nil + } + + var ready uint32 + go func() { + sp := unsafe.Pointer(&src[0]) + dp := unsafe.Pointer(&dst[0]) + atomic.StoreUint32(&ready, 1) + for i := 0; i < 10000; i++ { + Memmove(dp, sp, sz) + MemclrNoHeapPointers(dp, sz) + } + atomic.StoreUint32(&ready, 2) + }() + + for atomic.LoadUint32(&ready) == 0 { + Gosched() + } + + for atomic.LoadUint32(&ready) != 2 { + for i := range dst { + p := dst[i] + if p != nil && p != &x { + t.Fatalf("got partially updated pointer %p at dst[%d], want either nil or %p", p, i, &x) + } + } + } + }) + } + } +} + func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) { for _, n := range sizes { b.Run(fmt.Sprint(n), func(b *testing.B) { diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index b0040f9..8ded306 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -1271,7 +1271,6 @@ func gcStart(trigger gcTrigger) { } // Ok, we're doing it! Stop everybody else - semacquire(&gcsema) semacquire(&worldsema) if trace.enabled { @@ -1370,13 +1369,6 @@ func gcStart(trigger gcTrigger) { work.pauseNS += now - work.pauseStart work.tMark = now }) - - // Release the world sema before Gosched() in STW mode - // because we will need to reacquire it later but before - // this goroutine becomes runnable again, and we could - // self-deadlock otherwise. - semrelease(&worldsema) - // In STW mode, we could block the instant systemstack // returns, so don't do anything important here. Make sure we // block rather than returning to user code. @@ -1446,10 +1438,6 @@ top: return } - // forEachP needs worldsema to execute, and we'll need it to - // stop the world later, so acquire worldsema now. - semacquire(&worldsema) - // Flush all local buffers and collect flushedWork flags. gcMarkDoneFlushed = 0 systemstack(func() { @@ -1510,7 +1498,6 @@ top: // work to do. Keep going. It's possible the // transition condition became true again during the // ragged barrier, so re-check it. - semrelease(&worldsema) goto top } @@ -1587,7 +1574,6 @@ top: now := startTheWorldWithSema(true) work.pauseNS += now - work.pauseStart }) - semrelease(&worldsema) goto top } } @@ -1802,7 +1788,6 @@ func gcMarkTermination(nextTriggerRatio float64) { } semrelease(&worldsema) - semrelease(&gcsema) // Careful: another GC cycle may start now. releasem(mp) diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go index f3856db..3b60b3d 100644 --- a/libgo/go/runtime/mgcscavenge.go +++ b/libgo/go/runtime/mgcscavenge.go @@ -80,6 +80,17 @@ const ( // maxPagesPerPhysPage is the maximum number of supported runtime pages per // physical page, based on maxPhysPageSize. maxPagesPerPhysPage = maxPhysPageSize / pageSize + + // scavengeCostRatio is the approximate ratio between the costs of using previously + // scavenged memory and scavenging memory. 
+ // + // For most systems the cost of scavenging greatly outweighs the costs + // associated with using scavenged memory, making this constant 0. On other systems + // (especially ones where "sysUsed" is not just a no-op) this cost is non-trivial. + // + // This ratio is used as part of multiplicative factor to help the scavenger account + // for the additional costs of using scavenged memory in its pacing. + scavengeCostRatio = 0.7 * sys.GoosDarwin ) // heapRetained returns an estimate of the current heap RSS. @@ -248,7 +259,7 @@ func bgscavenge(c chan int) { released := uintptr(0) // Time in scavenging critical section. - crit := int64(0) + crit := float64(0) // Run on the system stack since we grab the heap lock, // and a stack growth with the heap lock means a deadlock. @@ -266,16 +277,10 @@ func bgscavenge(c chan int) { // Scavenge one page, and measure the amount of time spent scavenging. start := nanotime() released = mheap_.pages.scavengeOne(physPageSize, false) - crit = nanotime() - start + atomic.Xadduintptr(&mheap_.pages.scavReleased, released) + crit = float64(nanotime() - start) }) - if debug.gctrace > 0 { - if released > 0 { - print("scvg: ", released>>10, " KB released\n") - } - print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") - } - if released == 0 { lock(&scavenge.lock) scavenge.parked = true @@ -283,6 +288,14 @@ func bgscavenge(c chan int) { continue } + // Multiply the critical time by 1 + the ratio of the costs of using + // scavenged memory vs. scavenging memory. This forces us to pay down + // the cost of reusing this memory eagerly by sleeping for a longer period + // of time and scavenging less frequently. More concretely, we avoid situations + // where we end up scavenging so often that we hurt allocation performance + // because of the additional overheads of using scavenged memory. + crit *= 1 + scavengeCostRatio + // If we spent more than 10 ms (for example, if the OS scheduled us away, or someone // put their machine to sleep) in the critical section, bound the time we use to // calculate at 10 ms to avoid letting the sleep time get arbitrarily high. @@ -298,13 +311,13 @@ func bgscavenge(c chan int) { // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time // down. adjust := scavengeEWMA / idealFraction - sleepTime := int64(adjust * float64(crit) / (scavengePercent / 100.0)) + sleepTime := int64(adjust * crit / (scavengePercent / 100.0)) // Go to sleep. slept := scavengeSleep(sleepTime) // Compute the new ratio. - fraction := float64(crit) / float64(crit+slept) + fraction := crit / (crit + float64(slept)) // Set a lower bound on the fraction. // Due to OS-related anomalies we may "sleep" for an inordinate amount @@ -348,12 +361,39 @@ func (s *pageAlloc) scavenge(nbytes uintptr, locked bool) uintptr { return released } +// printScavTrace prints a scavenge trace line to standard error. +// +// released should be the amount of memory released since the last time this +// was called, and forced indicates whether the scavenge was forced by the +// application. 
+func printScavTrace(released uintptr, forced bool) { + printlock() + print("scav ", + released>>10, " KiB work, ", + atomic.Load64(&memstats.heap_released)>>10, " KiB total, ", + (atomic.Load64(&memstats.heap_inuse)*100)/heapRetained(), "% util", + ) + if forced { + print(" (forced)") + } + println() + printunlock() +} + // resetScavengeAddr sets the scavenge start address to the top of the heap's // address space. This should be called each time the scavenger's pacing // changes. // // s.mheapLock must be held. func (s *pageAlloc) resetScavengeAddr() { + released := atomic.Loaduintptr(&s.scavReleased) + if debug.scavtrace > 0 { + printScavTrace(released, false) + } + // Subtract from scavReleased instead of just setting it to zero because + // the scavenger could have increased scavReleased concurrently with the + // load above, and we may miss an update by just blindly zeroing the field. + atomic.Xadduintptr(&s.scavReleased, -released) s.scavAddr = chunkBase(s.end) - 1 } @@ -415,7 +455,10 @@ func (s *pageAlloc) scavengeOne(max uintptr, locked bool) uintptr { // Check the chunk containing the scav addr, starting at the addr // and see if there are any free and unscavenged pages. - if s.summary[len(s.summary)-1][ci].max() >= uint(minPages) { + // + // Only check this if s.scavAddr is covered by any address range + // in s.inUse, so that we know our check of the summary is safe. + if s.inUse.contains(s.scavAddr) && s.summary[len(s.summary)-1][ci].max() >= uint(minPages) { // We only bother looking for a candidate if there at least // minPages free pages at all. It's important that we only // continue if the summary says we can because that's how diff --git a/libgo/go/runtime/mgcscavenge_test.go b/libgo/go/runtime/mgcscavenge_test.go index 518d5ab..58f9e3a 100644 --- a/libgo/go/runtime/mgcscavenge_test.go +++ b/libgo/go/runtime/mgcscavenge_test.go @@ -272,6 +272,9 @@ func TestPallocDataFindScavengeCandidate(t *testing.T) { // Tests end-to-end scavenging on a pageAlloc. func TestPageAllocScavenge(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } type test struct { request, expect uintptr } @@ -279,12 +282,13 @@ func TestPageAllocScavenge(t *testing.T) { if minPages < 1 { minPages = 1 } - tests := map[string]struct { + type setup struct { beforeAlloc map[ChunkIdx][]BitRange beforeScav map[ChunkIdx][]BitRange expect []test afterScav map[ChunkIdx][]BitRange - }{ + } + tests := map[string]setup{ "AllFreeUnscavExhaust": { beforeAlloc: map[ChunkIdx][]BitRange{ BaseChunkIdx: {}, @@ -393,6 +397,26 @@ func TestPageAllocScavenge(t *testing.T) { }, }, } + if PageAlloc64Bit != 0 { + tests["ScavAllVeryDiscontiguous"] = setup{ + beforeAlloc: map[ChunkIdx][]BitRange{ + BaseChunkIdx: {}, + BaseChunkIdx + 0x1000: {}, + }, + beforeScav: map[ChunkIdx][]BitRange{ + BaseChunkIdx: {}, + BaseChunkIdx + 0x1000: {}, + }, + expect: []test{ + {^uintptr(0), 2 * PallocChunkPages * PageSize}, + {^uintptr(0), 0}, + }, + afterScav: map[ChunkIdx][]BitRange{ + BaseChunkIdx: {{0, PallocChunkPages}}, + BaseChunkIdx + 0x1000: {{0, PallocChunkPages}}, + }, + } + } for name, v := range tests { v := v runTest := func(t *testing.T, locked bool) { diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index f40589a..c40c9e2 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -70,7 +70,7 @@ type mheap struct { // on the swept stack. 
sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics + // _ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep // @@ -786,7 +786,9 @@ func (h *mheap) reclaim(npage uintptr) { // reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n). // It returns the number of pages returned to the heap. // -// h.lock must be held and the caller must be non-preemptible. +// h.lock must be held and the caller must be non-preemptible. Note: h.lock may be +// temporarily unlocked and re-locked in order to do sweeping or if tracing is +// enabled. func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { // The heap lock must be held because this accesses the // heapArena.spans arrays using potentially non-live pointers. @@ -842,8 +844,10 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { n -= uintptr(len(inUse) * 8) } if trace.enabled { + unlock(&h.lock) // Account for pages scanned but not reclaimed. traceGCSweepSpan((n0 - nFreed) * pageSize) + lock(&h.lock) } return nFreed } @@ -1430,11 +1434,8 @@ func (h *mheap) scavengeAll() { unlock(&h.lock) gp.m.mallocing-- - if debug.gctrace > 0 { - if released > 0 { - print("forced scvg: ", released>>20, " MB released\n") - } - print("forced scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") + if debug.scavtrace > 0 { + printScavTrace(released, true) } } diff --git a/libgo/go/runtime/mkpreempt.go b/libgo/go/runtime/mkpreempt.go index 615ec18..64e2207 100644 --- a/libgo/go/runtime/mkpreempt.go +++ b/libgo/go/runtime/mkpreempt.go @@ -83,6 +83,7 @@ var arches = map[string]func(){ "mips64x": func() { genMIPS(true) }, "mipsx": func() { genMIPS(false) }, "ppc64x": genPPC64, + "riscv64": genRISCV64, "s390x": genS390X, "wasm": genWasm, } @@ -478,6 +479,11 @@ func genPPC64() { p("JMP (CTR)") } +func genRISCV64() { + p("// No async preemption on riscv64 - see issue 36711") + p("UNDEF") +} + func genS390X() { // Add integer registers R0-R12 // R13 (g), R14 (LR), R15 (SP) are special, and not saved here. diff --git a/libgo/go/runtime/mpagealloc.go b/libgo/go/runtime/mpagealloc.go index 572e6a9..bb751f1 100644 --- a/libgo/go/runtime/mpagealloc.go +++ b/libgo/go/runtime/mpagealloc.go @@ -225,7 +225,9 @@ type pageAlloc struct { // the bitmaps align better on zero-values. chunks [1 << pallocChunksL1Bits]*[1 << pallocChunksL2Bits]pallocData - // The address to start an allocation search with. + // The address to start an allocation search with. It must never + // point to any memory that is not contained in inUse, i.e. + // inUse.contains(searchAddr) must always be true. // // When added with arenaBaseOffset, we guarantee that // all valid heap addresses (when also added with @@ -237,9 +239,15 @@ type pageAlloc struct { // space on architectures with segmented address spaces. searchAddr uintptr - // The address to start a scavenge candidate search with. + // The address to start a scavenge candidate search with. It + // need not point to memory contained in inUse. scavAddr uintptr + // The amount of memory scavenged since the last scavtrace print. + // + // Read and updated atomically. + scavReleased uintptr + // start and end represent the chunk indices // which pageAlloc knows about. 
It assumes // chunks in the range [start, end) are diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go index dd44da1..385b7b3 100644 --- a/libgo/go/runtime/mpagealloc_64bit.go +++ b/libgo/go/runtime/mpagealloc_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le s390x arm64be alpha sparc64 ia64 riscv64 +// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 // See mpagealloc_32bit.go for why darwin/arm64 is excluded here. diff --git a/libgo/go/runtime/mpagealloc_test.go b/libgo/go/runtime/mpagealloc_test.go index 6c48296..89a4a25 100644 --- a/libgo/go/runtime/mpagealloc_test.go +++ b/libgo/go/runtime/mpagealloc_test.go @@ -41,6 +41,9 @@ func checkPageAlloc(t *testing.T, want, got *PageAlloc) { } func TestPageAllocGrow(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } type test struct { chunks []ChunkIdx inUse []AddrRange @@ -216,15 +219,19 @@ func TestPageAllocGrow(t *testing.T) { } func TestPageAllocAlloc(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } type hit struct { npages, base, scav uintptr } - tests := map[string]struct { + type test struct { scav map[ChunkIdx][]BitRange before map[ChunkIdx][]BitRange after map[ChunkIdx][]BitRange hits []hit - }{ + } + tests := map[string]test{ "AllFree1": { before: map[ChunkIdx][]BitRange{ BaseChunkIdx: {}, @@ -365,7 +372,6 @@ func TestPageAllocAlloc(t *testing.T) { BaseChunkIdx: {{0, 195}}, }, }, - // TODO(mknyszek): Add tests close to the chunk size. "ExhaustPallocChunkPages-3": { before: map[ChunkIdx][]BitRange{ BaseChunkIdx: {}, @@ -565,6 +571,48 @@ func TestPageAllocAlloc(t *testing.T) { }, }, } + if PageAlloc64Bit != 0 { + const chunkIdxBigJump = 0x100000 // chunk index offset which translates to O(TiB) + + // This test attempts to trigger a bug wherein we look at unmapped summary + // memory that isn't just in the case where we exhaust the heap. + // + // It achieves this by placing a chunk such that its summary will be + // at the very end of a physical page. It then also places another chunk + // much further up in the address space, such that any allocations into the + // first chunk do not exhaust the heap and the second chunk's summary is not in the + // page immediately adjacent to the first chunk's summary's page. + // Allocating into this first chunk to exhaustion and then into the second + // chunk may then trigger a check in the allocator which erroneously looks at + // unmapped summary memory and crashes. + + // Figure out how many chunks are in a physical page, then align BaseChunkIdx + // to a physical page in the chunk summary array. Here we only assume that + // each summary array is aligned to some physical page. 
+ sumsPerPhysPage := ChunkIdx(PhysPageSize / PallocSumBytes) + baseChunkIdx := BaseChunkIdx &^ (sumsPerPhysPage - 1) + tests["DiscontiguousMappedSumBoundary"] = test{ + before: map[ChunkIdx][]BitRange{ + baseChunkIdx + sumsPerPhysPage - 1: {}, + baseChunkIdx + chunkIdxBigJump: {}, + }, + scav: map[ChunkIdx][]BitRange{ + baseChunkIdx + sumsPerPhysPage - 1: {}, + baseChunkIdx + chunkIdxBigJump: {}, + }, + hits: []hit{ + {PallocChunkPages - 1, PageBase(baseChunkIdx+sumsPerPhysPage-1, 0), 0}, + {1, PageBase(baseChunkIdx+sumsPerPhysPage-1, PallocChunkPages-1), 0}, + {1, PageBase(baseChunkIdx+chunkIdxBigJump, 0), 0}, + {PallocChunkPages - 1, PageBase(baseChunkIdx+chunkIdxBigJump, 1), 0}, + {1, 0, 0}, + }, + after: map[ChunkIdx][]BitRange{ + baseChunkIdx + sumsPerPhysPage - 1: {{0, PallocChunkPages}}, + baseChunkIdx + chunkIdxBigJump: {{0, PallocChunkPages}}, + }, + } + } for name, v := range tests { v := v t.Run(name, func(t *testing.T) { @@ -589,6 +637,9 @@ func TestPageAllocAlloc(t *testing.T) { } func TestPageAllocExhaust(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } for _, npages := range []uintptr{1, 2, 3, 4, 5, 8, 16, 64, 1024, 1025, 2048, 2049} { npages := npages t.Run(fmt.Sprintf("%d", npages), func(t *testing.T) { @@ -638,6 +689,9 @@ func TestPageAllocExhaust(t *testing.T) { } func TestPageAllocFree(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } tests := map[string]struct { before map[ChunkIdx][]BitRange after map[ChunkIdx][]BitRange @@ -867,6 +921,9 @@ func TestPageAllocFree(t *testing.T) { } func TestPageAllocAllocAndFree(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } type hit struct { alloc bool npages uintptr diff --git a/libgo/go/runtime/mpagecache_test.go b/libgo/go/runtime/mpagecache_test.go index 6fdaa04..b8cc0bd 100644 --- a/libgo/go/runtime/mpagecache_test.go +++ b/libgo/go/runtime/mpagecache_test.go @@ -180,6 +180,9 @@ func TestPageCacheAlloc(t *testing.T) { } func TestPageCacheFlush(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } bits64ToBitRanges := func(bits uint64, base uint) []BitRange { var ranges []BitRange start, size := uint(0), uint(0) @@ -254,6 +257,9 @@ func TestPageCacheFlush(t *testing.T) { } func TestPageAllocAllocToCache(t *testing.T) { + if GOOS == "openbsd" && testing.Short() { + t.Skip("skipping because virtual memory is limited; see #36210") + } tests := map[string]struct { before map[ChunkIdx][]BitRange scav map[ChunkIdx][]BitRange diff --git a/libgo/go/runtime/mpallocbits.go b/libgo/go/runtime/mpallocbits.go index 9d01ff8..a801134 100644 --- a/libgo/go/runtime/mpallocbits.go +++ b/libgo/go/runtime/mpallocbits.go @@ -202,17 +202,11 @@ func (b *pallocBits) summarize() pallocSum { // If find fails to find any free space, it returns an index of ^uint(0) and // the new searchIdx should be ignored. // -// The returned searchIdx is always the index of the first free page found -// in this bitmap during the search, except if npages == 1, in which -// case it will be the index just after the first free page, because the -// index returned as the first result is assumed to be allocated and so -// represents a minor optimization for that case. +// Note that if npages == 1, the two returned values will always be identical. 
func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) { if npages == 1 { addr := b.find1(searchIdx) - // Return a searchIdx of addr + 1 since we assume addr will be - // allocated. - return addr, addr + 1 + return addr, addr } else if npages <= 64 { return b.findSmallN(npages, searchIdx) } diff --git a/libgo/go/runtime/mranges.go b/libgo/go/runtime/mranges.go index c14e5c7..b133851 100644 --- a/libgo/go/runtime/mranges.go +++ b/libgo/go/runtime/mranges.go @@ -29,6 +29,11 @@ func (a addrRange) size() uintptr { return a.limit - a.base } +// contains returns whether or not the range contains a given address. +func (a addrRange) contains(addr uintptr) bool { + return addr >= a.base && addr < a.limit +} + // subtract takes the addrRange toPrune and cuts out any overlap with // from, then returns the new range. subtract assumes that a and b // either don't overlap at all, only overlap on one side, or are equal. @@ -87,6 +92,15 @@ func (a *addrRanges) findSucc(base uintptr) int { return len(a.ranges) } +// contains returns true if a covers the address addr. +func (a *addrRanges) contains(addr uintptr) bool { + i := a.findSucc(addr) + if i == 0 { + return false + } + return a.ranges[i-1].contains(addr) +} + // add inserts a new address range to a. // // r must not overlap with any address range in a. diff --git a/libgo/go/runtime/preempt_nonwindows.go b/libgo/go/runtime/preempt_nonwindows.go new file mode 100644 index 0000000..3066a152 --- /dev/null +++ b/libgo/go/runtime/preempt_nonwindows.go @@ -0,0 +1,13 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !windows + +package runtime + +//go:nosplit +func osPreemptExtEnter(mp *m) {} + +//go:nosplit +func osPreemptExtExit(mp *m) {} diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index e3f934a..f75cacf 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -841,23 +841,8 @@ func casGFromPreempted(gp *g, old, new uint32) bool { // goroutines. func stopTheWorld(reason string) { semacquire(&worldsema) - gp := getg() - gp.m.preemptoff = reason - systemstack(func() { - // Mark the goroutine which called stopTheWorld preemptible so its - // stack may be scanned. - // This lets a mark worker scan us while we try to stop the world - // since otherwise we could get in a mutual preemption deadlock. - // We must not modify anything on the G stack because a stack shrink - // may occur. A stack shrink is otherwise OK though because in order - // to return from this function (and to leave the system stack) we - // must have preempted all goroutines, including any attempting - // to scan our stack, in which case, any stack shrinking will - // have already completed by the time we exit. - casgstatus(gp, _Grunning, _Gwaiting) - stopTheWorldWithSema() - casgstatus(gp, _Gwaiting, _Grunning) - }) + getg().m.preemptoff = reason + systemstack(stopTheWorldWithSema) } // startTheWorld undoes the effects of stopTheWorld. @@ -869,31 +854,10 @@ func startTheWorld() { getg().m.preemptoff = "" } -// stopTheWorldGC has the same effect as stopTheWorld, but blocks -// until the GC is not running. It also blocks a GC from starting -// until startTheWorldGC is called. -func stopTheWorldGC(reason string) { - semacquire(&gcsema) - stopTheWorld(reason) -} - -// startTheWorldGC undoes the effects of stopTheWorldGC. 
-func startTheWorldGC() { - startTheWorld() - semrelease(&gcsema) -} - -// Holding worldsema grants an M the right to try to stop the world. +// Holding worldsema grants an M the right to try to stop the world +// and prevents gomaxprocs from changing concurrently. var worldsema uint32 = 1 -// Holding gcsema grants the M the right to block a GC, and blocks -// until the current GC is done. In particular, it prevents gomaxprocs -// from changing concurrently. -// -// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle -// being changed/enabled during a GC, remove this. -var gcsema uint32 = 1 - // stopTheWorldWithSema is the core implementation of stopTheWorld. // The caller is responsible for acquiring worldsema and disabling // preemption first and then should stopTheWorldWithSema on the system @@ -2577,6 +2541,27 @@ func dropg() { // We pass now in and out to avoid extra calls of nanotime. //go:yeswritebarrierrec func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { + // If there are no timers to adjust, and the first timer on + // the heap is not yet ready to run, then there is nothing to do. + if atomic.Load(&pp.adjustTimers) == 0 { + next := int64(atomic.Load64(&pp.timer0When)) + if next == 0 { + return now, 0, false + } + if now == 0 { + now = nanotime() + } + if now < next { + // Next timer is not ready to run. + // But keep going if we would clear deleted timers. + // This corresponds to the condition below where + // we decide whether to call clearDeletedTimers. + if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) { + return now, next, false + } + } + } + lock(&pp.timersLock) adjusttimers(pp) @@ -2599,6 +2584,13 @@ func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { } } + // If this is the local P, and there are a lot of deleted timers, + // clear them out. We only do this for the local P to reduce + // lock contention on timersLock. + if pp == getg().m.p.ptr() && int(atomic.Load(&pp.deletedTimers)) > len(pp.timers)/4 { + clearDeletedTimers(pp) + } + unlock(&pp.timersLock) return rnow, pollUntil, ran @@ -2723,7 +2715,7 @@ func preemptPark(gp *g) { } // goyield is like Gosched, but it: -// - does not emit a GoSched trace event +// - emits a GoPreempt trace event instead of a GoSched trace event // - puts the current G on the runq of the current P instead of the globrunq func goyield() { checkTimeouts() @@ -2731,6 +2723,9 @@ func goyield() { } func goyield_m(gp *g) { + if trace.enabled { + traceGoPreempt() + } pp := gp.m.p.ptr() casgstatus(gp, _Grunning, _Grunnable) dropg() @@ -3816,7 +3811,10 @@ func (pp *p) destroy() { lock(&pp.timersLock) moveTimers(plocal, pp.timers) pp.timers = nil + pp.numTimers = 0 pp.adjustTimers = 0 + pp.deletedTimers = 0 + atomic.Store64(&pp.timer0When, 0) unlock(&pp.timersLock) unlock(&plocal.timersLock) } @@ -4122,23 +4120,26 @@ func checkdead() { } // Maybe jump time forward for playground. - _p_ := timejump() - if _p_ != nil { - for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link { - if (*pp).ptr() == _p_ { - *pp = _p_.link - break + if faketime != 0 { + when, _p_ := timeSleepUntil() + if _p_ != nil { + faketime = when + for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link { + if (*pp).ptr() == _p_ { + *pp = _p_.link + break + } } + mp := mget() + if mp == nil { + // There should always be a free M since + // nothing is running. 
+ throw("checkdead: no m for timer") + } + mp.nextp.set(_p_) + notewakeup(&mp.park) + return } - mp := mget() - if mp == nil { - // There should always be a free M since - // nothing is running. - throw("checkdead: no m for timer") - } - mp.nextp.set(_p_) - notewakeup(&mp.park) - return } // There are no goroutines running, so we can look at the P's. @@ -4183,7 +4184,7 @@ func sysmon() { } usleep(delay) now := nanotime() - next := timeSleepUntil() + next, _ := timeSleepUntil() if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { lock(&sched.lock) if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) { @@ -4205,7 +4206,7 @@ func sysmon() { osRelax(false) } now = nanotime() - next = timeSleepUntil() + next, _ = timeSleepUntil() lock(&sched.lock) atomic.Store(&sched.sysmonwait, 0) noteclear(&sched.sysmonnote) diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index 60aa90f..6edf7a5 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -323,6 +323,7 @@ var debug struct { madvdontneed int32 // for Linux; issue 28466 sbrk int32 scavenge int32 + scavtrace int32 scheddetail int32 schedtrace int32 tracebackancestors int32 @@ -343,6 +344,7 @@ var dbgvars = []dbgVar{ {"madvdontneed", &debug.madvdontneed}, {"sbrk", &debug.sbrk}, {"scavenge", &debug.scavenge}, + {"scavtrace", &debug.scavtrace}, {"scheddetail", &debug.scheddetail}, {"schedtrace", &debug.schedtrace}, {"tracebackancestors", &debug.tracebackancestors}, diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index d50f82a..f5bfc08 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -677,6 +677,11 @@ type p struct { _ uint32 // Alignment for atomic fields below + // The when field of the first entry on the timer heap. + // This is updated using atomic functions. + // This is 0 if the timer heap is empty. + timer0When uint64 + // Per-P GC state gcAssistTime int64 // Nanoseconds in assistAlloc gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) @@ -708,12 +713,20 @@ type p struct { // Must hold timersLock to access. timers []*timer + // Number of timers in P's heap. + // Modified using atomic instructions. + numTimers uint32 + // Number of timerModifiedEarlier timers on P's heap. // This should only be modified while holding timersLock, // or while the timer status is in a transient state // such as timerModifying. adjustTimers uint32 + // Number of timerDeleted timers in P's heap. + // Modified using atomic instructions. + deletedTimers uint32 + // Race context used while executing timer functions. // Not for gccgo: timerRaceCtx uintptr diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index fb16796..b6fab6d 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -199,9 +199,9 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) { // the waiter G immediately. // Note that waiter inherits our time slice: this is desirable // to avoid having a highly contended semaphore hog the P - // indefinitely. goyield is like Gosched, but it does not emit a - // GoSched trace event and, more importantly, puts the current G - // on the local runq instead of the global one. + // indefinitely. goyield is like Gosched, but it emits a + // "preempted" trace event instead and, more importantly, puts + // the current G on the local runq instead of the global one. 
// We only do this in the starving regime (handoff=true), as in // the non-starving case it is possible for a different waiter // to acquire the semaphore while we are yielding/scheduling, diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index 29f9443..150345f 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -399,6 +399,16 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { sigprofNonGo(pc) return } + if sig == sigPreempt && preemptMSupported && debug.asyncpreemptoff == 0 { + // This is probably a signal from preemptM sent + // while executing Go code but received while + // executing non-Go code. + // We got past sigfwdgo, so we know that there is + // no non-Go signal handler for sigPreempt. + // The default behavior for sigPreempt is to ignore + // the signal, so badsignal will be a no-op anyway. + return + } badsignal(uintptr(sig), &c) return } diff --git a/libgo/go/runtime/testdata/testprog/checkptr.go b/libgo/go/runtime/testdata/testprog/checkptr.go new file mode 100644 index 0000000..177db38 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/checkptr.go @@ -0,0 +1,36 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "unsafe" + +func init() { + register("CheckPtrAlignment", CheckPtrAlignment) + register("CheckPtrArithmetic", CheckPtrArithmetic) + register("CheckPtrSize", CheckPtrSize) + register("CheckPtrSmall", CheckPtrSmall) +} + +func CheckPtrAlignment() { + var x [2]int64 + p := unsafe.Pointer(&x[0]) + sink2 = (*int64)(unsafe.Pointer(uintptr(p) + 1)) +} + +func CheckPtrArithmetic() { + var x int + i := uintptr(unsafe.Pointer(&x)) + sink2 = (*int)(unsafe.Pointer(i)) +} + +func CheckPtrSize() { + p := new(int64) + sink2 = p + sink2 = (*[100]int64)(unsafe.Pointer(p)) +} + +func CheckPtrSmall() { + sink2 = unsafe.Pointer(uintptr(1)) +} diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index ded68ed..d0dd3a4 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -73,14 +73,15 @@ type timer struct { // timerNoStatus -> timerWaiting // anything else -> panic: invalid value // deltimer: -// timerWaiting -> timerDeleted -// timerModifiedXX -> timerDeleted -// timerNoStatus -> do nothing -// timerDeleted -> do nothing -// timerRemoving -> do nothing -// timerRemoved -> do nothing -// timerRunning -> wait until status changes -// timerMoving -> wait until status changes +// timerWaiting -> timerDeleted +// timerModifiedEarlier -> timerModifying -> timerDeleted +// timerModifiedLater -> timerDeleted +// timerNoStatus -> do nothing +// timerDeleted -> do nothing +// timerRemoving -> do nothing +// timerRemoved -> do nothing +// timerRunning -> wait until status changes +// timerMoving -> wait until status changes // timerModifying -> panic: concurrent deltimer/modtimer calls // modtimer: // timerWaiting -> timerModifying -> timerModifiedXX @@ -168,6 +169,10 @@ const ( // maxWhen is the maximum value for timer's when field. const maxWhen = 1<<63 - 1 +// verifyTimers can be set to true to add debugging checks that the +// timer heaps are valid. +const verifyTimers = false + // Package time APIs. // Godoc uses the comments in package time, not these. 
@@ -283,7 +288,12 @@ func doaddtimer(pp *p, t *timer) bool { t.pp.set(pp) i := len(pp.timers) pp.timers = append(pp.timers, t) - return siftupTimer(pp.timers, i) + ok := siftupTimer(pp.timers, i) + if t == pp.timers[0] { + atomic.Store64(&pp.timer0When, uint64(t.when)) + } + atomic.Xadd(&pp.numTimers, 1) + return ok } // deltimer deletes the timer t. It may be on some other P, so we can't @@ -294,7 +304,9 @@ func deltimer(t *timer) bool { for { switch s := atomic.Load(&t.status); s { case timerWaiting, timerModifiedLater: + tpp := t.pp.ptr() if atomic.Cas(&t.status, s, timerDeleted) { + atomic.Xadd(&tpp.deletedTimers, 1) // Timer was not yet run. return true } @@ -305,6 +317,7 @@ func deltimer(t *timer) bool { if !atomic.Cas(&t.status, timerModifying, timerDeleted) { badTimer() } + atomic.Xadd(&tpp.deletedTimers, 1) // Timer was not yet run. return true } @@ -355,6 +368,10 @@ func dodeltimer(pp *p, i int) bool { ok = false } } + if i == 0 { + updateTimer0When(pp) + } + atomic.Xadd(&pp.numTimers, -1) return ok } @@ -378,6 +395,8 @@ func dodeltimer0(pp *p) bool { if last > 0 { ok = siftdownTimer(pp.timers, 0) } + updateTimer0When(pp) + atomic.Xadd(&pp.numTimers, -1) return ok } @@ -485,6 +504,7 @@ func resettimer(t *timer, when int64) { return } case timerDeleted: + tpp := t.pp.ptr() if atomic.Cas(&t.status, s, timerModifying) { t.nextwhen = when newStatus := uint32(timerModifiedLater) @@ -495,6 +515,7 @@ func resettimer(t *timer, when int64) { if !atomic.Cas(&t.status, timerModifying, newStatus) { badTimer() } + atomic.Xadd(&tpp.deletedTimers, -1) if newStatus == timerModifiedEarlier { wakeNetPoller(when) } @@ -542,6 +563,7 @@ func cleantimers(pp *p) bool { if !atomic.Cas(&t.status, timerRemoving, timerRemoved) { return false } + atomic.Xadd(&pp.deletedTimers, -1) case timerModifiedEarlier, timerModifiedLater: if !atomic.Cas(&t.status, s, timerMoving) { continue @@ -630,9 +652,13 @@ func adjusttimers(pp *p) { return } if atomic.Load(&pp.adjustTimers) == 0 { + if verifyTimers { + verifyTimerHeap(pp) + } return } var moved []*timer +loop: for i := 0; i < len(pp.timers); i++ { t := pp.timers[i] if t.pp.ptr() != pp { @@ -647,6 +673,7 @@ func adjusttimers(pp *p) { if !atomic.Cas(&t.status, timerRemoving, timerRemoved) { badTimer() } + atomic.Xadd(&pp.deletedTimers, -1) // Look at this heap position again. i-- } @@ -664,10 +691,11 @@ func adjusttimers(pp *p) { moved = append(moved, t) if s == timerModifiedEarlier { if n := atomic.Xadd(&pp.adjustTimers, -1); int32(n) <= 0 { - addAdjustedTimers(pp, moved) - return + break loop } } + // Look at this heap position again. + i-- } case timerNoStatus, timerRunning, timerRemoving, timerRemoved, timerMoving: badTimer() @@ -685,6 +713,10 @@ func adjusttimers(pp *p) { if len(moved) > 0 { addAdjustedTimers(pp, moved) } + + if verifyTimers { + verifyTimerHeap(pp) + } } // addAdjustedTimers adds any timers we adjusted in adjusttimers @@ -708,17 +740,11 @@ func addAdjustedTimers(pp *p, moved []*timer) { // The netpoller M will wake up and adjust timers before sleeping again. //go:nowritebarrierrec func nobarrierWakeTime(pp *p) int64 { - lock(&pp.timersLock) - ret := int64(0) - if len(pp.timers) > 0 { - if atomic.Load(&pp.adjustTimers) > 0 { - ret = nanotime() - } else { - ret = pp.timers[0].when - } + if atomic.Load(&pp.adjustTimers) > 0 { + return nanotime() + } else { + return int64(atomic.Load64(&pp.timer0When)) } - unlock(&pp.timersLock) - return ret } // runtimer examines the first timer in timers. 
If it is ready based on now, @@ -759,6 +785,7 @@ func runtimer(pp *p, now int64) int64 { if !atomic.Cas(&t.status, timerRemoving, timerRemoved) { badTimer() } + atomic.Xadd(&pp.deletedTimers, -1) if len(pp.timers) == 0 { return -1 } @@ -817,6 +844,7 @@ func runOneTimer(pp *p, t *timer, now int64) { if !atomic.Cas(&t.status, timerRunning, timerWaiting) { badTimer() } + updateTimer0When(pp) } else { // Remove from heap. if !dodeltimer0(pp) { @@ -834,69 +862,131 @@ func runOneTimer(pp *p, t *timer, now int64) { lock(&pp.timersLock) } -func timejump() *p { - if faketime == 0 { - return nil - } - - // Nothing is running, so we can look at all the P's. - // Determine a timer bucket with minimum when. - var ( - minT *timer - minWhen int64 - minP *p - ) - for _, pp := range allp { - if pp.status != _Pidle && pp.status != _Pdead { - throw("non-idle P in timejump") - } - if len(pp.timers) == 0 { - continue - } - c := pp.adjustTimers - for _, t := range pp.timers { +// clearDeletedTimers removes all deleted timers from the P's timer heap. +// This is used to avoid clogging up the heap if the program +// starts a lot of long-running timers and then stops them. +// For example, this can happen via context.WithTimeout. +// +// This is the only function that walks through the entire timer heap, +// other than moveTimers which only runs when the world is stopped. +// +// The caller must have locked the timers for pp. +func clearDeletedTimers(pp *p) { + cdel := int32(0) + cearlier := int32(0) + to := 0 + changedHeap := false + timers := pp.timers +nextTimer: + for _, t := range timers { + for { switch s := atomic.Load(&t.status); s { case timerWaiting: - if minT == nil || t.when < minWhen { - minT = t - minWhen = t.when - minP = pp + if changedHeap { + timers[to] = t + siftupTimer(timers, to) } + to++ + continue nextTimer case timerModifiedEarlier, timerModifiedLater: - if minT == nil || t.nextwhen < minWhen { - minT = t - minWhen = t.nextwhen - minP = pp + if atomic.Cas(&t.status, s, timerMoving) { + t.when = t.nextwhen + timers[to] = t + siftupTimer(timers, to) + to++ + changedHeap = true + if !atomic.Cas(&t.status, timerMoving, timerWaiting) { + badTimer() + } + if s == timerModifiedEarlier { + cearlier++ + } + continue nextTimer } - if s == timerModifiedEarlier { - c-- + case timerDeleted: + if atomic.Cas(&t.status, s, timerRemoving) { + t.pp = 0 + cdel++ + if !atomic.Cas(&t.status, timerRemoving, timerRemoved) { + badTimer() + } + changedHeap = true + continue nextTimer } - case timerRunning, timerModifying, timerMoving: + case timerModifying: + // Loop until modification complete. + osyield() + case timerNoStatus, timerRemoved: + // We should not see these status values in a timer heap. + badTimer() + case timerRunning, timerRemoving, timerMoving: + // Some other P thinks it owns this timer, + // which should not happen. + badTimer() + default: badTimer() - } - // The timers are sorted, so we only have to check - // the first timer for each P, unless there are - // some timerModifiedEarlier timers. The number - // of timerModifiedEarlier timers is in the adjustTimers - // field, used to initialize c, above. - if c == 0 { - break } } } - if minT == nil || minWhen <= faketime { - return nil + // Set remaining slots in timers slice to nil, + // so that the timer values can be garbage collected. 
+ for i := to; i < len(timers); i++ { + timers[i] = nil + } + + atomic.Xadd(&pp.deletedTimers, -cdel) + atomic.Xadd(&pp.numTimers, -cdel) + atomic.Xadd(&pp.adjustTimers, -cearlier) + + timers = timers[:to] + pp.timers = timers + updateTimer0When(pp) + + if verifyTimers { + verifyTimerHeap(pp) + } +} + +// verifyTimerHeap verifies that the timer heap is in a valid state. +// This is only for debugging, and is only called if verifyTimers is true. +// The caller must have locked the timers. +func verifyTimerHeap(pp *p) { + for i, t := range pp.timers { + if i == 0 { + // First timer has no parent. + continue + } + + // The heap is 4-ary. See siftupTimer and siftdownTimer. + p := (i - 1) / 4 + if t.when < pp.timers[p].when { + print("bad timer heap at ", i, ": ", p, ": ", pp.timers[p].when, ", ", i, ": ", t.when, "\n") + throw("bad timer heap") + } + } + if numTimers := int(atomic.Load(&pp.numTimers)); len(pp.timers) != numTimers { + println("timer heap len", len(pp.timers), "!= numTimers", numTimers) + throw("bad timer heap len") } +} - faketime = minWhen - return minP +// updateTimer0When sets the P's timer0When field. +// The caller must have locked the timers for pp. +func updateTimer0When(pp *p) { + if len(pp.timers) == 0 { + atomic.Store64(&pp.timer0When, 0) + } else { + atomic.Store64(&pp.timer0When, uint64(pp.timers[0].when)) + } } -// timeSleepUntil returns the time when the next timer should fire. -// This is only called by sysmon. -func timeSleepUntil() int64 { +// timeSleepUntil returns the time when the next timer should fire, +// and the P that holds the timer heap that that timer is on. +// This is only called by sysmon and checkdead. +func timeSleepUntil() (int64, *p) { next := int64(maxWhen) + var pret *p // Prevent allp slice changes. This is like retake. lock(&allpLock) @@ -907,8 +997,17 @@ func timeSleepUntil() int64 { continue } - lock(&pp.timersLock) c := atomic.Load(&pp.adjustTimers) + if c == 0 { + w := int64(atomic.Load64(&pp.timer0When)) + if w != 0 && w < next { + next = w + pret = pp + } + continue + } + + lock(&pp.timersLock) for _, t := range pp.timers { switch s := atomic.Load(&t.status); s { case timerWaiting: @@ -943,7 +1042,7 @@ func timeSleepUntil() int64 { } unlock(&allpLock) - return next + return next, pret } // Heap maintenance algorithms. diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index 81ff0ca..358674b 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -181,12 +181,9 @@ func traceBufPtrOf(b *traceBuf) traceBufPtr { // Most clients should use the runtime/trace package or the testing package's // -test.trace flag instead of calling StartTrace directly. func StartTrace() error { - // Stop the world so that we can take a consistent snapshot + // Stop the world, so that we can take a consistent snapshot // of all goroutines at the beginning of the trace. - // Do not stop the world during GC so we ensure we always see - // a consistent view of GC-related events (e.g. a start is always - // paired with an end). - stopTheWorldGC("start tracing") + stopTheWorld("start tracing") // We are in stop-the-world, but syscalls can finish and write to trace concurrently. 
// Exitsyscall could check trace.enabled long before and then suddenly wake up @@ -197,7 +194,7 @@ func StartTrace() error { if trace.enabled || trace.shutdown { unlock(&trace.bufLock) - startTheWorldGC() + startTheWorld() return errorString("tracing is already enabled") } @@ -268,7 +265,7 @@ func StartTrace() error { unlock(&trace.bufLock) - startTheWorldGC() + startTheWorld() return nil } @@ -277,14 +274,14 @@ func StartTrace() error { func StopTrace() { // Stop the world so that we can collect the trace buffers from all p's below, // and also to avoid races with traceEvent. - stopTheWorldGC("stop tracing") + stopTheWorld("stop tracing") // See the comment in StartTrace. lock(&trace.bufLock) if !trace.enabled { unlock(&trace.bufLock) - startTheWorldGC() + startTheWorld() return } @@ -321,7 +318,7 @@ func StopTrace() { trace.shutdown = true unlock(&trace.bufLock) - startTheWorldGC() + startTheWorld() // The world is started but we've set trace.shutdown, so new tracing can't start. // Wait for the trace reader to flush pending buffers and stop. diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go index e3608c6..62c06e6 100644 --- a/libgo/go/runtime/trace/trace_stack_test.go +++ b/libgo/go/runtime/trace/trace_stack_test.go @@ -233,7 +233,6 @@ func TestTraceSymbolize(t *testing.T) { }}, {trace.EvGomaxprocs, []frame{ {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. - {"runtime.startTheWorldGC", 0}, {"runtime.GOMAXPROCS", 0}, {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, diff --git a/libgo/go/runtime/utf8.go b/libgo/go/runtime/utf8.go index 6590472..a404a33 100644 --- a/libgo/go/runtime/utf8.go +++ b/libgo/go/runtime/utf8.go @@ -13,7 +13,7 @@ import _ "unsafe" // For go:linkname. // Numbers fundamental to the encoding. const ( runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" - runeSelf = 0x80 // characters below Runeself are represented as themselves in a single byte. + runeSelf = 0x80 // characters below runeSelf are represented as themselves in a single byte. maxRune = '\U0010FFFF' // Maximum valid Unicode code point. ) |
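
The timer work above (the new numTimers, deletedTimers and timer0When fields, plus clearDeletedTimers) targets programs that start and stop large numbers of timers, which the clearDeletedTimers comment illustrates with context.WithTimeout. A hedged sketch of that churn pattern follows, placed here at the end of the diff so as not to break up the hunks; the iteration count is arbitrary.

```go
// Sketch of the churn the new deleted-timer cleanup is meant to keep cheap:
// every WithTimeout starts a runtime timer, and cancelling well before the
// deadline leaves a timerDeleted entry on the owning P's heap until the
// runtime sweeps it out (now bounded by the deletedTimers accounting).
package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	for i := 0; i < 100000; i++ {
		_, cancel := context.WithTimeout(context.Background(), time.Hour)
		cancel() // stops the timer early; it is only marked deleted, not removed
	}
	fmt.Println("done churning timers")
}
```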