author     Ian Lance Taylor <iant@golang.org>        2018-09-24 21:46:21 +0000
committer  Ian Lance Taylor <ian@gcc.gnu.org>        2018-09-24 21:46:21 +0000
commit     dd931d9b48647e898dc80927c532ae93cc09e192 (patch)
tree       71be2295cd79b8a182f6130611658db8628772d5 /libgo/go/runtime
parent     779d8a5ad09b01428726ea5a0e6c87bd9ac3c0e4 (diff)
libgo: update to Go 1.11
Reviewed-on: https://go-review.googlesource.com/136435
gotools/:
* Makefile.am (mostlyclean-local): Run chmod on check-go-dir to
make sure it is writable.
(check-go-tools): Likewise.
(check-vet): Copy internal/objabi to check-vet-dir.
* Makefile.in: Rebuild.
From-SVN: r264546
Diffstat (limited to 'libgo/go/runtime')
134 files changed, 6916 insertions, 3144 deletions
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 7c98f1b..c6bc6b6 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/sys" "unsafe" ) @@ -137,7 +138,7 @@ func interhash(p unsafe.Pointer, h uintptr) uintptr { t := *(**_type)(tab) fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -154,7 +155,7 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr { } fn := t.hashfn if fn == nil { - panic(errorString("hash of unhashable type " + *t.string)) + panic(errorString("hash of unhashable type " + t.string())) } if isDirectIface(t) { return c1 * fn(unsafe.Pointer(&a.data), h^c0) @@ -212,7 +213,7 @@ func efaceeq(x, y eface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -233,7 +234,7 @@ func ifaceeq(x, y iface) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == y.data @@ -251,7 +252,7 @@ func ifacevaleq(x iface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -272,7 +273,7 @@ func ifaceefaceeq(x iface, y eface) bool { } eq := xt.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *xt.string)) + panic(errorString("comparing uncomparable type " + xt.string())) } if isDirectIface(xt) { return x.data == y.data @@ -289,7 +290,7 @@ func efacevaleq(x eface, t *_type, p unsafe.Pointer) bool { } eq := t.equalfn if eq == nil { - panic(errorString("comparing uncomparable type " + *t.string)) + panic(errorString("comparing uncomparable type " + t.string())) } if isDirectIface(t) { return x.data == p @@ -388,23 +389,25 @@ func ifaceHash(i interface { const hashRandomBytes = sys.PtrSize / 4 * 64 -// used in asm_{386,amd64}.s to seed the hash function +// used in asm_{386,amd64,arm64}.s to seed the hash function var aeskeysched [hashRandomBytes]byte // used in hash{32,64}.go to seed the hash function var hashkey [4]uintptr func alginit() { - // Install aes hash algorithm if we have the instructions we need + // Install AES hash algorithms if the instructions needed are present. if (GOARCH == "386" || GOARCH == "amd64") && GOOS != "nacl" && support_aes && - cpuid_ecx&(1<<25) != 0 && // aes (aesenc) - cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb) - cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q}) - useAeshash = true - // Initialize with random data so hash collisions will be hard to engineer. - getRandomData(aeskeysched[:]) + cpu.X86.HasAES && // AESENC + cpu.X86.HasSSSE3 && // PSHUFB + cpu.X86.HasSSE41 { // PINSR{D,Q} + initAlgAES() + return + } + if GOARCH == "arm64" && cpu.ARM64.HasAES { + initAlgAES() return } getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:]) @@ -413,3 +416,9 @@ func alginit() { hashkey[2] |= 1 hashkey[3] |= 1 } + +func initAlgAES() { + useAeshash = true + // Initialize with random data so hash collisions will be hard to engineer. 
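(Illustrative aside, not part of the patch.) The alginit change above switches the AES-hash gate from raw CPUID bit tests to the feature flags in internal/cpu. That package is not importable by ordinary programs, but golang.org/x/sys/cpu exposes equivalent flags; a minimal sketch of the same check in user code follows, with hasAESHashSupport being a name invented here:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

// hasAESHashSupport mirrors the conditions alginit now checks before enabling
// the AES-based hash: AESENC, PSHUFB and PINSR{D,Q} on x86, or the AES
// instructions on arm64. On other architectures all of these flags are false.
func hasAESHashSupport() bool {
	if cpu.X86.HasAES && cpu.X86.HasSSSE3 && cpu.X86.HasSSE41 {
		return true
	}
	return cpu.ARM64.HasAES
}

func main() {
	fmt.Println("AES-based hashing possible:", hasAESHashSupport())
}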
+ getRandomData(aeskeysched[:]) +} diff --git a/libgo/go/runtime/atomic_pointer.go b/libgo/go/runtime/atomic_pointer.go index b66ef58..2d023d3 100644 --- a/libgo/go/runtime/atomic_pointer.go +++ b/libgo/go/runtime/atomic_pointer.go @@ -16,11 +16,24 @@ import ( // Instead, these are wrappers around the actual atomics (casp1 and so on) // that use noescape to convey which arguments do not escape. +// atomicwb performs a write barrier before an atomic pointer write. +// The caller should guard the call with "if writeBarrier.enabled". +// +//go:nosplit +func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) { + slot := (*uintptr)(unsafe.Pointer(ptr)) + if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) { + wbBufFlush(slot, uintptr(new)) + } +} + // atomicstorep performs *ptr = new atomically and invokes a write barrier. // //go:nosplit func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(ptr), uintptr(new)) + if writeBarrier.enabled { + atomicwb((*unsafe.Pointer)(ptr), new) + } atomic.StorepNoWB(noescape(ptr), new) } @@ -29,7 +42,9 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { // The write barrier is only necessary if the CAS succeeds, // but since it needs to happen before the write becomes // public, we have to do it conservatively all the time. - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return atomic.Casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new) } @@ -43,7 +58,9 @@ func sync_atomic_StoreUintptr(ptr *uintptr, new uintptr) //go:linkname sync_atomic_StorePointer sync_atomic.StorePointer //go:nosplit func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) } @@ -53,7 +70,9 @@ func sync_atomic_SwapUintptr(ptr *uintptr, new uintptr) uintptr //go:linkname sync_atomic_SwapPointer sync_atomic.SwapPointer //go:nosplit func sync_atomic_SwapPointer(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(new))) return old } @@ -64,6 +83,8 @@ func sync_atomic_CompareAndSwapUintptr(ptr *uintptr, old, new uintptr) bool //go:linkname sync_atomic_CompareAndSwapPointer sync_atomic.CompareAndSwapPointer //go:nosplit func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { - writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new)) + if writeBarrier.enabled { + atomicwb(ptr, new) + } return sync_atomic_CompareAndSwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(old), uintptr(new)) } diff --git a/libgo/go/runtime/vdso_none.go b/libgo/go/runtime/auxv_none.go index fc21240..3ca617b 100644 --- a/libgo/go/runtime/vdso_none.go +++ b/libgo/go/runtime/auxv_none.go @@ -4,6 +4,10 @@ // +build !linux // +build !darwin +// +build !dragonfly +// +build !freebsd +// +build !netbsd +// +build !solaris package runtime diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index 67b2bce..24bf749 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -212,22 +212,13 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, 
indir, top bool, msg string) { // pointer into Go memory. If it does, we panic. // The return values are unused but useful to see in panic tracebacks. func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { - if cgoInRange(p, mheap_.arena_start, mheap_.arena_used) { - if !inheap(uintptr(p)) { - // On 32-bit systems it is possible for C's allocated memory - // to have addresses between arena_start and arena_used. - // Either this pointer is a stack or an unused span or it's - // a C allocation. Escape analysis should prevent the first, - // garbage collection should prevent the second, - // and the third is completely OK. - return - } - - b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0, false) + if inheap(uintptr(p)) { + b, span, _ := findObject(uintptr(p), 0, 0, false) base = b if base == 0 { return } + hbits := heapBitsForAddr(base) n := span.elemsize for i = uintptr(0); i < n; i += sys.PtrSize { if i != 1*sys.PtrSize && !hbits.morePointers() { diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index b85b519..d896fb7 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -126,9 +126,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { roots = roots.next } - aoff := uintptr(src) - mheap_.arena_start - idx := aoff >> _PageShift - s := mheap_.spans[idx] + s := spanOfUnchecked(uintptr(src)) if s.state == _MSpanManual { // There are no heap bits for value stored on the stack. // For a channel receive src might be on the stack of some @@ -151,9 +149,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { if i >= off && bits&bitPointer != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } hbits = hbits.next() @@ -186,9 +182,7 @@ func cgoCheckBits(src unsafe.Pointer, gcbits *byte, off, size uintptr) { if bits&1 != 0 { v := *(*unsafe.Pointer)(add(src, i)) if cgoIsGoPointer(v) { - systemstack(func() { - throw(cgoWriteBarrierFail) - }) + throw(cgoWriteBarrierFail) } } } diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go index 87f7879..88a8944 100644 --- a/libgo/go/runtime/chan.go +++ b/libgo/go/runtime/chan.go @@ -88,7 +88,7 @@ func makechan(t *chantype, size int) *hchan { throw("makechan: bad alignment") } - if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > _MaxMem-hchanSize { + if size < 0 || uintptr(size) > maxSliceCap(elem.size) || uintptr(size)*elem.size > maxAlloc-hchanSize { panic(plainError("makechan: size out of range")) } @@ -157,7 +157,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if !block { return false } - gopark(nil, nil, "chan send (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanSendNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -246,7 +246,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) - goparkunlock(&c.lock, "chan send", traceEvGoBlockSend, 3) + goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3) // someone woke us up. if mysg != gp.waiting { @@ -325,6 +325,8 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // So make sure that no preemption points can happen between read & use. 
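(Illustrative aside, not part of the patch.) The atomic_pointer.go hunk earlier in this diff changes the runtime halves of the exported sync/atomic pointer operations: each now checks writeBarrier.enabled and calls atomicwb instead of writebarrierptr_prewrite. From user code the API is unchanged; a minimal sketch of the calls whose implementations moved, with node and sharedHead invented for the example:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct{ value int }

// sharedHead is updated with the sync/atomic pointer operations that are
// backed by atomicstorep / casp in the runtime hunk above.
var sharedHead unsafe.Pointer // effectively a *node

func main() {
	first := &node{value: 1}
	atomic.StorePointer(&sharedHead, unsafe.Pointer(first))

	second := &node{value: 2}
	if atomic.CompareAndSwapPointer(&sharedHead, unsafe.Pointer(first), unsafe.Pointer(second)) {
		fmt.Println("swapped to", (*node)(atomic.LoadPointer(&sharedHead)).value)
	}
}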
dst := sg.elem typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size) + // No need for cgo write barrier checks because dst is always + // Go memory. memmove(dst, src, t.size) } @@ -444,7 +446,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) if !block { return } - gopark(nil, nil, "chan receive (nil chan)", traceEvGoStop, 2) + gopark(nil, nil, waitReasonChanReceiveNilChan, traceEvGoStop, 2) throw("unreachable") } @@ -535,7 +537,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) mysg.c = c gp.param = nil c.recvq.enqueue(mysg) - goparkunlock(&c.lock, "chan receive", traceEvGoBlockRecv, 3) + goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3) // someone woke us up if mysg != gp.waiting { diff --git a/libgo/go/runtime/chanbarrier_test.go b/libgo/go/runtime/chanbarrier_test.go index b6029fb..d479574 100644 --- a/libgo/go/runtime/chanbarrier_test.go +++ b/libgo/go/runtime/chanbarrier_test.go @@ -57,7 +57,7 @@ func testChanSendBarrier(useSelect bool) { var globalMu sync.Mutex outer := 100 inner := 100000 - if testing.Short() { + if testing.Short() || runtime.GOARCH == "wasm" { outer = 10 inner = 1000 } diff --git a/libgo/go/runtime/cputicks.go b/libgo/go/runtime/cputicks.go index 7e62dc1..c41a58b 100644 --- a/libgo/go/runtime/cputicks.go +++ b/libgo/go/runtime/cputicks.go @@ -2,6 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// // +build !arm +// // +build !arm64 +// // +build !mips64 +// // +build !mips64le +// // +build !mips +// // +build !mipsle +// // +build !wasm + package runtime // careful: cputicks is not guaranteed to be monotonic! In particular, we have diff --git a/libgo/go/runtime/crash_cgo_test.go b/libgo/go/runtime/crash_cgo_test.go index 6688b3c..770f85e 100644 --- a/libgo/go/runtime/crash_cgo_test.go +++ b/libgo/go/runtime/crash_cgo_test.go @@ -89,19 +89,6 @@ func TestCgoExternalThreadSIGPROF(t *testing.T) { switch runtime.GOOS { case "plan9", "windows": t.Skipf("no pthreads on %s", runtime.GOOS) - case "darwin": - if runtime.GOARCH != "arm" && runtime.GOARCH != "arm64" { - // static constructor needs external linking, but we don't support - // external linking on OS X 10.6. 
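(Illustrative aside, not part of the patch.) The chan.go hunks above replace the string arguments to gopark/goparkunlock with typed waitReason constants; the text a user sees in goroutine dumps stays the same. A small sketch that makes the blocked state visible:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	ch := make(chan int)
	go func() {
		<-ch // parks in chanrecv; the dump below should show it as [chan receive]
	}()
	time.Sleep(100 * time.Millisecond) // give the goroutine time to block

	buf := make([]byte, 1<<16)
	n := runtime.Stack(buf, true) // dump all goroutines, including the blocked one
	fmt.Printf("%s\n", buf[:n])
}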
- out, err := exec.Command("uname", "-r").Output() - if err != nil { - t.Fatalf("uname -r failed: %v", err) - } - // OS X 10.6 == Darwin 10.x - if strings.HasPrefix(string(out), "10.") { - t.Skipf("no external linking on OS X 10.6") - } - } } if runtime.GOARCH == "ppc64" { // TODO(austin) External linking not implemented on @@ -252,8 +239,12 @@ func TestCgoCCodeSIGPROF(t *testing.T) { func TestCgoCrashTraceback(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || (runtime.GOARCH != "amd64" && runtime.GOARCH != "ppc64le") { - t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) + switch platform := runtime.GOOS + "/" + runtime.GOARCH; platform { + case "darwin/amd64": + case "linux/amd64": + case "linux/ppc64le": + default: + t.Skipf("not yet supported on %s", platform) } if runtime.Compiler == "gccgo" { t.Skip("gccgo does not have SetCgoTraceback") @@ -352,7 +343,7 @@ func TestCgoPprofThreadNoTraceback(t *testing.T) { } func TestRaceProf(t *testing.T) { - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } if runtime.Compiler == "gccgo" { @@ -384,7 +375,7 @@ func TestRaceProf(t *testing.T) { func TestRaceSignal(t *testing.T) { t.Parallel() - if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" { + if (runtime.GOOS != "linux" && runtime.GOOS != "freebsd") || runtime.GOARCH != "amd64" { t.Skipf("not yet supported on %s/%s", runtime.GOOS, runtime.GOARCH) } @@ -514,3 +505,19 @@ func TestCgoTracebackSigpanic(t *testing.T) { t.Fatalf("failure incorrectly contains %q. output:\n%s\n", nowant, got) } } + +// Test that C code called via cgo can use large Windows thread stacks +// and call back in to Go without crashing. See issue #20975. +// +// See also TestBigStackCallbackSyscall. +func TestBigStackCallbackCgo(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping windows specific test") + } + t.Parallel() + got := runTestProg(t, "testprogcgo", "BigStack") + want := "OK\n" + if got != want { + t.Errorf("expected %q got %v", want, got) + } +} diff --git a/libgo/go/runtime/crash_gccgo_test.go b/libgo/go/runtime/crash_gccgo_test.go index c216e54..d4a826e 100644 --- a/libgo/go/runtime/crash_gccgo_test.go +++ b/libgo/go/runtime/crash_gccgo_test.go @@ -38,8 +38,8 @@ func TestGccgoCrashTracebackNodebug(t *testing.T) { } cc := strings.Fields(os.Getenv("CC")) - cc = append(cc, "-x", "c++", "-") - out, _ := exec.Command(cc[0], cc[1:]...).CombinedOutput() + cc = append(cc, "-o", os.DevNull, "-x", "c++", "-") + out, _ := testenv.CleanCmdEnv(exec.Command(cc[0], cc[1:]...)).CombinedOutput() if bytes.Contains(out, []byte("error trying to exec 'cc1plus'")) { t.Skip("no C++ compiler") } diff --git a/libgo/go/runtime/crash_nonunix_test.go b/libgo/go/runtime/crash_nonunix_test.go index 2ce995c..bf349a5 100644 --- a/libgo/go/runtime/crash_nonunix_test.go +++ b/libgo/go/runtime/crash_nonunix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build windows plan9 nacl +// +build windows plan9 nacl js,wasm package runtime_test diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go index 602630d..91a5c16 100644 --- a/libgo/go/runtime/crash_test.go +++ b/libgo/go/runtime/crash_test.go @@ -652,3 +652,112 @@ func TestBadTraceback(t *testing.T) { } } } + +func TestTimePprof(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("gccgo may not have the pprof tool") + } + fn := runTestProg(t, "testprog", "TimeProf") + fn = strings.TrimSpace(fn) + defer os.Remove(fn) + + cmd := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "tool", "pprof", "-top", "-nodecount=1", fn)) + cmd.Env = append(cmd.Env, "PPROF_TMPDIR="+os.TempDir()) + top, err := cmd.CombinedOutput() + t.Logf("%s", top) + if err != nil { + t.Error(err) + } else if bytes.Contains(top, []byte("ExternalCode")) { + t.Error("profiler refers to ExternalCode") + } +} + +// Test that runtime.abort does so. +func TestAbort(t *testing.T) { + // Pass GOTRACEBACK to ensure we get runtime frames. + output := runTestProg(t, "testprog", "Abort", "GOTRACEBACK=system") + if want := "runtime.abort"; !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } + if strings.Contains(output, "BAD") { + t.Errorf("output contains BAD:\n%s", output) + } + // Check that it's a signal traceback. + want := "PC=" + // For systems that use a breakpoint, check specifically for that. + if runtime.Compiler == "gc" { + switch runtime.GOARCH { + case "386", "amd64": + switch runtime.GOOS { + case "plan9": + want = "sys: breakpoint" + case "windows": + want = "Exception 0x80000003" + default: + want = "SIGTRAP" + } + } + } + if !strings.Contains(output, want) { + t.Errorf("output does not contain %q:\n%s", want, output) + } +} + +// For TestRuntimePanic: test a panic in the runtime package without +// involving the testing harness. +func init() { + if os.Getenv("GO_TEST_RUNTIME_PANIC") == "1" { + defer func() { + if r := recover(); r != nil { + // We expect to crash, so exit 0 + // to indicate failure. + os.Exit(0) + } + }() + runtime.PanicForTesting(nil, 1) + // We expect to crash, so exit 0 to indicate failure. + os.Exit(0) + } +} + +func TestRuntimePanic(t *testing.T) { + testenv.MustHaveExec(t) + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestRuntimePanic")) + cmd.Env = append(cmd.Env, "GO_TEST_RUNTIME_PANIC=1") + out, err := cmd.CombinedOutput() + t.Logf("%s", out) + if err == nil { + t.Error("child process did not fail") + } else if want := "runtime.unexportedPanicForTesting"; !bytes.Contains(out, []byte(want)) { + t.Errorf("output did not contain expected string %q", want) + } +} + +// Test that g0 stack overflows are handled gracefully. +func TestG0StackOverflow(t *testing.T) { + testenv.MustHaveExec(t) + + switch runtime.GOOS { + case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "android": + t.Skipf("g0 stack is wrong on pthread platforms (see golang.org/issue/26061)") + } + + if os.Getenv("TEST_G0_STACK_OVERFLOW") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestG0StackOverflow", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_G0_STACK_OVERFLOW=1") + out, err := cmd.CombinedOutput() + // Don't check err since it's expected to crash. + if n := strings.Count(string(out), "morestack on g0\n"); n != 1 { + t.Fatalf("%s\n(exit status %v)", out, err) + } + // Check that it's a signal-style traceback. 
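(Illustrative aside, not part of the patch.) TestRuntimePanic above uses a common pattern for crash tests: the test binary re-executes itself (os.Args[0]) with a marker environment variable so the crash happens in a child process. A stripped-down sketch of the same technique, using TestMain and a made-up CRASH_CHILD variable instead of the init hook the runtime test uses:

package mypkg_test

import (
	"os"
	"os/exec"
	"strings"
	"testing"
)

// TestMain crashes on purpose when the child marker is set, so the panic
// happens in a subprocess rather than in the test harness itself.
func TestMain(m *testing.M) {
	if os.Getenv("CRASH_CHILD") == "1" {
		panic("boom")
	}
	os.Exit(m.Run())
}

func TestCrashInChild(t *testing.T) {
	cmd := exec.Command(os.Args[0], "-test.run=TestCrashInChild")
	cmd.Env = append(os.Environ(), "CRASH_CHILD=1")
	out, err := cmd.CombinedOutput()
	if err == nil {
		t.Fatal("child process did not fail")
	}
	if !strings.Contains(string(out), "boom") {
		t.Errorf("child output missing panic message:\n%s", out)
	}
}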
+ if runtime.GOOS != "windows" { + if want := "PC="; !strings.Contains(string(out), want) { + t.Errorf("output does not contain %q:\n%s", want, out) + } + } + return + } + + runtime.G0StackOverflow() +} diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index 7cddd29..5be2ec4 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -15,6 +15,10 @@ import ( // The number of logical CPUs on the local machine can be queried with NumCPU. // This call will go away when the scheduler improves. func GOMAXPROCS(n int) int { + if GOARCH == "wasm" && n > 1 { + n = 1 // WebAssembly has no threads yet, so only one CPU is possible. + } + lock(&sched.lock) ret := int(gomaxprocs) unlock(&sched.lock) diff --git a/libgo/go/runtime/debug/heapdump_test.go b/libgo/go/runtime/debug/heapdump_test.go index 7d5b950..c986efc 100644 --- a/libgo/go/runtime/debug/heapdump_test.go +++ b/libgo/go/runtime/debug/heapdump_test.go @@ -13,8 +13,8 @@ import ( ) func TestWriteHeapDumpNonempty(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { @@ -42,8 +42,8 @@ func objfin(x *Obj) { } func TestWriteHeapDumpFinalizers(t *testing.T) { - if runtime.GOOS == "nacl" { - t.Skip("WriteHeapDump is not available on NaCl.") + if runtime.GOOS == "nacl" || runtime.GOOS == "js" { + t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS) } f, err := ioutil.TempFile("", "heapdumptest") if err != nil { diff --git a/libgo/go/runtime/debug_test.go b/libgo/go/runtime/debug_test.go new file mode 100644 index 0000000..38c764f --- /dev/null +++ b/libgo/go/runtime/debug_test.go @@ -0,0 +1,207 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO: This test could be implemented on all (most?) UNIXes if we +// added syscall.Tgkill more widely. + +// We skip all of these tests under race mode because our test thread +// spends all of its time in the race runtime, which isn't a safe +// point. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux +// +build !race + +package runtime_test + +import ( + "fmt" + "runtime" + "runtime/debug" + "sync/atomic" + "syscall" + "testing" +) + +func startDebugCallWorker(t *testing.T) (g *runtime.G, after func()) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + ogomaxprocs := runtime.GOMAXPROCS(2) + ogcpercent := debug.SetGCPercent(-1) + + ready := make(chan *runtime.G) + var stop uint32 + done := make(chan error) + go debugCallWorker(ready, &stop, done) + g = <-ready + return g, func() { + atomic.StoreUint32(&stop, 1) + err := <-done + if err != nil { + t.Fatal(err) + } + runtime.GOMAXPROCS(ogomaxprocs) + debug.SetGCPercent(ogcpercent) + } +} + +func debugCallWorker(ready chan<- *runtime.G, stop *uint32, done chan<- error) { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + ready <- runtime.Getg() + + x := 2 + debugCallWorker2(stop, &x) + if x != 1 { + done <- fmt.Errorf("want x = 2, got %d; register pointer not adjusted?", x) + } + close(done) +} + +func debugCallWorker2(stop *uint32, x *int) { + for atomic.LoadUint32(stop) == 0 { + // Strongly encourage x to live in a register so we + // can test pointer register adjustment. 
+ *x++ + } + *x = 1 +} + +func debugCallTKill(tid int) error { + return syscall.Tgkill(syscall.Getpid(), tid, syscall.SIGTRAP) +} + +func TestDebugCall(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call into the debugCallWorker goroutine and test + // basic argument and result passing. + var args struct { + x int + yRet int + } + fn := func(x int) (yRet int) { + return x + 1 + } + args.x = 42 + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if args.yRet != 43 { + t.Fatalf("want 43, got %d", args.yRet) + } +} + +func TestDebugCallLarge(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call with a large call frame. + const N = 128 + var args struct { + in [N]int + out [N]int + } + fn := func(in [N]int) (out [N]int) { + for i := range in { + out[i] = in[i] + 1 + } + return + } + var want [N]int + for i := range args.in { + args.in[i] = i + want[i] = i + 1 + } + if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil { + t.Fatal(err) + } + if want != args.out { + t.Fatalf("want %v, got %v", want, args.out) + } +} + +func TestDebugCallGC(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that performs a GC. + if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +func TestDebugCallGrowStack(t *testing.T) { + g, after := startDebugCallWorker(t) + defer after() + + // Inject a call that grows the stack. debugCallWorker checks + // for stack pointer breakage. + if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill); err != nil { + t.Fatal(err) + } +} + +//go:nosplit +func debugCallUnsafePointWorker(gpp **runtime.G, ready, stop *uint32) { + // The nosplit causes this function to not contain safe-points + // except at calls. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + *gpp = runtime.Getg() + + for atomic.LoadUint32(stop) == 0 { + atomic.StoreUint32(ready, 1) + } +} + +func TestDebugCallUnsafePoint(t *testing.T) { + // This can deadlock if there aren't enough threads or if a GC + // tries to interrupt an atomic loop (see issue #10958). + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + defer debug.SetGCPercent(debug.SetGCPercent(-1)) + + // Test that the runtime refuses call injection at unsafe points. + var g *runtime.G + var ready, stop uint32 + defer atomic.StoreUint32(&stop, 1) + go debugCallUnsafePointWorker(&g, &ready, &stop) + for atomic.LoadUint32(&ready) == 0 { + runtime.Gosched() + } + + _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill) + if msg := "call not at safe point"; err == nil || err.Error() != msg { + t.Fatalf("want %q, got %s", msg, err) + } +} + +func TestDebugCallPanic(t *testing.T) { + // This can deadlock if there aren't enough threads. 
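(Illustrative aside, not part of the patch.) The debug-call tests above keep a worker goroutine locked to one OS thread and spinning until an atomic flag flips, so the harness knows exactly which thread to send SIGTRAP to. A standalone sketch of just that control pattern:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
	"time"
)

func worker(stop *uint32, done chan<- int) {
	runtime.LockOSThread() // stay on a single OS thread, as debugCallWorker does
	defer runtime.UnlockOSThread()

	n := 0
	for atomic.LoadUint32(stop) == 0 {
		n++ // busy loop until asked to stop
	}
	done <- n
}

func main() {
	var stop uint32
	done := make(chan int)
	go worker(&stop, done)

	time.Sleep(50 * time.Millisecond)
	atomic.StoreUint32(&stop, 1)
	fmt.Println("iterations:", <-done)
}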
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2)) + + ready := make(chan *runtime.G) + var stop uint32 + defer atomic.StoreUint32(&stop, 1) + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + ready <- runtime.Getg() + for atomic.LoadUint32(&stop) == 0 { + } + }() + g := <-ready + + p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill) + if err != nil { + t.Fatal(err) + } + if ps, ok := p.(string); !ok || ps != "test" { + t.Fatalf("wanted panic %v, got %v", "test", p) + } +} diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go index ddf3c02..399e88f 100644 --- a/libgo/go/runtime/env_posix.go +++ b/libgo/go/runtime/env_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime diff --git a/libgo/go/runtime/error.go b/libgo/go/runtime/error.go index 1a038cf..b1a3f68 100644 --- a/libgo/go/runtime/error.go +++ b/libgo/go/runtime/error.go @@ -19,55 +19,40 @@ type Error interface { // A TypeAssertionError explains a failed type assertion. type TypeAssertionError struct { - interfaceString string - concreteString string - assertedString string - missingMethod string // one method needed by Interface, missing from Concrete + _interface *_type + concrete *_type + asserted *_type + missingMethod string // one method needed by Interface, missing from Concrete } func (*TypeAssertionError) RuntimeError() {} func (e *TypeAssertionError) Error() string { - inter := e.interfaceString - if inter == "" { - inter = "interface" + inter := "interface" + if e._interface != nil { + inter = e._interface.string() } - if e.concreteString == "" { - return "interface conversion: " + inter + " is nil, not " + e.assertedString + as := e.asserted.string() + if e.concrete == nil { + return "interface conversion: " + inter + " is nil, not " + as } + cs := e.concrete.string() if e.missingMethod == "" { - return "interface conversion: " + inter + " is " + e.concreteString + - ", not " + e.assertedString + msg := "interface conversion: " + inter + " is " + cs + ", not " + as + if cs == as { + // provide slightly clearer error message + if e.concrete.pkgpath() != e.asserted.pkgpath() { + msg += " (types from different packages)" + } else { + msg += " (types from different scopes)" + } + } + return msg } - return "interface conversion: " + e.concreteString + " is not " + e.assertedString + + return "interface conversion: " + cs + " is not " + as + ": missing method " + e.missingMethod } -// For calling from C. -func NewTypeAssertionError(ps1, ps2, ps3 *string, pmeth *string, ret *interface{}) { - var s1, s2, s3, meth string - - if ps1 != nil { - s1 = *ps1 - } - if ps2 != nil { - s2 = *ps2 - } - if ps3 != nil { - s3 = *ps3 - } - if pmeth != nil { - meth = *pmeth - } - - // For gccgo, strip out quoted strings. - s1 = unquote(s1) - s2 = unquote(s2) - s3 = unquote(s3) - - *ret = &TypeAssertionError{s1, s2, s3, meth} -} - // Remove quoted strings from gccgo reflection strings. func unquote(s string) string { ls := len(s) @@ -135,7 +120,7 @@ type stringer interface { func typestring(x interface{}) string { e := efaceOf(&x) - return *e._type.string + return e._type.string() } // printany prints an argument passed to panic. 
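(Illustrative aside, not part of the patch.) The error.go rewrite above makes TypeAssertionError carry *_type values and build its message on demand; what a program observes is unchanged. A small example of triggering and inspecting that runtime error:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	defer func() {
		if r := recover(); r != nil {
			// A failed type assertion panics with a *runtime.TypeAssertionError,
			// which implements the runtime.Error interface.
			if re, ok := r.(runtime.Error); ok {
				fmt.Println("runtime error:", re.Error())
			}
		}
	}()

	var i interface{} = "hello"
	_ = i.(int) // interface conversion: interface {} is string, not int
}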
diff --git a/libgo/go/runtime/export_debug_test.go b/libgo/go/runtime/export_debug_test.go new file mode 100644 index 0000000..2d2d535 --- /dev/null +++ b/libgo/go/runtime/export_debug_test.go @@ -0,0 +1,169 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore_for_gccgo +// +build amd64 +// +build linux + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// InjectDebugCall injects a debugger call to fn into g. args must be +// a pointer to a valid call frame (including arguments and return +// space) for fn, or nil. tkill must be a function that will send +// SIGTRAP to thread ID tid. gp must be locked to its OS thread and +// running. +// +// On success, InjectDebugCall returns the panic value of fn or nil. +// If fn did not panic, its results will be available in args. +func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (interface{}, error) { + if gp.lockedm == 0 { + return nil, plainError("goroutine not locked to thread") + } + + tid := int(gp.lockedm.ptr().procid) + if tid == 0 { + return nil, plainError("missing tid") + } + + f := efaceOf(&fn) + if f._type == nil || f._type.kind&kindMask != kindFunc { + return nil, plainError("fn must be a function") + } + fv := (*funcval)(f.data) + + a := efaceOf(&args) + if a._type != nil && a._type.kind&kindMask != kindPtr { + return nil, plainError("args must be a pointer or nil") + } + argp := a.data + var argSize uintptr + if argp != nil { + argSize = (*ptrtype)(unsafe.Pointer(a._type)).elem.size + } + + h := new(debugCallHandler) + h.gp = gp + h.fv, h.argp, h.argSize = fv, argp, argSize + h.handleF = h.handle // Avoid allocating closure during signal + noteclear(&h.done) + + defer func() { testSigtrap = nil }() + testSigtrap = h.inject + if err := tkill(tid); err != nil { + return nil, err + } + // Wait for completion. + notetsleepg(&h.done, -1) + if len(h.err) != 0 { + return nil, h.err + } + return h.panic, nil +} + +type debugCallHandler struct { + gp *g + fv *funcval + argp unsafe.Pointer + argSize uintptr + panic interface{} + + handleF func(info *siginfo, ctxt *sigctxt, gp2 *g) bool + + err plainError + done note + savedRegs sigcontext + savedFP fpstate1 +} + +func (h *debugCallHandler) inject(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + switch h.gp.atomicstatus { + case _Grunning: + if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + // Push current PC on the stack. + rsp := ctxt.rsp() - sys.PtrSize + *(*uint64)(unsafe.Pointer(uintptr(rsp))) = ctxt.rip() + ctxt.set_rsp(rsp) + // Write the argument frame size. + *(*uintptr)(unsafe.Pointer(uintptr(rsp - 16))) = h.argSize + // Save current registers. + h.savedRegs = *ctxt.regs() + h.savedFP = *h.savedRegs.fpstate + h.savedRegs.fpstate = nil + // Set PC to debugCallV1. + ctxt.set_rip(uint64(funcPC(debugCallV1))) + default: + h.err = plainError("goroutine in unexpected state at call inject") + return true + } + // Switch to the debugCall protocol and resume execution. + testSigtrap = h.handleF + return true +} + +func (h *debugCallHandler) handle(info *siginfo, ctxt *sigctxt, gp2 *g) bool { + // Sanity check. 
+ if getg().m != h.gp.m { + println("trap on wrong M", getg().m, h.gp.m) + return false + } + f := findfunc(uintptr(ctxt.rip())) + if !(hasprefix(funcname(f), "runtime.debugCall") || hasprefix(funcname(f), "debugCall")) { + println("trap in unknown function", funcname(f)) + return false + } + if *(*byte)(unsafe.Pointer(uintptr(ctxt.rip() - 1))) != 0xcc { + println("trap at non-INT3 instruction pc =", hex(ctxt.rip())) + return false + } + + switch status := ctxt.rax(); status { + case 0: + // Frame is ready. Copy the arguments to the frame. + sp := ctxt.rsp() + memmove(unsafe.Pointer(uintptr(sp)), h.argp, h.argSize) + // Push return PC. + sp -= sys.PtrSize + ctxt.set_rsp(sp) + *(*uint64)(unsafe.Pointer(uintptr(sp))) = ctxt.rip() + // Set PC to call and context register. + ctxt.set_rip(uint64(h.fv.fn)) + ctxt.regs().rcx = uint64(uintptr(unsafe.Pointer(h.fv))) + case 1: + // Function returned. Copy frame back out. + sp := ctxt.rsp() + memmove(h.argp, unsafe.Pointer(uintptr(sp)), h.argSize) + case 2: + // Function panicked. Copy panic out. + sp := ctxt.rsp() + memmove(unsafe.Pointer(&h.panic), unsafe.Pointer(uintptr(sp)), 2*sys.PtrSize) + case 8: + // Call isn't safe. Get the reason. + sp := ctxt.rsp() + reason := *(*string)(unsafe.Pointer(uintptr(sp))) + h.err = plainError(reason) + case 16: + // Restore all registers except RIP and RSP. + rip, rsp := ctxt.rip(), ctxt.rsp() + fp := ctxt.regs().fpstate + *ctxt.regs() = h.savedRegs + ctxt.regs().fpstate = fp + *fp = h.savedFP + ctxt.set_rip(rip) + ctxt.set_rsp(rsp) + // Done + notewakeup(&h.done) + default: + h.err = plainError("unexpected debugCallV1 status") + } + // Resume execution. + return true +} diff --git a/libgo/go/runtime/export_linux_test.go b/libgo/go/runtime/export_linux_test.go index 183a6ee..96ff1c7 100644 --- a/libgo/go/runtime/export_linux_test.go +++ b/libgo/go/runtime/export_linux_test.go @@ -6,5 +6,11 @@ package runtime -//var NewOSProc0 = newosproc0 -//var Mincore = mincore +import "unsafe" + +// var NewOSProc0 = newosproc0 +// var Mincore = mincore + +func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 { + return epollctl(epfd, op, fd, (*epollevent)(ev)) +} diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index 5e798e3..7f4811c 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -21,7 +21,6 @@ import ( //var Fcmp64 = fcmp64 //var Fintto64 = fintto64 //var F64toint = f64toint -//var Sqrt = sqrt var Entersyscall = entersyscall var Exitsyscall = exitsyscall @@ -372,6 +371,8 @@ func (rw *RWMutex) Unlock() { rw.rw.unlock() } +const RuntimeHmapSize = unsafe.Sizeof(hmap{}) + func MapBucketsCount(m map[int]int) int { h := *(**hmap)(unsafe.Pointer(&m)) return 1 << h.B @@ -395,3 +396,61 @@ func LockOSCounts() (external, internal uint32) { } return g.m.lockedExt, g.m.lockedInt } + +func KeepNArenaHints(n int) { + hint := mheap_.arenaHints + for i := 1; i < n; i++ { + hint = hint.next + if hint == nil { + return + } + } + hint.next = nil +} + +// MapNextArenaHint reserves a page at the next arena growth hint, +// preventing the arena from growing there, and returns the range of +// addresses that are no longer viable. 
+func MapNextArenaHint() (start, end uintptr) { + hint := mheap_.arenaHints + addr := hint.addr + if hint.down { + start, end = addr-heapArenaBytes, addr + addr -= physPageSize + } else { + start, end = addr, addr+heapArenaBytes + } + sysReserve(unsafe.Pointer(addr), physPageSize) + return +} + +func GetNextArenaHint() uintptr { + return mheap_.arenaHints.addr +} + +type G = g + +func Getg() *G { + return getg() +} + +//go:noinline +func PanicForTesting(b []byte, i int) byte { + return unexportedPanicForTesting(b, i) +} + +//go:noinline +func unexportedPanicForTesting(b []byte, i int) byte { + return b[i] +} + +func G0StackOverflow() { + systemstack(func() { + stackOverflow(nil) + }) +} + +func stackOverflow(x *byte) { + var buf [256]byte + stackOverflow(&buf[0]) +} diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index b3afd10..c9d10f1 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -116,6 +116,12 @@ It is a comma-separated list of name=val pairs setting these named variables: schedtrace: setting schedtrace=X causes the scheduler to emit a single line to standard error every X milliseconds, summarizing the scheduler state. + tracebackancestors: setting tracebackancestors=N extends tracebacks with the stacks at + which goroutines were created, where N limits the number of ancestor goroutines to + report. This also extends the information returned by runtime.Stack. Ancestor's goroutine + IDs will refer to the ID of the goroutine at the time of creation; it's possible for this + ID to be reused for another goroutine. Setting N to 0 will report no ancestry information. + The net and net/http packages also refer to debugging variables in GODEBUG. See the documentation for those packages for details. diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index a8c52d2..180919b 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -10,6 +10,7 @@ import ( "reflect" "runtime" "runtime/debug" + "sync" "sync/atomic" "testing" "time" @@ -44,7 +45,7 @@ func TestGcDeepNesting(t *testing.T) { } } -func TestGcHashmapIndirection(t *testing.T) { +func TestGcMapIndirection(t *testing.T) { defer debug.SetGCPercent(debug.SetGCPercent(1)) runtime.GC() type T struct { @@ -157,6 +158,10 @@ func TestHugeGCInfo(t *testing.T) { /* func TestPeriodicGC(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no sysmon on wasm yet") + } + // Make sure we're not in the middle of a GC. 
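(Illustrative aside, not part of the patch.) The GODEBUG documentation added to extern.go above introduces tracebackancestors, which also extends what runtime.Stack reports. A tiny program to experiment with, run for example as GODEBUG=tracebackancestors=5 go run main.go:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() { // ancestor goroutine
		go func() { // child: with tracebackancestors set, its creators' stacks should be reported too
			buf := make([]byte, 1<<16)
			n := runtime.Stack(buf, true) // full dump of all goroutines
			fmt.Printf("%s\n", buf[:n])
			close(done)
		}()
	}()
	<-done
}

Without the GODEBUG setting, the dump stops at the usual "created by" line for each goroutine.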
runtime.GC() @@ -642,3 +647,34 @@ func BenchmarkBulkWriteBarrier(b *testing.B) { runtime.KeepAlive(ptrs) } + +func BenchmarkScanStackNoLocals(b *testing.B) { + var ready sync.WaitGroup + teardown := make(chan bool) + for j := 0; j < 10; j++ { + ready.Add(1) + go func() { + x := 100000 + countpwg(&x, &ready, teardown) + }() + } + ready.Wait() + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StartTimer() + runtime.GC() + runtime.GC() + b.StopTimer() + } + close(teardown) +} + +func countpwg(n *int, ready *sync.WaitGroup, teardown chan bool) { + if *n == 0 { + ready.Done() + <-teardown + return + } + *n-- + countpwg(n, ready, teardown) +} diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go index 4ac67dc..ca012bb 100644 --- a/libgo/go/runtime/gcinfo_test.go +++ b/libgo/go/runtime/gcinfo_test.go @@ -133,7 +133,7 @@ func infoBigStruct() []byte { typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64 typePointer, typeScalar, // i string } - case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x": + case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x", "wasm": return []byte{ typePointer, // q *int typeScalar, typeScalar, typeScalar, // w byte; e [17]byte @@ -186,6 +186,6 @@ var ( infoString = []byte{typePointer, typeScalar} infoSlice = []byte{typePointer, typeScalar, typeScalar} - infoEface = []byte{typePointer, typePointer} - infoIface = []byte{typePointer, typePointer} + infoEface = []byte{typeScalar, typePointer} + infoIface = []byte{typeScalar, typePointer} ) diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index 74775a8..7c6513e 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 +// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x wasm alpha arm64be ia64 mips64p32 mips64p32le sparc64 riscv64 package runtime @@ -26,7 +26,8 @@ const ( ) func memhash(p unsafe.Pointer, seed, s uintptr) uintptr { - if GOARCH == "amd64" && GOOS != "nacl" && useAeshash { + if (GOARCH == "amd64" || GOARCH == "arm64") && + GOOS != "nacl" && useAeshash { return aeshash(p, seed, s) } h := uint64(seed + s*hashkey[0]) diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go index 54c9160..070edb6 100644 --- a/libgo/go/runtime/hash_test.go +++ b/libgo/go/runtime/hash_test.go @@ -161,6 +161,9 @@ func TestSmhasherZeros(t *testing.T) { // Strings with up to two nonzero bytes all have distinct hashes. func TestSmhasherTwoNonzero(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -229,6 +232,9 @@ func TestSmhasherCyclic(t *testing.T) { // Test strings with only a few bits set func TestSmhasherSparse(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -264,6 +270,9 @@ func setbits(h *HashSet, b []byte, i int, k int) { // Test all possible combinations of n blocks from the set s. // "permutation" is a bad name here, but it is what Smhasher uses. 
func TestSmhasherPermutation(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -433,6 +442,9 @@ func (k *IfaceKey) name() string { // Flipping a single bit of a key should flip each output bit with 50% probability. func TestSmhasherAvalanche(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } @@ -508,6 +520,9 @@ func TestSmhasherWindowed(t *testing.T) { windowed(t, &BytesKey{make([]byte, 128)}) } func windowed(t *testing.T, k Key) { + if GOARCH == "wasm" { + t.Skip("Too slow on wasm") + } if testing.Short() { t.Skip("Skipping in short mode") } diff --git a/libgo/go/runtime/hashmap_fast.go b/libgo/go/runtime/hashmap_fast.go deleted file mode 100644 index e0fc981..0000000 --- a/libgo/go/runtime/hashmap_fast.go +++ /dev/null @@ -1,1237 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package runtime - -import ( - "runtime/internal/sys" - "unsafe" -) - -func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. 
- m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if *(*uint32)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - var b *bmap - if h.B == 0 { - // One-bucket table. No need to hash. - b = (*bmap)(h.buckets) - } else { - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - m := bucketMask(h.B) - b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - } - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if *(*uint64)(k) == key && b.tophash[i] != empty { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]) - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) - } - } - return unsafe.Pointer(&zeroVal[0]) - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - } - } - } - return unsafe.Pointer(&zeroVal[0]) -} - -func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) - } - if h == nil || h.count == 0 { - return unsafe.Pointer(&zeroVal[0]), false - } - if h.flags&hashWriting != 0 { - throw("concurrent map read and map write") - } - key := stringStructOf(&ky) - if h.B == 0 { - // One-bucket table. 
- b := (*bmap)(h.buckets) - if key.len < 32 { - // short key, doing lots of comparisons is ok - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } - // long key, try not to do more comparisons than necessary - keymaybe := uintptr(bucketCnt) - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] == empty { - continue - } - if k.str == key.str { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - // check first 4 bytes - if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { - continue - } - // check last 4 bytes - if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { - continue - } - if keymaybe != bucketCnt { - // Two keys are potential matches. Use hash to distinguish them. - goto dohash - } - keymaybe = i - } - if keymaybe != bucketCnt { - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) - if memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true - } - } - return unsafe.Pointer(&zeroVal[0]), false - } -dohash: - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - m := bucketMask(h.B) - b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) - if c := h.oldbuckets; c != nil { - if !h.sameSizeGrow() { - // There used to be half as many buckets; mask down one more power of two. - m >>= 1 - } - oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) - if !evacuated(oldb) { - b = oldb - } - } - top := tophash(hash) - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { - return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true - } - } - } - return unsafe.Pointer(&zeroVal[0]), false -} - -func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*uint32)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - inserti = i - insertb = b - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) - if k != key { - continue - } - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. 
- insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*uint64)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. 
- h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] == empty { - if insertb == nil { - insertb = b - inserti = i - } - continue - } - k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) - if k != key { - continue - } - insertb = b - inserti = i - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. - if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) - // store new key at insert position - *(*unsafe.Pointer)(insertk) = key - - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { - if h == nil { - panic(plainError("assignment to entry in nil map")) - } - if raceenabled { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - key := stringStructOf(&s) - hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapassign. - h.flags |= hashWriting - - if h.buckets == nil { - h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) - } - -again: - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) - top := tophash(hash) - - var insertb *bmap - var inserti uintptr - var insertk unsafe.Pointer - - for { - for i := uintptr(0); i < bucketCnt; i++ { - if b.tophash[i] != top { - if b.tophash[i] == empty && insertb == nil { - insertb = b - inserti = i - } - continue - } - k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) - if k.len != key.len { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // already have a mapping for key. Update it. - inserti = i - insertb = b - goto done - } - ovf := b.overflow(t) - if ovf == nil { - break - } - b = ovf - } - - // Did not find mapping for key. Allocate new cell & add entry. - - // If we hit the max load factor or we have too many overflow buckets, - // and we're not already in the middle of growing, start growing. 
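Every assign path in this deleted file triggers growth with the same test: grow when one more entry would push the map past the load factor, or when too many overflow buckets have piled up. A hedged sketch of the load-factor half, assuming the 6.5 entries-per-bucket threshold the runtime is known to use (the constants and helper here are illustrative, not copied from this patch):

    package main

    import "fmt"

    const (
        bucketCnt     = 8  // entries per bucket
        loadFactorNum = 13 // 13/2 = 6.5 average entries per bucket (assumed threshold)
        loadFactorDen = 2
    )

    // overLoadFactor reports whether a map holding count entries in 2^B buckets
    // is past the growth threshold, mirroring the check the assign paths make
    // with h.count+1 before calling hashGrow.
    func overLoadFactor(count int, B uint8) bool {
        return count > bucketCnt && uint64(count) > loadFactorNum*(uint64(1)<<B)/loadFactorDen
    }

    func main() {
        // With B=3 there are 8 buckets, so growth starts past 8*6.5 = 52 entries.
        fmt.Println(overLoadFactor(52, 3)) // false
        fmt.Println(overLoadFactor(53, 3)) // true
    }

The overflow-bucket half of the test (tooManyOverflowBuckets) guards against the degenerate case where entries keep landing in overflow chains even though the count stays low.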
- if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { - hashGrow(t, h) - goto again // Growing the table invalidates everything, so try again - } - - if insertb == nil { - // all current buckets are full, allocate a new one. - insertb = h.newoverflow(t, b) - inserti = 0 // not necessary, but avoids needlessly spilling inserti - } - insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks - - insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) - // store new key at insert position - *((*stringStruct)(insertk)) = *key - h.count++ - -done: - val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting - return val -} - -func mapdelete_fast32(t *maptype, h *hmap, key uint32) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast32(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { - if key != *(*uint32)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_fast64(t *maptype, h *hmap, key uint64) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_fast64(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) -search: - for ; b != nil; b = b.overflow(t) { - for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { - if key != *(*uint64)(k) || b.tophash[i] == empty { - continue - } - // Only clear key if there are pointers in it. - if t.key.kind&kindNoPointers == 0 { - memclrHasPointers(k, t.key.size) - } - // Only clear value if there are pointers in it. 
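The lookup and delete paths above reject most cells by comparing b.tophash[i] against tophash(hash) before ever touching the key: each cell caches the top byte of its key's hash, with the small values reserved as markers such as empty and the evacuated states. A sketch of how such a top byte can be derived, assuming a 64-bit uintptr and the conventional value of minTopHash (neither is spelled out in this hunk):

    package main

    import "fmt"

    const (
        empty      = 0 // cell is unused
        minTopHash = 4 // smallest tophash of a real entry (assumed value)
    )

    // tophash reduces a full hash to one byte per cell, bumping values that
    // would collide with the reserved markers above.
    func tophash(hash uintptr) uint8 {
        top := uint8(hash >> (8*8 - 8)) // top byte of a 64-bit hash
        if top < minTopHash {
            top += minTopHash
        }
        return top
    }

    func main() {
        fmt.Printf("%#x\n", tophash(0xdeadbeefcafebabe)) // 0xde
        fmt.Printf("%#x\n", tophash(0x01))               // 0x4: bumped above the markers
    }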
- if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func mapdelete_faststr(t *maptype, h *hmap, ky string) { - if raceenabled && h != nil { - callerpc := getcallerpc() - racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) - } - if h == nil || h.count == 0 { - return - } - if h.flags&hashWriting != 0 { - throw("concurrent map writes") - } - - key := stringStructOf(&ky) - hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) - - // Set hashWriting after calling alg.hash for consistency with mapdelete - h.flags |= hashWriting - - bucket := hash & bucketMask(h.B) - if h.growing() { - growWork_faststr(t, h, bucket) - } - b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) - top := tophash(hash) -search: - for ; b != nil; b = b.overflow(t) { - for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { - k := (*stringStruct)(kptr) - if k.len != key.len || b.tophash[i] != top { - continue - } - if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { - continue - } - // Clear key's pointer. - k.str = nil - // Only clear value if there are pointers in it. - if t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) - memclrHasPointers(v, t.elem.size) - } - b.tophash[i] = empty - h.count-- - break search - } - } - - if h.flags&hashWriting == 0 { - throw("concurrent map writes") - } - h.flags &^= hashWriting -} - -func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast32(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast32(t, h, h.nevacuate) - } -} - -func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*4) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*4) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*4) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*4) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - *(*uint32)(dst.k) = *(*uint32)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 4) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_fast64(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_fast64(t, h, h.nevacuate) - } -} - -func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*8) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*8) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*8) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
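The evacuation code above splits each old bucket between two destinations when the table doubles: bucket x keeps the old index, bucket y is the old index plus the old bucket count, and the choice is made by the single hash bit that the larger mask newly exposes (hash & newbit). A small illustration with an invented splitTarget helper:

    package main

    import "fmt"

    // splitTarget reports which bucket index an entry moves to when a table of
    // oldBuckets buckets doubles: the same index ("x") or index+oldBuckets
    // ("y"), decided by the hash bit the larger mask newly exposes.
    func splitTarget(hash, oldIndex, oldBuckets uintptr) uintptr {
        newbit := oldBuckets // the next power-of-two bit above the old mask
        if hash&newbit != 0 {
            return oldIndex + oldBuckets // destination "y"
        }
        return oldIndex // destination "x"
    }

    func main() {
        // Growing from 8 to 16 buckets: bit 3 of the hash picks x or y.
        fmt.Println(splitTarget(0x5, 0x5, 8)) // 5:  bit 3 clear, stays at x
        fmt.Println(splitTarget(0xd, 0x5, 8)) // 13: bit 3 set, moves to y
    }

Because only entries whose hash has that bit set move, iteration order and the old tophash markers stay consistent while the grow proceeds incrementally.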
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*8) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { - if sys.PtrSize == 8 { - writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k)) - } else { - // There are three ways to squeeze at least one 32 bit pointer into 64 bits. - // Give up and call typedmemmove. - typedmemmove(t.key, dst.k, k) - } - } else { - *(*uint64)(dst.k) = *(*uint64)(k) - } - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 8) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} - -func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { - // make sure we evacuate the oldbucket corresponding - // to the bucket we're about to use - evacuate_faststr(t, h, bucket&h.oldbucketmask()) - - // evacuate one more oldbucket to make progress on growing - if h.growing() { - evacuate_faststr(t, h, h.nevacuate) - } -} - -func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { - b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) - newbit := h.noldbuckets() - if !evacuated(b) { - // TODO: reuse overflow buckets instead of using new ones, if there - // is no iterator using the old buckets. (If !oldIterator.) - - // xy contains the x and y (low and high) evacuation destinations. - var xy [2]evacDst - x := &xy[0] - x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) - x.k = add(unsafe.Pointer(x.b), dataOffset) - x.v = add(x.k, bucketCnt*2*sys.PtrSize) - - if !h.sameSizeGrow() { - // Only calculate y pointers if we're growing bigger. - // Otherwise GC can see bad pointers. - y := &xy[1] - y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) - y.k = add(unsafe.Pointer(y.b), dataOffset) - y.v = add(y.k, bucketCnt*2*sys.PtrSize) - } - - for ; b != nil; b = b.overflow(t) { - k := add(unsafe.Pointer(b), dataOffset) - v := add(k, bucketCnt*2*sys.PtrSize) - for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { - top := b.tophash[i] - if top == empty { - b.tophash[i] = evacuatedEmpty - continue - } - if top < minTopHash { - throw("bad map state") - } - var useY uint8 - if !h.sameSizeGrow() { - // Compute hash to make our evacuation decision (whether we need - // to send this key/value to bucket x or bucket y). 
- hash := t.key.hashfn(k, uintptr(h.hash0)) - if hash&newbit != 0 { - useY = 1 - } - } - - b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap - dst := &xy[useY] // evacuation destination - - if dst.i == bucketCnt { - dst.b = h.newoverflow(t, dst.b) - dst.i = 0 - dst.k = add(unsafe.Pointer(dst.b), dataOffset) - dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) - } - dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check - - // Copy key. - *(*string)(dst.k) = *(*string)(k) - - typedmemmove(t.elem, dst.v, v) - dst.i++ - // These updates might push these pointers past the end of the - // key or value arrays. That's ok, as we have the overflow pointer - // at the end of the bucket to protect against pointing past the - // end of the bucket. - dst.k = add(dst.k, 2*sys.PtrSize) - dst.v = add(dst.v, uintptr(t.valuesize)) - } - } - // Unlink the overflow buckets & clear key/value to help GC. - // Unlink the overflow buckets & clear key/value to help GC. - if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { - b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) - // Preserve b.tophash because the evacuation - // state is maintained there. - ptr := add(b, dataOffset) - n := uintptr(t.bucketsize) - dataOffset - memclrHasPointers(ptr, n) - } - } - - if oldbucket == h.nevacuate { - advanceEvacuationMark(h, t, newbit) - } -} diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go index a4b168d..e92ea39 100644 --- a/libgo/go/runtime/heapdump.go +++ b/libgo/go/runtime/heapdump.go @@ -184,7 +184,7 @@ func dumptype(t *_type) { dumpint(uint64(uintptr(unsafe.Pointer(t)))) dumpint(uint64(t.size)) if x := t.uncommontype; x == nil || t.pkgPath == nil || *t.pkgPath == "" { - dumpstr(*t.string) + dumpstr(t.string()) } else { pkgpathstr := *t.pkgPath pkgpath := stringStructOf(&pkgpathstr) @@ -233,9 +233,8 @@ type childInfo struct { // dump kinds & offsets of interesting fields in bv func dumpbv(cbv *bitvector, offset uintptr) { - bv := gobv(*cbv) - for i := uintptr(0); i < bv.n; i++ { - if bv.bytedata[i/8]>>(i%8)&1 == 1 { + for i := uintptr(0); i < uintptr(cbv.n); i++ { + if cbv.ptrbit(i) == 1 { dumpint(fieldKindPtr) dumpint(uint64(offset + i*sys.PtrSize)) } @@ -254,7 +253,7 @@ func dumpgoroutine(gp *g) { dumpbool(isSystemGoroutine(gp)) dumpbool(false) // isbackground dumpint(uint64(gp.waitsince)) - dumpstr(gp.waitreason) + dumpstr(gp.waitreason.String()) dumpint(0) dumpint(uint64(uintptr(unsafe.Pointer(gp.m)))) dumpint(uint64(uintptr(unsafe.Pointer(gp._defer)))) @@ -372,8 +371,26 @@ func dumpparams() { dumpbool(true) // big-endian ptrs } dumpint(sys.PtrSize) - dumpint(uint64(mheap_.arena_start)) - dumpint(uint64(mheap_.arena_used)) + var arenaStart, arenaEnd uintptr + for i1 := range mheap_.arenas { + if mheap_.arenas[i1] == nil { + continue + } + for i, ha := range mheap_.arenas[i1] { + if ha == nil { + continue + } + base := arenaBase(arenaIdx(i1)<<arenaL1Shift | arenaIdx(i)) + if arenaStart == 0 || base < arenaStart { + arenaStart = base + } + if base+heapArenaBytes > arenaEnd { + arenaEnd = base + heapArenaBytes + } + } + } + dumpint(uint64(arenaStart)) + dumpint(uint64(arenaEnd)) dumpstr(sys.GOARCH) dumpstr(sys.Goexperiment) dumpint(uint64(ncpu)) @@ -509,7 +526,7 @@ func mdump() { func writeheapdump_m(fd uintptr) { _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) - _g_.waitreason = "dumping heap" + _g_.waitreason = waitReasonDumpingHeap // Update stats so we can dump them. 
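The dumpbv change in the heapdump.go hunk above swaps the explicit bv.bytedata[i/8]>>(i%8)&1 indexing for a cbv.ptrbit(i) helper; the bit extraction itself is unchanged. Spelled out over a plain byte slice (ptrbit here is a free function for illustration only):

    package main

    import "fmt"

    // ptrbit returns bit i of a little-endian packed bitmap: the
    // bytedata[i/8]>>(i%8)&1 computation from the deleted dumpbv loop.
    func ptrbit(bytedata []byte, i uintptr) uint8 {
        return bytedata[i/8] >> (i % 8) & 1
    }

    func main() {
        bm := []byte{0x05, 0x80} // bits 0, 2 and 15 set
        for i := uintptr(0); i < 16; i++ {
            fmt.Print(ptrbit(bm, i))
        }
        fmt.Println() // 1010000000000001
    }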
// As a side effect, flushes all the MCaches so the MSpan.freelist diff --git a/libgo/go/runtime/iface.go b/libgo/go/runtime/iface.go index 62d47ce..8ed67c1 100644 --- a/libgo/go/runtime/iface.go +++ b/libgo/go/runtime/iface.go @@ -94,7 +94,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsi.methods[0].name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsi.methods[0].name}) } methods := make([]unsafe.Pointer, len(lhsi.methods)+1) @@ -110,7 +110,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } rhsMethod = &rhs.methods[ri] @@ -126,7 +126,7 @@ func getitab(lhs, rhs *_type, canfail bool) unsafe.Pointer { if canfail { return nil } - panic(&TypeAssertionError{"", *rhs.string, *lhs.string, *lhsMethod.name}) + panic(&TypeAssertionError{nil, rhs, lhs, *lhsMethod.name}) } methods[li+1] = unsafe.Pointer(rhsMethod.tfn) @@ -147,7 +147,7 @@ func requireitab(lhs, rhs *_type) unsafe.Pointer { // impossible or if the rhs type is nil. func assertitab(lhs, rhs *_type) unsafe.Pointer { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if lhs.kind&kindMask != kindInterface { @@ -167,10 +167,10 @@ func assertitab(lhs, rhs *_type) unsafe.Pointer { // type, panicing if not. func assertI2T(lhs, rhs, inter *_type) { if rhs == nil { - panic(&TypeAssertionError{"", "", *lhs.string, ""}) + panic(&TypeAssertionError{nil, nil, lhs, ""}) } if !eqtype(lhs, rhs) { - panic(&TypeAssertionError{*inter.string, *rhs.string, *lhs.string, ""}) + panic(&TypeAssertionError{inter, rhs, lhs, ""}) } } @@ -327,8 +327,44 @@ func ifaceT2Ip(to, from *_type) bool { func reflect_ifaceE2I(inter *interfacetype, e eface, dst *iface) { t := e._type if t == nil { - panic(TypeAssertionError{"", "", *inter.typ.string, ""}) + panic(TypeAssertionError{nil, nil, &inter.typ, ""}) } dst.tab = requireitab((*_type)(unsafe.Pointer(inter)), t) dst.data = e.data } + +// staticbytes is used to avoid convT2E for byte-sized values. 
+var staticbytes = [...]byte{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +} diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go index b697aa8..25ece43 100644 --- a/libgo/go/runtime/internal/atomic/atomic_test.go +++ b/libgo/go/runtime/internal/atomic/atomic_test.go @@ -93,8 +93,10 @@ func TestUnaligned64(t *testing.T) { } x := make([]uint32, 4) - up64 := (*uint64)(unsafe.Pointer(&x[1])) // misaligned - p64 := (*int64)(unsafe.Pointer(&x[1])) // misaligned + u := unsafe.Pointer(uintptr(unsafe.Pointer(&x[0])) | 4) // force alignment to 4 + + up64 := (*uint64)(u) // misaligned + p64 := (*int64)(u) // misaligned shouldPanic(t, "Load64", func() { atomic.Load64(up64) }) shouldPanic(t, "Loadint64", func() { atomic.Loadint64(p64) }) diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go index 47010e3..083a75c 100644 --- a/libgo/go/runtime/internal/atomic/bench_test.go +++ b/libgo/go/runtime/internal/atomic/bench_test.go @@ -26,3 +26,39 @@ func BenchmarkAtomicStore64(b *testing.B) { atomic.Store64(&x, 0) } } + +func BenchmarkAtomicLoad(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + _ = atomic.Load(&x) + } +} + +func BenchmarkAtomicStore(b *testing.B) { + var x uint32 + sink = &x + for i := 0; i < b.N; i++ { + atomic.Store(&x, 0) + } +} + +func BenchmarkXadd(b *testing.B) { + var x uint32 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd(ptr, 1) + } + }) +} + +func BenchmarkXadd64(b *testing.B) { + var x uint64 + ptr := &x + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + atomic.Xadd64(ptr, 1) + } + }) +} diff --git a/libgo/go/runtime/internal/atomic/stubs.go b/libgo/go/runtime/internal/atomic/stubs.go index 497b980..62e30d1 100644 --- a/libgo/go/runtime/internal/atomic/stubs.go +++ b/libgo/go/runtime/internal/atomic/stubs.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style 
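The new staticbytes table gives the runtime a preallocated, addressable copy of every possible byte value, so boxing a single byte into an interface can reuse a table entry instead of heap-allocating a one-byte object. A user-level sketch of the same trade, not the runtime's convT2* plumbing (boxedBytes and boxByte are invented names):

    package main

    import "fmt"

    // boxedBytes plays the role of staticbytes: every possible byte value is
    // boxed exactly once, up front, so handing one out later allocates nothing.
    // The runtime instead points the interface's data word at &staticbytes[v];
    // this sketch just caches the boxed values directly.
    var boxedBytes [256]interface{}

    func init() {
        for i := range boxedBytes {
            boxedBytes[i] = byte(i)
        }
    }

    func boxByte(b byte) interface{} { return boxedBytes[b] }

    func main() {
        fmt.Println(boxByte(0x41)) // 65, with no per-call allocation
    }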
// license that can be found in the LICENSE file. +// +build !wasm + package atomic import "unsafe" diff --git a/libgo/go/runtime/internal/sys/intrinsics.go b/libgo/go/runtime/internal/sys/intrinsics.go index 2928280..6906938 100644 --- a/libgo/go/runtime/internal/sys/intrinsics.go +++ b/libgo/go/runtime/internal/sys/intrinsics.go @@ -32,6 +32,30 @@ func Ctz32(x uint32) int { return int(builtinCtz32(x)) } +// Ctz8 returns the number of trailing zero bits in x; the result is 8 for x == 0. +func Ctz8(x uint8) int { + return int(ntz8tab[x]) +} + +var ntz8tab = [256]uint8{ + 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, +} + //extern __builtin_bswap64 func bswap64(uint64) uint64 diff --git a/libgo/go/runtime/lfstack.go b/libgo/go/runtime/lfstack.go index 4787c5b..406561a 100644 --- a/libgo/go/runtime/lfstack.go +++ b/libgo/go/runtime/lfstack.go @@ -55,3 +55,13 @@ func (head *lfstack) pop() unsafe.Pointer { func (head *lfstack) empty() bool { return atomic.Load64((*uint64)(head)) == 0 } + +// lfnodeValidate panics if node is not a valid address for use with +// lfstack.push. This only needs to be called when node is allocated. +func lfnodeValidate(node *lfnode) { + if lfstackUnpack(lfstackPack(node, ^uintptr(0))) != node { + printlock() + println("runtime: bad lfnode address", hex(uintptr(unsafe.Pointer(node)))) + throw("bad lfnode address") + } +} diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index dca1718..401f83d 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x arm64be alpha sparc64 ia64 riscv64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le s390x wasm arm64be alpha sparc64 ia64 riscv64 package runtime @@ -11,21 +11,17 @@ import "unsafe" const ( // addrBits is the number of bits needed to represent a virtual address. 
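Ctz8 above answers trailing-zero-count queries for a byte with a 256-entry table rather than a hardware instruction. The table encodes the same function as math/bits.TrailingZeros8, which makes it easy to cross-check; a quick sketch:

    package main

    import (
        "fmt"
        "math/bits"
    )

    // ctz8Table mirrors the shape of the new ntz8tab: entry x holds the number
    // of trailing zero bits in x, with 8 for x == 0.
    var ctz8Table [256]uint8

    func init() {
        for x := 0; x < 256; x++ {
            ctz8Table[x] = uint8(bits.TrailingZeros8(uint8(x)))
        }
    }

    func main() {
        fmt.Println(ctz8Table[0], ctz8Table[0x01], ctz8Table[0x28], ctz8Table[0x80]) // 8 0 3 7
    }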
// - // In Linux the user address space for each architecture is limited as - // follows (taken from the processor.h file for the architecture): + // See heapAddrBits for a table of address space sizes on + // various architectures. 48 bits is enough for all + // architectures except s390x. // - // Architecture Name Maximum Value (exclusive) - // --------------------------------------------------------------------- - // arm64 TASK_SIZE_64 Depends on configuration. - // ppc64{,le} TASK_SIZE_USER64 0x400000000000UL (46 bit addresses) - // mips64{,le} TASK_SIZE64 0x010000000000UL (40 bit addresses) - // s390x TASK_SIZE 0x020000000000UL (41 bit addresses) - // - // These values may increase over time. - // - // On AMD64, virtual addresses are 48-bit numbers sign extended to 64. + // On AMD64, virtual addresses are 48-bit (or 57-bit) numbers sign extended to 64. // We shift the address left 16 to eliminate the sign extended part and make // room in the bottom for the count. + // + // On s390x, virtual addresses are 64-bit. There's not much we + // can do about this, so we just hope that the kernel doesn't + // get to really high addresses and panic if it does. addrBits = 48 // In addition to the 16 bits taken from the top, we can take 3 from the diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index b2c9ccb..f7ca1f0 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -241,3 +241,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go new file mode 100644 index 0000000..df321e5 --- /dev/null +++ b/libgo/go/runtime/lock_js.go @@ -0,0 +1,172 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + _ "unsafe" +) + +// js/wasm has no support for threads yet. There is no preemption. +// Waiting for a mutex is implemented by allowing other goroutines +// to run until the mutex gets unlocked. + +const ( + mutex_unlocked = 0 + mutex_locked = 1 + + note_cleared = 0 + note_woken = 1 + note_timeout = 2 + + active_spin = 4 + active_spin_cnt = 30 + passive_spin = 1 +) + +func lock(l *mutex) { + for l.key == mutex_locked { + mcall(gosched_m) + } + l.key = mutex_locked +} + +func unlock(l *mutex) { + if l.key == mutex_unlocked { + throw("unlock of unlocked lock") + } + l.key = mutex_unlocked +} + +// One-time notifications. 
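The new lock_js.go works cooperatively: with no threads or preemption on js/wasm, lock() simply reschedules other goroutines until the mutex is free, and the note code that follows parks the waiting goroutine and relies on scheduled callbacks and deadlines to wake it. The runtime-internal note API is not reachable from user code, but its one-shot clear/wake/timed-wait semantics are close to a buffered channel used once; a loose user-level analogy (oneShot, newNote, notify and wait are invented names, with channels standing in for gopark/goready):

    package main

    import (
        "fmt"
        "time"
    )

    // oneShot is an analogy for the runtime note on js/wasm: created cleared,
    // woken at most once, and a waiter may give up after a deadline. In the
    // runtime the "sleep" is a gopark and the wakeup comes from a callback.
    type oneShot chan struct{}

    func newNote() oneShot { return make(oneShot, 1) }

    func notify(n oneShot) { n <- struct{}{} }

    // wait blocks until the note is notified or d elapses, reporting which.
    func wait(n oneShot, d time.Duration) bool {
        select {
        case <-n:
            return true
        case <-time.After(d):
            return false
        }
    }

    func main() {
        n := newNote()
        go func() {
            time.Sleep(10 * time.Millisecond)
            notify(n)
        }()
        fmt.Println(wait(n, time.Second)) // true: woken before the deadline
    }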
+ +type noteWithTimeout struct { + gp *g + deadline int64 +} + +var ( + notes = make(map[*note]*g) + notesWithTimeout = make(map[*note]noteWithTimeout) +) + +func noteclear(n *note) { + n.key = note_cleared +} + +func notewakeup(n *note) { + // gp := getg() + if n.key == note_woken { + throw("notewakeup - double wakeup") + } + cleared := n.key == note_cleared + n.key = note_woken + if cleared { + goready(notes[n], 1) + } +} + +func notesleep(n *note) { + throw("notesleep not supported by js") +} + +func notetsleep(n *note, ns int64) bool { + throw("notetsleep not supported by js") + return false +} + +// same as runtime·notetsleep, but called on user g (not g0) +func notetsleepg(n *note, ns int64) bool { + gp := getg() + if gp == gp.m.g0 { + throw("notetsleepg on g0") + } + + if ns >= 0 { + deadline := nanotime() + ns + delay := ns/1000000 + 1 // round up + if delay > 1<<31-1 { + delay = 1<<31 - 1 // cap to max int32 + } + + id := scheduleCallback(delay) + mp := acquirem() + notes[n] = gp + notesWithTimeout[n] = noteWithTimeout{gp: gp, deadline: deadline} + releasem(mp) + + gopark(nil, nil, waitReasonSleep, traceEvNone, 1) + + clearScheduledCallback(id) // note might have woken early, clear timeout + mp = acquirem() + delete(notes, n) + delete(notesWithTimeout, n) + releasem(mp) + + return n.key == note_woken + } + + for n.key != note_woken { + mp := acquirem() + notes[n] = gp + releasem(mp) + + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + + mp = acquirem() + delete(notes, n) + releasem(mp) + } + return true +} + +// checkTimeouts resumes goroutines that are waiting on a note which has reached its deadline. +func checkTimeouts() { + now := nanotime() + for n, nt := range notesWithTimeout { + if n.key == note_cleared && now > nt.deadline { + n.key = note_timeout + goready(nt.gp, 1) + } + } +} + +var waitingForCallback *g + +// sleepUntilCallback puts the current goroutine to sleep until a callback is triggered. +// It is currently only used by the callback routine of the syscall/js package. +//go:linkname sleepUntilCallback syscall/js.sleepUntilCallback +func sleepUntilCallback() { + waitingForCallback = getg() + gopark(nil, nil, waitReasonZero, traceEvNone, 1) + waitingForCallback = nil +} + +// pauseSchedulerUntilCallback gets called from the scheduler and pauses the execution +// of Go's WebAssembly code until a callback is triggered. Then it checks for note timeouts +// and resumes goroutines that are waiting for a callback. +func pauseSchedulerUntilCallback() bool { + if waitingForCallback == nil && len(notesWithTimeout) == 0 { + return false + } + + pause() + checkTimeouts() + if waitingForCallback != nil { + goready(waitingForCallback, 1) + } + return true +} + +// pause pauses the execution of Go's WebAssembly code until a callback is triggered. +func pause() + +// scheduleCallback tells the WebAssembly environment to trigger a callback after ms milliseconds. +// It returns a timer id that can be used with clearScheduledCallback. +func scheduleCallback(ms int64) int32 + +// clearScheduledCallback clears a callback scheduled by scheduleCallback. 
+func clearScheduledCallback(id int32) diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index b5cce6a..237513c 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -294,3 +294,9 @@ func notetsleepg(n *note, ns int64) bool { exitsyscall() return ok } + +func pauseSchedulerUntilCallback() bool { + return false +} + +func checkTimeouts() {} diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index 523989e..ac4759f 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -78,9 +78,34 @@ // // 3. We don't zero pages that never get reused. +// Virtual memory layout +// +// The heap consists of a set of arenas, which are 64MB on 64-bit and +// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also +// aligned to the arena size. +// +// Each arena has an associated heapArena object that stores the +// metadata for that arena: the heap bitmap for all words in the arena +// and the span map for all pages in the arena. heapArena objects are +// themselves allocated off-heap. +// +// Since arenas are aligned, the address space can be viewed as a +// series of arena frames. The arena map (mheap_.arenas) maps from +// arena frame number to *heapArena, or nil for parts of the address +// space not backed by the Go heap. The arena map is structured as a +// two-level array consisting of a "L1" arena map and many "L2" arena +// maps; however, since arenas are large, on many architectures, the +// arena map consists of a single, large L2 map. +// +// The arena map covers the entire possible address space, allowing +// the Go heap to use any part of the address space. The allocator +// attempts to keep arenas contiguous so that large spans (and hence +// large objects) can cross arenas. + package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -124,9 +149,8 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. - _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -145,25 +169,144 @@ const ( // plan9 | 4KB | 3 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 - // Number of bits in page to span calculations (4k pages). - // On Windows 64-bit we limit the arena to 32GB or 35 bits. - // Windows counts memory used by page table into committed memory - // of the process, so we can't reserve too much memory. - // See https://golang.org/issue/5402 and https://golang.org/issue/5236. - // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. - // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. - // The only exception is mips32 which only has access to low 2GB of virtual memory. - // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, - // but as most devices have less than 4GB of physical memory anyway, we - // try to be conservative here, and only ask for a 2GB heap. 
- _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) - _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift - - // _MaxMem is the maximum heap arena size minus 1. + // heapAddrBits is the number of bits in a heap address. On + // amd64, addresses are sign-extended beyond heapAddrBits. On + // other arches, they are zero-extended. + // + // On 64-bit platforms, we limit this to 48 bits based on a + // combination of hardware and OS limitations. + // + // amd64 hardware limits addresses to 48 bits, sign-extended + // to 64 bits. Addresses where the top 16 bits are not either + // all 0 or all 1 are "non-canonical" and invalid. Because of + // these "negative" addresses, we offset addresses by 1<<47 + // (arenaBaseOffset) on amd64 before computing indexes into + // the heap arenas index. In 2017, amd64 hardware added + // support for 57 bit addresses; however, currently only Linux + // supports this extension and the kernel will never choose an + // address above 1<<47 unless mmap is called with a hint + // address above 1<<47 (which we never do). + // + // arm64 hardware (as of ARMv8) limits user addresses to 48 + // bits, in the range [0, 1<<48). + // + // ppc64, mips64, and s390x support arbitrary 64 bit addresses + // in hardware. However, since Go only supports Linux on + // these, we lean on OS limits. Based on Linux's processor.h, + // the user address space is limited as follows on 64-bit + // architectures: + // + // Architecture Name Maximum Value (exclusive) + // --------------------------------------------------------------------- + // amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses) + // arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses) + // ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses) + // mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses) + // s390x TASK_SIZE 1<<64 (64 bit addresses) + // + // These limits may increase over time, but are currently at + // most 48 bits except on s390x. On all architectures, Linux + // starts placing mmap'd regions at addresses that are + // significantly below 48 bits, so even if it's possible to + // exceed Go's 48 bit limit, it's extremely unlikely in + // practice. + // + // On 32-bit platforms, we accept the full 32-bit address + // space because doing so is cheap. + // mips32 only has access to the low 2GB of virtual memory, so + // we further limit it to 31 bits. + // + // WebAssembly currently has a limit of 4GB linear memory. + heapAddrBits = (_64bit*(1-sys.GoarchWasm))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + + // maxAlloc is the maximum size of an allocation. On 64-bit, + // it's theoretically possible to allocate 1<<heapAddrBits bytes. On + // 32-bit, however, this is one less than 1<<32 because the + // number of bytes in the address space doesn't actually fit + // in a uintptr. + maxAlloc = (1 << heapAddrBits) - (1-_64bit)*1 + + // The number of bits in a heap address, the size of heap + // arenas, and the L1 and L2 arena map sizes are related by // - // On 32-bit, this is also the maximum heap pointer value, - // since the arena starts at address 0. 
- _MaxMem = 1<<_MHeapMap_TotalBits - 1 + // (1 << addrBits) = arenaBytes * L1entries * L2entries + // + // Currently, we balance these as follows: + // + // Platform Addr bits Arena size L1 entries L2 size + // -------------- --------- ---------- ---------- ------- + // */64-bit 48 64MB 1 32MB + // windows/64-bit 48 4MB 64 8MB + // */32-bit 32 4MB 1 4KB + // */mips(le) 31 4MB 1 2KB + + // heapArenaBytes is the size of a heap arena. The heap + // consists of mappings of size heapArenaBytes, aligned to + // heapArenaBytes. The initial heap mapping is one arena. + // + // This is currently 64MB on 64-bit non-Windows and 4MB on + // 32-bit and on Windows. We use smaller arenas on Windows + // because all committed memory is charged to the process, + // even if it's not touched. Hence, for processes with small + // heaps, the mapped arena space needs to be commensurate. + // This is particularly important with the race detector, + // since it significantly amplifies the cost of committed + // memory. + heapArenaBytes = 1 << logHeapArenaBytes + + // logHeapArenaBytes is log_2 of heapArenaBytes. For clarity, + // prefer using heapArenaBytes where possible (we need the + // constant to compute some other constants). + logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + + // heapArenaBitmapBytes is the size of each heap arena's bitmap. + heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2) + + pagesPerArena = heapArenaBytes / pageSize + + // arenaL1Bits is the number of bits of the arena number + // covered by the first level arena map. + // + // This number should be small, since the first level arena + // map requires PtrSize*(1<<arenaL1Bits) of space in the + // binary's BSS. It can be zero, in which case the first level + // index is effectively unused. There is a performance benefit + // to this, since the generated code can be more efficient, + // but comes at the cost of having a large L2 mapping. + // + // We use the L1 map on 64-bit Windows because the arena size + // is small, but the address space is still 48 bits, and + // there's a high cost to having a large L2. + arenaL1Bits = 6 * (_64bit * sys.GoosWindows) + + // arenaL2Bits is the number of bits of the arena number + // covered by the second level arena index. + // + // The size of each arena map allocation is proportional to + // 1<<arenaL2Bits, so it's important that this not be too + // large. 48 bits leads to 32MB arena index allocations, which + // is about the practical threshold. + arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits + + // arenaL1Shift is the number of bits to shift an arena frame + // number by to compute an index into the first level arena map. + arenaL1Shift = arenaL2Bits + + // arenaBits is the total bits in a combined arena map index. + // This is split between the index into the L1 arena map and + // the L2 arena map. + arenaBits = arenaL1Bits + arenaL2Bits + + // arenaBaseOffset is the pointer value that corresponds to + // index 0 in the heap arena map. + // + // On amd64, the address space is 48 bits, sign extended to 64 + // bits. This offset lets us handle "negative" addresses (or + // high addresses if viewed as unsigned). + // + // On other platforms, the user address space is contiguous + // and starts at 0, so no offset is necessary. + arenaBaseOffset uintptr = sys.GoarchAmd64 * (1 << 47) // Max number of threads to run garbage collection. 
// 2, 3, and 4 are all plausible maximums depending @@ -209,18 +352,12 @@ var physPageSize uintptr // SysReserve reserves address space without allocating memory. // If the pointer passed to it is non-nil, the caller wants the // reservation there, but SysReserve can still choose another -// location if that one is unavailable. On some systems and in some -// cases SysReserve will simply check that the address space is -// available and not actually reserve it. If SysReserve returns -// non-nil, it sets *reserved to true if the address space is -// reserved, false if it has merely been checked. +// location if that one is unavailable. // NOTE: SysReserve returns OS-aligned memory, but the heap allocator // may use larger alignment, so the caller must be careful to realign the // memory obtained by sysAlloc. // // SysMap maps previously reserved address space for use. -// The reserved argument is true if the address space was really -// reserved, not merely checked. // // SysFault marks a (already sysAlloc'd) region to fault // if accessed. Used only for debugging the runtime. @@ -233,6 +370,12 @@ func mallocinit() { // Not used for gccgo. // testdefersizes() + if heapArenaBitmapBytes&(heapArenaBitmapBytes-1) != 0 { + // heapBits expects modular arithmetic on bitmap + // addresses to work. + throw("heapArenaBitmapBytes not a power of 2") + } + // Copy class sizes out for statistics table. for i := range class_to_size { memstats.by_size[i].size = uint32(class_to_size[i]) @@ -252,55 +395,47 @@ func mallocinit() { throw("bad system page size") } - // The auxiliary regions start at p and are laid out in the - // following order: spans, bitmap, arena. - var p, pSize uintptr - var reserved bool - - // The spans array holds one *mspan per _PageSize of arena. - var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize - spansSize = round(spansSize, _PageSize) - // The bitmap holds 2 bits per word of arena. - var bitmapSize uintptr = (_MaxMem + 1) / (sys.PtrSize * 8 / 2) - bitmapSize = round(bitmapSize, _PageSize) - - // Set up the allocation arena, a contiguous area of memory where - // allocated data will be found. - if sys.PtrSize == 8 { - // On a 64-bit machine, allocate from a single contiguous reservation. - // 512 GB (MaxMem) should be big enough for now. + // Initialize the heap. + mheap_.init() + _g_ := getg() + _g_.m.mcache = allocmcache() + + // Create initial arena growth hints. + if sys.PtrSize == 8 && GOARCH != "wasm" { + // On a 64-bit machine, we pick the following hints + // because: + // + // 1. Starting from the middle of the address space + // makes it easier to grow out a contiguous range + // without running in to some other mapping. // - // The code will work with the reservation at any address, but ask - // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f). - // Allocating a 512 GB region takes away 39 bits, and the amd64 - // doesn't let us choose the top 17 bits, so that leaves the 9 bits - // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means - // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df. - // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid + // 2. This makes Go heap addresses more easily + // recognizable when debugging. + // + // 3. Stack scanning in gccgo is still conservative, + // so it's important that addresses be distinguishable + // from other data. + // + // Starting at 0x00c0 means that the valid memory addresses + // will begin 0x00c0, 0x00c1, ... 
+ // In little-endian, that's c0 00, c1 00, ... None of those are valid // UTF-8 sequences, and they are otherwise as far away from // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 // addresses. An earlier attempt to use 0x11f8 caused out of memory errors // on OS X during thread allocations. 0x00c0 causes conflicts with // AddressSanitizer which reserves all memory up to 0x0100. - // These choices are both for debuggability and to reduce the - // odds of a conservative garbage collector (as is still used in gccgo) + // These choices reduce the odds of a conservative garbage collector // not collecting memory because some non-pointer block of memory // had a bit pattern that matched a memory address. // - // Actually we reserve 544 GB (because the bitmap ends up being 32 GB) - // but it hardly matters: e0 00 is not valid UTF-8 either. - // - // If this fails we fall back to the 32 bit memory mechanism - // // However, on arm64, we ignore all this advice above and slam the // allocation at 0x40 << 32 because when using 4k pages with 3-level // translation buffers, the user address space is limited to 39 bits // On darwin/arm64, the address space is even smaller. // On AIX, mmap adresses range starts at 0x0700000000000000 for 64-bit // processes. The new address space allocator starts at 0x0A00000000000000. - arenaSize := round(_MaxMem, _PageSize) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - for i := 0; i <= 0x7f; i++ { + for i := 0x7f; i >= 0; i-- { + var p uintptr switch { case GOARCH == "arm64" && GOOS == "darwin": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) @@ -312,225 +447,283 @@ func mallocinit() { } else { p = uintptr(i)<<42 | uintptrMask&(0x70<<52) } + case raceenabled: + // The TSAN runtime requires the heap + // to be in the range [0x00c000000000, + // 0x00e000000000). + p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32) + if p >= uintptrMask&0x00e000000000 { + continue + } default: p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { - break - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - } + } else { + // On a 32-bit machine, we're much more concerned + // about keeping the usable heap contiguous. + // Hence: + // + // 1. We reserve space for all heapArenas up front so + // they don't get interleaved with the heap. They're + // ~258MB, so this isn't too bad. (We could reserve a + // smaller amount of space up front if this is a + // problem.) + // + // 2. We hint the heap to start right above the end of + // the binary so we have the best chance of keeping it + // contiguous. + // + // 3. We try to stake out a reasonably large initial + // heap reservation. - if p == 0 { - // On a 32-bit machine, we can't typically get away - // with a giant virtual address space reservation. - // Instead we map the memory information bitmap - // immediately after the data segment, large enough - // to handle the entire 4GB address space (256 MB), - // along with a reservation for an initial arena. - // When that gets used up, we'll start asking the kernel - // for any memory anywhere. 
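The new allocator constants and comments in the malloc.go hunks above reduce to simple index arithmetic: offset the address by arenaBaseOffset, divide by heapArenaBytes, then split the arena number into L1 and L2 indexes. A hedged sketch using the */64-bit non-Windows numbers from the table above (48-bit addresses, 64MB arenas, a single L1 entry), assuming a 64-bit platform; arenaIndexes is an invented helper, not the runtime's accessor:

    package main

    import "fmt"

    const (
        logHeapArenaBytes = 26 // 64MB arenas on */64-bit
        heapArenaBytes    = 1 << logHeapArenaBytes
        arenaBaseOffset   = uintptr(1) << 47 // amd64: fold "negative" addresses into the index
        arenaL1Bits       = 0                // a single, flat L2 map on this configuration
        arenaL2Bits       = 48 - logHeapArenaBytes - arenaL1Bits
        arenaL1Shift      = arenaL2Bits
    )

    // arenaIndexes returns the (L1, L2) position of the arena containing p,
    // following the two-level layout described in the comments above.
    func arenaIndexes(p uintptr) (l1, l2 uintptr) {
        ai := (p + arenaBaseOffset) / heapArenaBytes
        return ai >> arenaL1Shift, ai & (1<<arenaL2Bits - 1)
    }

    func main() {
        fmt.Println(arenaIndexes(0x00c000000000)) // a typical Go heap hint address
        fmt.Println(arenaIndexes(0x00c000000000 + heapArenaBytes))
    }

With arenaL1Bits = 0 the L1 index is always zero, which is exactly why the comment above notes that most architectures use a single large L2 map.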
+ const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{}) + meta := uintptr(sysReserve(nil, arenaMetaSize)) + if meta != 0 { + mheap_.heapArenaAlloc.init(meta, arenaMetaSize) + } // We want to start the arena low, but if we're linked // against C code, it's possible global constructors // have called malloc and adjusted the process' brk. // Query the brk so we can avoid trying to map the - // arena over it (which will cause the kernel to put - // the arena somewhere else, likely at a high + // region over it (which will cause the kernel to put + // the region somewhere else, likely at a high // address). procBrk := sbrk0() - // If we fail to allocate, try again with a smaller arena. - // This is necessary on Android L where we share a process - // with ART, which reserves virtual memory aggressively. - // In the worst case, fall back to a 0-sized initial arena, - // in the hope that subsequent reservations will succeed. + // If we ask for the end of the data segment but the + // operating system requires a little more space + // before we can start allocating, it will give out a + // slightly higher pointer. Except QEMU, which is + // buggy, as usual: it won't adjust the pointer + // upward. So adjust it upward a little bit ourselves: + // 1/4 MB to get away from the running binary image. + p := getEnd() + if p < procBrk { + p = procBrk + } + if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { + p = mheap_.heapArenaAlloc.end + } + p = round(p+(256<<10), heapArenaBytes) + // Because we're worried about fragmentation on + // 32-bit, we try to make a large initial reservation. arenaSizes := [...]uintptr{ 512 << 20, 256 << 20, 128 << 20, - 0, } - for _, arenaSize := range &arenaSizes { - // SysReserve treats the address we ask for, end, as a hint, - // not as an absolute requirement. If we ask for the end - // of the data segment but the operating system requires - // a little more space before we can start allocating, it will - // give out a slightly higher pointer. Except QEMU, which - // is buggy, as usual: it won't adjust the pointer upward. - // So adjust it upward a little bit ourselves: 1/4 MB to get - // away from the running binary image and then round up - // to a MB boundary. - p = round(getEnd()+(1<<18), 1<<20) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - if p <= procBrk && procBrk < p+pSize { - // Move the start above the brk, - // leaving some room for future brk - // expansion. - p = round(procBrk+(1<<20), 1<<20) - } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { + a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes) + if a != nil { + mheap_.arena.init(uintptr(a), size) + p = uintptr(a) + size // For hint below break } } - if p == 0 { - throw("runtime: cannot reserve arena virtual address space") - } - } - - // PageSize can be larger than OS definition of page size, - // so SysReserve can give us a PageSize-unaligned pointer. - // To overcome this we ask for PageSize more and round up the pointer. - p1 := round(p, _PageSize) - pSize -= p1 - p - - spansStart := p1 - p1 += spansSize - mheap_.bitmap = p1 + bitmapSize - p1 += bitmapSize - if sys.PtrSize == 4 { - // Set arena_start such that we can accept memory - // reservations located anywhere in the 4GB virtual space. 
- mheap_.arena_start = 0 - } else { - mheap_.arena_start = p1 + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - mheap_.arena_end = p + pSize - mheap_.arena_used = p1 - mheap_.arena_alloc = p1 - mheap_.arena_reserved = reserved - - if mheap_.arena_start&(_PageSize-1) != 0 { - println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) - throw("misrounded allocation in mallocinit") - } - - // Initialize the rest of the allocator. - mheap_.init(spansStart, spansSize) - _g_ := getg() - _g_.m.mcache = allocmcache() } -// sysAlloc allocates the next n bytes from the heap arena. The -// returned pointer is always _PageSize aligned and between -// h.arena_start and h.arena_end. sysAlloc returns nil on failure. +// sysAlloc allocates heap arena space for at least n bytes. The +// returned pointer is always heapArenaBytes-aligned and backed by +// h.arenas metadata. The returned size is always a multiple of +// heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. -func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { - // strandLimit is the maximum number of bytes to strand from - // the current arena block. If we would need to strand more - // than this, we fall back to sysAlloc'ing just enough for - // this allocation. - const strandLimit = 16 << 20 - - if n > h.arena_end-h.arena_alloc { - // If we haven't grown the arena to _MaxMem yet, try - // to reserve some more address space. - p_size := round(n+_PageSize, 256<<20) - new_end := h.arena_end + p_size // Careful: can overflow - if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem { - // TODO: It would be bad if part of the arena - // is reserved and part is not. - var reserved bool - p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) - if p == 0 { - // TODO: Try smaller reservation - // growths in case we're in a crowded - // 32-bit address space. - goto reservationFailed - } - // p can be just about anywhere in the address - // space, including before arena_end. - if p == h.arena_end { - // The new block is contiguous with - // the current block. Extend the - // current arena block. - h.arena_end = new_end - h.arena_reserved = reserved - } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit { - // We were able to reserve more memory - // within the arena space, but it's - // not contiguous with our previous - // reservation. It could be before or - // after our current arena_used. - // - // Keep everything page-aligned. - // Our pages are bigger than hardware pages. - h.arena_end = p + p_size - p = round(p, _PageSize) - h.arena_alloc = p - h.arena_reserved = reserved - } else { - // We got a mapping, but either - // - // 1) It's not in the arena, so we - // can't use it. (This should never - // happen on 32-bit.) - // - // 2) We would need to discard too - // much of our current arena block to - // use it. - // - // We haven't added this allocation to - // the stats, so subtract it from a - // fake stat (but avoid underflow). - // - // We'll fall back to a small sysAlloc. - stat := uint64(p_size) - sysFree(unsafe.Pointer(p), p_size, &stat) +// +// h must be locked. +func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + n = round(n, heapArenaBytes) + + // First, try the arena pre-reservation. 
+ v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) + if v != nil { + size = n + goto mapped + } + + // Try to grow the heap at a hint address. + for h.arenaHints != nil { + hint := h.arenaHints + p := hint.addr + if hint.down { + p -= n + } + if p+n < p { + // We can't use this, so don't ask. + v = nil + } else if arenaIndex(p+n-1) >= 1<<arenaBits { + // Outside addressable heap. Can't use. + v = nil + } else { + v = sysReserve(unsafe.Pointer(p), n) + } + if p == uintptr(v) { + // Success. Update the hint. + if !hint.down { + p += n } + hint.addr = p + size = n + break } + // Failed. Discard this hint and try the next. + // + // TODO: This would be cleaner if sysReserve could be + // told to only return the requested address. In + // particular, this is already how Windows behaves, so + // it would simply things there. + if v != nil { + sysFree(v, n, nil) + } + h.arenaHints = hint.next + h.arenaHintAlloc.free(unsafe.Pointer(hint)) } - if n <= h.arena_end-h.arena_alloc { - // Keep taking from our reservation. - p := h.arena_alloc - sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys) - h.arena_alloc += n - if h.arena_alloc > h.arena_used { - h.setArenaUsed(h.arena_alloc, true) + if size == 0 { + if raceenabled { + // The race detector assumes the heap lives in + // [0x00c000000000, 0x00e000000000), but we + // just ran out of hints in this region. Give + // a nice failure. + throw("too many address space collisions for -race mode") } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + // All of the hints failed, so we'll take any + // (sufficiently aligned) address the kernel will give + // us. + v, size = sysReserveAligned(nil, n, heapArenaBytes) + if v == nil { + return nil, 0 } - return unsafe.Pointer(p) + + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } -reservationFailed: - // If using 64-bit, our reservation is all we have. - if sys.PtrSize != 4 { - return nil + // Check for bad pointers or pointers we can't use. + { + var bad string + p := uintptr(v) + if p+size < p { + bad = "region exceeds uintptr range" + } else if arenaIndex(p) >= 1<<arenaBits { + bad = "base outside usable address space" + } else if arenaIndex(p+size-1) >= 1<<arenaBits { + bad = "end outside usable address space" + } + if bad != "" { + // This should be impossible on most architectures, + // but it would be really confusing to debug. + print("runtime: memory allocated by OS [", hex(p), ", ", hex(p+size), ") not in usable address space: ", bad, "\n") + throw("memory reservation exceeds address space limit") + } } - // On 32-bit, once the reservation is gone we can - // try to get memory at a location chosen by the OS. - p_size := round(n, _PageSize) + _PageSize - p := uintptr(sysAlloc(p_size, &memstats.heap_sys)) - if p == 0 { - return nil + if uintptr(v)&(heapArenaBytes-1) != 0 { + throw("misrounded allocation in sysAlloc") } - if p < h.arena_start || p+p_size-h.arena_start > _MaxMem { - // This shouldn't be possible because _MaxMem is the - // whole address space on 32-bit. 
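The hint loop above consumes one hint per successful grow and advances it past the new region; "down" hints grow the heap toward lower addresses. The toy model below shows only that bookkeeping, assuming a 64-bit uintptr: arenaHint and takeFromHint are stand-ins, and no memory is actually reserved.

package main

import "fmt"

// arenaHint here is a stand-in for the runtime's hint records.
type arenaHint struct {
	addr uintptr
	down bool
}

// takeFromHint mimics a successful reservation of n bytes at a hint:
// a downward hint hands out the n bytes just below hint.addr, an upward
// hint the n bytes starting at it, and the hint is moved past the region.
func takeFromHint(h *arenaHint, n uintptr) uintptr {
	p := h.addr
	if h.down {
		p -= n
		h.addr = p
	} else {
		h.addr = p + n
	}
	return p
}

func main() {
	const n = 64 << 20
	up := &arenaHint{addr: 0xc000000000}
	base := takeFromHint(up, n)
	fmt.Printf("upward hint:   got [%#x, %#x), next hint %#x\n", base, base+n, up.addr)
	down := &arenaHint{addr: 0xc000000000, down: true}
	base = takeFromHint(down, n)
	fmt.Printf("downward hint: got [%#x, %#x), next hint %#x\n", base, base+n, down.addr)
}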
- top := uint64(h.arena_start) + _MaxMem - print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") - sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) - return nil + // Back the reservation. + sysMap(v, size, &memstats.heap_sys) + +mapped: + // Create arena metadata. + for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ { + l2 := h.arenas[ri.l1()] + if l2 == nil { + // Allocate an L2 arena map. + l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil)) + if l2 == nil { + throw("out of memory allocating heap arena map") + } + atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2)) + } + + if l2[ri.l2()] != nil { + throw("arena already initialized") + } + var r *heapArena + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + throw("out of memory allocating heap arena metadata") + } + } + + // Store atomically just in case an object from the + // new heap arena becomes visible before the heap lock + // is released (which shouldn't happen, but there's + // little downside to this). + atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r)) } - p += -p & (_PageSize - 1) - if p+n > h.arena_used { - h.setArenaUsed(p+n, true) + // Tell the race detector about the new heap memory. + if raceenabled { + racemapshadow(v, size) } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + return +} + +// sysReserveAligned is like sysReserve, but the returned pointer is +// aligned to align bytes. It may reserve either n or n+align bytes, +// so it returns the size that was reserved. +func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) { + // Since the alignment is rather large in uses of this + // function, we're not likely to get it by chance, so we ask + // for a larger region and remove the parts we don't need. + retries := 0 +retry: + p := uintptr(sysReserve(v, size+align)) + switch { + case p == 0: + return nil, 0 + case p&(align-1) == 0: + // We got lucky and got an aligned region, so we can + // use the whole thing. + return unsafe.Pointer(p), size + align + case GOOS == "windows": + // On Windows we can't release pieces of a + // reservation, so we release the whole thing and + // re-reserve the aligned sub-region. This may race, + // so we may have to try again. + sysFree(unsafe.Pointer(p), size+align, nil) + p = round(p, align) + p2 := sysReserve(unsafe.Pointer(p), size) + if p != uintptr(p2) { + // Must have raced. Try again. + sysFree(p2, size, nil) + if retries++; retries == 100 { + throw("failed to allocate aligned heap memory; too many retries") + } + goto retry + } + // Success. + return p2, size + default: + // Trim off the unaligned parts. 
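The metadata loop above indexes h.arenas with a two-level scheme: the arena index is derived from the address, and its high bits pick an L1 slot while the low bits pick an L2 slot. The sketch below is a simplified model of that split; the constants are illustrative (the real values differ per platform, and the real arenaIndex may also apply a base offset).

package main

import "fmt"

const (
	heapArenaBytes = 64 << 20
	arenaL1Bits    = 6
	arenaL2Bits    = 16
	arenaBits      = arenaL1Bits + arenaL2Bits
)

type arenaIdx uint

// arenaIndex maps an address to its arena number (no base offset here).
func arenaIndex(p uintptr) arenaIdx { return arenaIdx(p / heapArenaBytes) }

func (i arenaIdx) l1() uint { return uint(i) >> arenaL2Bits }
func (i arenaIdx) l2() uint { return uint(i) & (1<<arenaL2Bits - 1) }

func main() {
	p := uintptr(0x00c000000000)
	ri := arenaIndex(p)
	fmt.Printf("p=%#x -> arena index %d (l1=%d, l2=%d)\n", p, ri, ri.l1(), ri.l2())
}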
+ pAligned := round(p, align) + sysFree(unsafe.Pointer(p), pAligned-p, nil) + end := pAligned + size + endLen := (p + size + align) - end + if endLen > 0 { + sysFree(unsafe.Pointer(end), endLen, nil) + } + return unsafe.Pointer(pAligned), size } - return unsafe.Pointer(p) } // base address for all 0-byte allocations @@ -862,7 +1055,7 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { throw("out of memory") } s.limit = s.base() + size - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } @@ -875,7 +1068,7 @@ func newobject(typ *_type) unsafe.Pointer { //go:linkname reflect_unsafe_New reflect.unsafe_New func reflect_unsafe_New(typ *_type) unsafe.Pointer { - return newobject(typ) + return mallocgc(typ.size, typ, true) } // newarray allocates an array of n elements of type typ. @@ -1046,6 +1239,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// linearAlloc is a simple linear allocator that pre-reserves a region +// of memory and then maps that region as needed. The caller is +// responsible for locking. +type linearAlloc struct { + next uintptr // next free byte + mapped uintptr // one byte past end of mapped space + end uintptr // end of reserved space +} + +func (l *linearAlloc) init(base, size uintptr) { + l.next, l.mapped = base, base + l.end = base + size +} + +func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + p := round(l.next, align) + if p+size > l.end { + return nil + } + l.next = p + size + if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { + // We need to map more of the reserved space. + sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) + l.mapped = pEnd + } + return unsafe.Pointer(p) +} + // notInHeap is off-heap memory allocated by a lower-level allocator // like sysAlloc or persistentAlloc. // diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go index ab580f8..30a7d84 100644 --- a/libgo/go/runtime/malloc_test.go +++ b/libgo/go/runtime/malloc_test.go @@ -7,16 +7,25 @@ package runtime_test import ( "flag" "fmt" + "internal/race" + "internal/testenv" + "os" + "os/exec" "reflect" . "runtime" + "strings" "testing" "time" "unsafe" ) +var testMemStatsCount int + func TestMemStats(t *testing.T) { t.Skip("skipping test with gccgo") + testMemStatsCount++ + // Make sure there's at least one forced GC. GC() @@ -32,6 +41,13 @@ func TestMemStats(t *testing.T) { } le := func(thresh float64) func(interface{}) error { return func(x interface{}) error { + // These sanity tests aren't necessarily valid + // with high -test.count values, so only run + // them once. + if testMemStatsCount > 1 { + return nil + } + if reflect.ValueOf(x).Convert(reflect.TypeOf(thresh)).Float() < thresh { return nil } @@ -50,7 +66,7 @@ func TestMemStats(t *testing.T) { // PauseTotalNs can be 0 if timer resolution is poor. 
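The default case of sysReserveAligned above over-reserves by the alignment and then gives back the unaligned head and tail. The sketch below reproduces just that arithmetic with plain numbers standing in for the real sysReserve/sysFree calls; trimAligned and the sample addresses are illustrative.

package main

import "fmt"

// trimAligned: given a reservation of size+align bytes starting at p,
// compute the aligned sub-region to keep and the head/tail byte counts
// that would be released.
func trimAligned(p, size, align uintptr) (head, base, end, tailLen uintptr) {
	base = (p + align - 1) &^ (align - 1) // round p up to align
	head = base - p                       // unaligned prefix to release
	end = base + size
	tailLen = (p + size + align) - end // unaligned suffix to release
	return
}

func main() {
	const align = 64 << 20 // e.g. an arena-sized alignment
	head, base, end, tail := trimAligned(0x7f0123000, 256<<20, align)
	fmt.Printf("free %#x head bytes, keep [%#x, %#x), free %#x tail bytes\n",
		head, base, end, tail)
}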
fields := map[string][]func(interface{}) error{ "Alloc": {nz, le(1e10)}, "TotalAlloc": {nz, le(1e11)}, "Sys": {nz, le(1e10)}, - "Lookups": {nz, le(1e10)}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, + "Lookups": {eq(uint64(0))}, "Mallocs": {nz, le(1e10)}, "Frees": {nz, le(1e10)}, "HeapAlloc": {nz, le(1e10)}, "HeapSys": {nz, le(1e10)}, "HeapIdle": {le(1e10)}, "HeapInuse": {nz, le(1e10)}, "HeapReleased": {le(1e10)}, "HeapObjects": {nz, le(1e10)}, "StackInuse": {nz, le(1e10)}, "StackSys": {nz, le(1e10)}, @@ -154,6 +170,64 @@ func TestTinyAlloc(t *testing.T) { } } +type acLink struct { + x [1 << 20]byte +} + +var arenaCollisionSink []*acLink + +func TestArenaCollision(t *testing.T) { + testenv.MustHaveExec(t) + + // Test that mheap.sysAlloc handles collisions with other + // memory mappings. + if os.Getenv("TEST_ARENA_COLLISION") != "1" { + cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestArenaCollision", "-test.v")) + cmd.Env = append(cmd.Env, "TEST_ARENA_COLLISION=1") + out, err := cmd.CombinedOutput() + if race.Enabled { + // This test runs the runtime out of hint + // addresses, so it will start mapping the + // heap wherever it can. The race detector + // doesn't support this, so look for the + // expected failure. + if want := "too many address space collisions"; !strings.Contains(string(out), want) { + t.Fatalf("want %q, got:\n%s", want, string(out)) + } + } else if !strings.Contains(string(out), "PASS\n") || err != nil { + t.Fatalf("%s\n(exit status %v)", string(out), err) + } + return + } + disallowed := [][2]uintptr{} + // Drop all but the next 3 hints. 64-bit has a lot of hints, + // so it would take a lot of memory to go through all of them. + KeepNArenaHints(3) + // Consume these 3 hints and force the runtime to find some + // fallback hints. + for i := 0; i < 5; i++ { + // Reserve memory at the next hint so it can't be used + // for the heap. + start, end := MapNextArenaHint() + disallowed = append(disallowed, [2]uintptr{start, end}) + // Allocate until the runtime tries to use the hint we + // just mapped over. + hint := GetNextArenaHint() + for GetNextArenaHint() == hint { + ac := new(acLink) + arenaCollisionSink = append(arenaCollisionSink, ac) + // The allocation must not have fallen into + // one of the reserved regions. + p := uintptr(unsafe.Pointer(ac)) + for _, d := range disallowed { + if d[0] <= p && p < d[1] { + t.Fatalf("allocation %#x in reserved region [%#x, %#x)", p, d[0], d[1]) + } + } + } + } +} + var mallocSink uintptr func BenchmarkMalloc8(b *testing.B) { diff --git a/libgo/go/runtime/hashmap.go b/libgo/go/runtime/map.go index 53b05b1..8e97bc5 100644 --- a/libgo/go/runtime/hashmap.go +++ b/libgo/go/runtime/map.go @@ -87,7 +87,7 @@ const ( // Maximum key or value size to keep inline (instead of mallocing per element). // Must fit in a uint8. // Fast versions cannot handle big values - the cutoff size for - // fast versions in ../../cmd/internal/gc/walk.go must be at most this value. + // fast versions in cmd/compile/internal/gc/walk.go must be at most this value. maxKeySize = 128 maxValueSize = 128 @@ -121,8 +121,8 @@ const ( // A header for a Go map. type hmap struct { - // Note: the format of the Hmap is encoded in ../../cmd/internal/gc/reflect.go and - // ../reflect/type.go. Don't change this structure without also changing that code! + // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go. + // Make sure this stays in sync with the compiler's definition. count int // # live cells == size of map. 
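TestArenaCollision above uses the common re-exec pattern: the test re-runs its own binary with an environment variable set so the disruptive part runs in a fresh process, and the parent only inspects the child's output. A minimal sketch of that pattern follows; the package name, environment variable, and test name are made up for illustration.

package reexec_test

import (
	"os"
	"os/exec"
	"strings"
	"testing"
)

func TestInChild(t *testing.T) {
	if os.Getenv("BE_CHILD") == "1" {
		// Child half: this is where the interesting work would go.
		return
	}
	// Parent half: run ourselves again with the marker variable set.
	cmd := exec.Command(os.Args[0], "-test.run=TestInChild", "-test.v")
	cmd.Env = append(os.Environ(), "BE_CHILD=1")
	out, err := cmd.CombinedOutput()
	if err != nil || !strings.Contains(string(out), "PASS") {
		t.Fatalf("child run failed: %v\n%s", err, out)
	}
}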
Must be first (used by len() builtin) flags uint8 B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items) @@ -141,7 +141,7 @@ type mapextra struct { // If both key and value do not contain pointers and are inline, then we mark bucket // type as containing no pointers. This avoids scanning such maps. // However, bmap.overflow is a pointer. In order to keep overflow buckets - // alive, we store pointers to all overflow buckets in hmap.overflow and h.map.oldoverflow. + // alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow. // overflow and oldoverflow are only used if key and value do not contain pointers. // overflow contains overflow buckets for hmap.buckets. // oldoverflow contains overflow buckets for hmap.oldbuckets. @@ -167,7 +167,7 @@ type bmap struct { } // A hash iteration structure. -// If you modify hiter, also change cmd/internal/gc/reflect.go to indicate +// If you modify hiter, also change cmd/compile/internal/gc/reflect.go to indicate // the layout of this structure. type hiter struct { key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go). @@ -333,7 +333,7 @@ func makemap(t *maptype, hint int, h *hmap) *hmap { // If hint is large zeroing this memory could take a while. if h.B != 0 { var nextOverflow *bmap - h.buckets, nextOverflow = makeBucketArray(t, h.B) + h.buckets, nextOverflow = makeBucketArray(t, h.B, nil) if nextOverflow != nil { h.extra = new(mapextra) h.extra.nextOverflow = nextOverflow @@ -343,6 +343,57 @@ func makemap(t *maptype, hint int, h *hmap) *hmap { return h } +// makeBucketArray initializes a backing array for map buckets. +// 1<<b is the minimum number of buckets to allocate. +// dirtyalloc should either be nil or a bucket array previously +// allocated by makeBucketArray with the same t and b parameters. +// If dirtyalloc is nil a new backing array will be alloced and +// otherwise dirtyalloc will be cleared and reused as backing array. +func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) { + base := bucketShift(b) + nbuckets := base + // For small b, overflow buckets are unlikely. + // Avoid the overhead of the calculation. + if b >= 4 { + // Add on the estimated number of overflow buckets + // required to insert the median number of elements + // used with this value of b. + nbuckets += bucketShift(b - 4) + sz := t.bucket.size * nbuckets + up := roundupsize(sz) + if up != sz { + nbuckets = up / t.bucket.size + } + } + + if dirtyalloc == nil { + buckets = newarray(t.bucket, int(nbuckets)) + } else { + // dirtyalloc was previously generated by + // the above newarray(t.bucket, int(nbuckets)) + // but may not be empty. + buckets = dirtyalloc + size := t.bucket.size * nbuckets + if t.bucket.kind&kindNoPointers == 0 { + memclrHasPointers(buckets, size) + } else { + memclrNoHeapPointers(buckets, size) + } + } + + if base != nbuckets { + // We preallocated some overflow buckets. + // To keep the overhead of tracking these overflow buckets to a minimum, + // we use the convention that if a preallocated overflow bucket's overflow + // pointer is nil, then there are more available by bumping the pointer. + // We need a safe non-nil pointer for the last overflow bucket; just use buckets. 
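For the bucket-count estimate used by makeBucketArray above: once B reaches 4, roughly one extra overflow bucket is planned per 16 regular buckets. The sketch below shows only that estimate and omits the roundupsize adjustment to the allocator's size classes, so the numbers are slightly conservative; estimateBuckets is an illustrative name.

package main

import "fmt"

// estimateBuckets returns the base bucket count for 2^b buckets plus the
// preallocated overflow buckets, mirroring the b >= 4 heuristic above.
func estimateBuckets(b uint8) uintptr {
	base := uintptr(1) << b
	n := base
	if b >= 4 {
		n += uintptr(1) << (b - 4)
	}
	return n
}

func main() {
	for _, b := range []uint8{3, 4, 8} {
		fmt.Printf("B=%d: %d buckets allocated (%d base)\n", b, estimateBuckets(b), uintptr(1)<<b)
	}
}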
+ nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) + last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) + last.setoverflow(t, (*bmap)(buckets)) + } + return buckets, nextOverflow +} + // mapaccess1 returns a pointer to h[key]. Never returns nil, instead // it will return a reference to the zero object for the value type if // the key is not in the map. @@ -696,14 +747,13 @@ search: } else if t.key.kind&kindNoPointers == 0 { memclrHasPointers(k, t.key.size) } - // Only clear value if there are pointers in it. - if t.indirectvalue || t.elem.kind&kindNoPointers == 0 { - v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) - if t.indirectvalue { - *(*unsafe.Pointer)(v) = nil - } else { - memclrHasPointers(v, t.elem.size) - } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) + if t.indirectvalue { + *(*unsafe.Pointer)(v) = nil + } else if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) } b.tophash[i] = empty h.count-- @@ -746,7 +796,7 @@ func mapiterinit(t *maptype, h *hmap, it *hiter) { } if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 { - throw("hash_iter size incorrect") // see ../../cmd/internal/gc/reflect.go + throw("hash_iter size incorrect") // see cmd/compile/internal/gc/reflect.go } it.t = t it.h = h @@ -915,34 +965,49 @@ next: goto next } -func makeBucketArray(t *maptype, b uint8) (buckets unsafe.Pointer, nextOverflow *bmap) { - base := bucketShift(b) - nbuckets := base - // For small b, overflow buckets are unlikely. - // Avoid the overhead of the calculation. - if b >= 4 { - // Add on the estimated number of overflow buckets - // required to insert the median number of elements - // used with this value of b. - nbuckets += bucketShift(b - 4) - sz := t.bucket.size * nbuckets - up := roundupsize(sz) - if up != sz { - nbuckets = up / t.bucket.size - } +// mapclear deletes all keys from a map. +func mapclear(t *maptype, h *hmap) { + if raceenabled && h != nil { + callerpc := getcallerpc() + pc := funcPC(mapclear) + racewritepc(unsafe.Pointer(h), callerpc, pc) } - buckets = newarray(t.bucket, int(nbuckets)) - if base != nbuckets { - // We preallocated some overflow buckets. - // To keep the overhead of tracking these overflow buckets to a minimum, - // we use the convention that if a preallocated overflow bucket's overflow - // pointer is nil, then there are more available by bumping the pointer. - // We need a safe non-nil pointer for the last overflow bucket; just use buckets. - nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) - last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) - last.setoverflow(t, (*bmap)(buckets)) + + if h == nil || h.count == 0 { + return } - return buckets, nextOverflow + + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + h.flags |= hashWriting + + h.flags &^= sameSizeGrow + h.oldbuckets = nil + h.nevacuate = 0 + h.noverflow = 0 + h.count = 0 + + // Keep the mapextra allocation but clear any extra information. + if h.extra != nil { + *h.extra = mapextra{} + } + + // makeBucketArray clears the memory pointed to by h.buckets + // and recovers any overflow buckets by generating them + // as if h.buckets was newly alloced. + _, nextOverflow := makeBucketArray(t, h.B, h.buckets) + if nextOverflow != nil { + // If overflow buckets are created then h.extra + // will have been allocated during initial bucket creation. 
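The new mapclear above is what the compiler targets when it recognizes the map-clearing idiom: ranging over a map and deleting every key is lowered to a single runtime call instead of per-key deletes (the benchmark added further down exercises the same shape). A small user-level example of the idiom:

package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3}
	// This exact loop shape is recognized and compiled to a mapclear call.
	for k := range m {
		delete(m, k)
	}
	fmt.Println(len(m)) // 0
}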
+ h.extra.nextOverflow = nextOverflow + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting } func hashGrow(t *maptype, h *hmap) { @@ -955,7 +1020,7 @@ func hashGrow(t *maptype, h *hmap) { h.flags |= sameSizeGrow } oldbuckets := h.buckets - newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger) + newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil) flags := h.flags &^ (iterator | oldIterator) if h.flags&iterator != 0 { @@ -1059,7 +1124,6 @@ type evacDst struct { func evacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) newbit := h.noldbuckets() - hashfn := t.key.hashfn if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) @@ -1100,7 +1164,7 @@ func evacuate(t *maptype, h *hmap, oldbucket uintptr) { if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/value to bucket x or bucket y). - hash := hashfn(k2, uintptr(h.hash0)) + hash := t.key.hashfn(k2, uintptr(h.hash0)) if h.flags&iterator != 0 && !t.reflexivekey && !t.key.equalfn(k2, k2) { // If key != key (NaNs), then the hash could be (and probably // will be) entirely different from the old hash. Moreover, @@ -1203,6 +1267,7 @@ func ismapkey(t *_type) bool { //go:linkname reflect_makemap reflect.makemap func reflect_makemap(t *maptype, cap int) *hmap { + // Check invariants and reflects math. if !ismapkey(t.key) { throw("runtime.reflect_makemap: unsupported map key type") } @@ -1294,5 +1359,5 @@ func reflect_ismapkey(t *_type) bool { return ismapkey(t) } -const maxZero = 1024 // must match value in ../cmd/compile/internal/gc/walk.go +const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go var zeroVal [maxZero]byte diff --git a/libgo/go/runtime/mapspeed_test.go b/libgo/go/runtime/map_benchmark_test.go index aec0c51..025c039 100644 --- a/libgo/go/runtime/mapspeed_test.go +++ b/libgo/go/runtime/map_benchmark_test.go @@ -341,3 +341,32 @@ func BenchmarkComplexAlgMap(b *testing.B) { _ = m[k] } } + +func BenchmarkGoMapClear(b *testing.B) { + b.Run("Reflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[int]int, size) + for i := 0; i < b.N; i++ { + m[0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) + b.Run("NonReflexive", func(b *testing.B) { + for size := 1; size < 100000; size *= 10 { + b.Run(strconv.Itoa(size), func(b *testing.B) { + m := make(map[float64]int, size) + for i := 0; i < b.N; i++ { + m[1.0] = size // Add one element so len(m) != 0 avoiding fast paths. + for k := range m { + delete(m, k) + } + } + }) + } + }) +} diff --git a/libgo/go/runtime/map_fast32.go b/libgo/go/runtime/map_fast32.go new file mode 100644 index 0000000..a9a06a8 --- /dev/null +++ b/libgo/go/runtime/map_fast32.go @@ -0,0 +1,413 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast32(t *maptype, h *hmap, key uint32) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast32)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if *(*uint32)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. 
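The fast accessors above pick a bucket from the low B bits of the hash, and while the map is growing they mask with one fewer bit to find the corresponding bucket in the old, half-sized table. The sketch below shows that index arithmetic only; the hash value and B are arbitrary.

package main

import "fmt"

// bucketMask returns the mask for a table of 2^b buckets.
func bucketMask(b uint8) uintptr { return uintptr(1)<<b - 1 }

func main() {
	const B = 5 // 32 buckets
	hash := uintptr(0x7f4a7c15)
	m := bucketMask(B)
	fmt.Printf("bucket in the new table: %d of %d\n", hash&m, 1<<B)
	fmt.Printf("bucket in the old table: %d of %d\n", hash&(m>>1), 1<<(B-1))
}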
+ h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*uint32)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*uint32)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast32)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + inserti = i + insertb = b + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*4))) + if k != key { + continue + } + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
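The tophash(hash) values stored by the assign paths above are the top 8 bits of the hash, bumped above a small reserved range so they never collide with the empty and evacuation markers kept in the same array. The sketch below assumes a 64-bit platform; treat ptrBits and minTopHash as illustrative constants rather than the runtime's definitions.

package main

import "fmt"

const (
	ptrBits    = 64 // assumes a 64-bit platform
	minTopHash = 4  // values below this are reserved for bucket-state markers
)

func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (ptrBits - 8))
	if top < minTopHash {
		top += minTopHash
	}
	return top
}

func main() {
	fmt.Println(tophash(0xa7<<56 | 0xabcd)) // 167: the raw top byte is used as-is
	fmt.Println(tophash(0x01<<56 | 0xabcd)) // 5: raw top byte 1 is reserved, so it is bumped
}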
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*4) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*4+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast32(t *maptype, h *hmap, key uint32) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast32)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast32(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 4) { + if key != *(*uint32)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*4+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast32(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast32(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast32(t, h, h.nevacuate) + } +} + +func evacuate_fast32(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*4) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
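On the "max load factor" trigger mentioned in the assign paths above: a map grows once it averages more than 6.5 entries per bucket (and has outgrown the single-bucket case). The sketch below mirrors that check with the 13/2 split written out; overLoadFactor here is a standalone copy for illustration, not the runtime's function.

package main

import "fmt"

const bucketCnt = 8

// overLoadFactor reports whether count elements in 2^B buckets exceed the
// 6.5-per-bucket threshold.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > 13*((uintptr(1)<<B)/2)
}

func main() {
	for _, c := range []int{8, 52, 53} {
		fmt.Printf("count=%d with 8 buckets: grow=%v\n", c, overLoadFactor(c, 3))
	}
}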
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*4) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*4) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 4), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*4) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + *(*uint32)(dst.k) = *(*uint32)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 4) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_fast64.go b/libgo/go/runtime/map_fast64.go new file mode 100644 index 0000000..a2a51fc --- /dev/null +++ b/libgo/go/runtime/map_fast64.go @@ -0,0 +1,419 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. 
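The evacuate_* functions above split each old bucket into an "x" and a "y" destination when the table doubles: the hash bit that was previously masked off (newbit) decides which half each key lands in. The sketch below demonstrates just that decision with arbitrary hashes that both fall in the same old bucket.

package main

import "fmt"

func main() {
	const oldB = 3
	newbit := uintptr(1) << oldB // number of old buckets
	oldbucket := uintptr(5)
	for _, hash := range []uintptr{0x25, 0x2d} { // both hashes land in old bucket 5
		dst := oldbucket
		if hash&newbit != 0 {
			dst += newbit // "y" destination in the doubled table
		}
		fmt.Printf("hash %#x: old bucket %d -> new bucket %d\n", hash, oldbucket, dst)
	}
}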
+ m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_fast64(t *maptype, h *hmap, key uint64) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_fast64)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + var b *bmap + if h.B == 0 { + // One-bucket table. No need to hash. + b = (*bmap)(h.buckets) + } else { + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + m := bucketMask(h.B) + b = (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + } + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if *(*uint64)(k) == key && b.tophash[i] != empty { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*uint64)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*uint64)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_fast64)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. + h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] == empty { + if insertb == nil { + insertb = b + inserti = i + } + continue + } + k := *((*unsafe.Pointer)(add(unsafe.Pointer(b), dataOffset+i*8))) + if k != key { + continue + } + insertb = b + inserti = i + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. 
+ insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = tophash(hash) // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*8) + // store new key at insert position + *(*unsafe.Pointer)(insertk) = key + + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*8+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_fast64(t *maptype, h *hmap, key uint64) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_fast64)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + hash := t.key.hashfn(noescape(unsafe.Pointer(&key)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_fast64(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) +search: + for ; b != nil; b = b.overflow(t) { + for i, k := uintptr(0), b.keys(); i < bucketCnt; i, k = i+1, add(k, 8) { + if key != *(*uint64)(k) || b.tophash[i] == empty { + continue + } + // Only clear key if there are pointers in it. + if t.key.kind&kindNoPointers == 0 { + memclrHasPointers(k, t.key.size) + } + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*8+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_fast64(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_fast64(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_fast64(t, h, h.nevacuate) + } +} + +func evacuate_fast64(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*8) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. 
+ y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*8) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*8) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 8), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*8) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled { + if sys.PtrSize == 8 { + // Write with a write barrier. + *(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k) + } else { + // There are three ways to squeeze at least one 32 bit pointer into 64 bits. + // Give up and call typedmemmove. + typedmemmove(t.key, dst.k, k) + } + } else { + *(*uint64)(dst.k) = *(*uint64)(k) + } + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 8) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. + ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_faststr.go b/libgo/go/runtime/map_faststr.go new file mode 100644 index 0000000..5812b3f --- /dev/null +++ b/libgo/go/runtime/map_faststr.go @@ -0,0 +1,430 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +func mapaccess1_faststr(t *maptype, h *hmap, ky string) unsafe.Pointer { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess1_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]) + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)) + } + } + return unsafe.Pointer(&zeroVal[0]) + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + } + } + } + return unsafe.Pointer(&zeroVal[0]) +} + +func mapaccess2_faststr(t *maptype, h *hmap, ky string) (unsafe.Pointer, bool) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapaccess2_faststr)) + } + if h == nil || h.count == 0 { + return unsafe.Pointer(&zeroVal[0]), false + } + if h.flags&hashWriting != 0 { + throw("concurrent map read and map write") + } + key := stringStructOf(&ky) + if h.B == 0 { + // One-bucket table. 
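For string keys of 32 bytes or more, the single-bucket path above screens candidates cheaply (length, then the first and last 4 bytes) before paying for a full comparison. The sketch below is a simplified, user-level version of that screening; likelyEqual is an illustrative name and omits the tophash and pointer-equality shortcuts the real code also uses.

package main

import "fmt"

// likelyEqual applies the cheap checks: equal length, equal first 4 bytes,
// equal last 4 bytes. A true result would still need a full comparison.
func likelyEqual(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	if len(a) < 4 {
		return a == b
	}
	return a[:4] == b[:4] && a[len(a)-4:] == b[len(b)-4:]
}

func main() {
	k := "observability-pipeline-production-east"
	fmt.Println(likelyEqual(k, "observability-pipeline-production-west")) // false: last 4 bytes differ
	fmt.Println(likelyEqual(k, k))                                        // true: full compare would follow
}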
+ b := (*bmap)(h.buckets) + if key.len < 32 { + // short key, doing lots of comparisons is ok + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } + // long key, try not to do more comparisons than necessary + keymaybe := uintptr(bucketCnt) + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] == empty { + continue + } + if k.str == key.str { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + // check first 4 bytes + if *((*[4]byte)(key.str)) != *((*[4]byte)(k.str)) { + continue + } + // check last 4 bytes + if *((*[4]byte)(add(key.str, uintptr(key.len)-4))) != *((*[4]byte)(add(k.str, uintptr(key.len)-4))) { + continue + } + if keymaybe != bucketCnt { + // Two keys are potential matches. Use hash to distinguish them. + goto dohash + } + keymaybe = i + } + if keymaybe != bucketCnt { + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+keymaybe*2*sys.PtrSize)) + if memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+keymaybe*uintptr(t.valuesize)), true + } + } + return unsafe.Pointer(&zeroVal[0]), false + } +dohash: + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + m := bucketMask(h.B) + b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) + if c := h.oldbuckets; c != nil { + if !h.sameSizeGrow() { + // There used to be half as many buckets; mask down one more power of two. + m >>= 1 + } + oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) + if !evacuated(oldb) { + b = oldb + } + } + top := tophash(hash) + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str == key.str || memequal(k.str, key.str, uintptr(key.len)) { + return add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)), true + } + } + } + return unsafe.Pointer(&zeroVal[0]), false +} + +func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer { + if h == nil { + panic(plainError("assignment to entry in nil map")) + } + if raceenabled { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapassign_faststr)) + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + key := stringStructOf(&s) + hash := t.key.hashfn(noescape(unsafe.Pointer(&s)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapassign. 
+ h.flags |= hashWriting + + if h.buckets == nil { + h.buckets = newobject(t.bucket) // newarray(t.bucket, 1) + } + +again: + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize))) + top := tophash(hash) + + var insertb *bmap + var inserti uintptr + var insertk unsafe.Pointer + + for { + for i := uintptr(0); i < bucketCnt; i++ { + if b.tophash[i] != top { + if b.tophash[i] == empty && insertb == nil { + insertb = b + inserti = i + } + continue + } + k := (*stringStruct)(add(unsafe.Pointer(b), dataOffset+i*2*sys.PtrSize)) + if k.len != key.len { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // already have a mapping for key. Update it. + inserti = i + insertb = b + goto done + } + ovf := b.overflow(t) + if ovf == nil { + break + } + b = ovf + } + + // Did not find mapping for key. Allocate new cell & add entry. + + // If we hit the max load factor or we have too many overflow buckets, + // and we're not already in the middle of growing, start growing. + if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) { + hashGrow(t, h) + goto again // Growing the table invalidates everything, so try again + } + + if insertb == nil { + // all current buckets are full, allocate a new one. + insertb = h.newoverflow(t, b) + inserti = 0 // not necessary, but avoids needlessly spilling inserti + } + insertb.tophash[inserti&(bucketCnt-1)] = top // mask inserti to avoid bounds checks + + insertk = add(unsafe.Pointer(insertb), dataOffset+inserti*2*sys.PtrSize) + // store new key at insert position + *((*stringStruct)(insertk)) = *key + h.count++ + +done: + val := add(unsafe.Pointer(insertb), dataOffset+bucketCnt*2*sys.PtrSize+inserti*uintptr(t.valuesize)) + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting + return val +} + +func mapdelete_faststr(t *maptype, h *hmap, ky string) { + if raceenabled && h != nil { + callerpc := getcallerpc() + racewritepc(unsafe.Pointer(h), callerpc, funcPC(mapdelete_faststr)) + } + if h == nil || h.count == 0 { + return + } + if h.flags&hashWriting != 0 { + throw("concurrent map writes") + } + + key := stringStructOf(&ky) + hash := t.key.hashfn(noescape(unsafe.Pointer(&ky)), uintptr(h.hash0)) + + // Set hashWriting after calling alg.hash for consistency with mapdelete + h.flags |= hashWriting + + bucket := hash & bucketMask(h.B) + if h.growing() { + growWork_faststr(t, h, bucket) + } + b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) + top := tophash(hash) +search: + for ; b != nil; b = b.overflow(t) { + for i, kptr := uintptr(0), b.keys(); i < bucketCnt; i, kptr = i+1, add(kptr, 2*sys.PtrSize) { + k := (*stringStruct)(kptr) + if k.len != key.len || b.tophash[i] != top { + continue + } + if k.str != key.str && !memequal(k.str, key.str, uintptr(key.len)) { + continue + } + // Clear key's pointer. 
+ k.str = nil + v := add(unsafe.Pointer(b), dataOffset+bucketCnt*2*sys.PtrSize+i*uintptr(t.valuesize)) + if t.elem.kind&kindNoPointers == 0 { + memclrHasPointers(v, t.elem.size) + } else { + memclrNoHeapPointers(v, t.elem.size) + } + b.tophash[i] = empty + h.count-- + break search + } + } + + if h.flags&hashWriting == 0 { + throw("concurrent map writes") + } + h.flags &^= hashWriting +} + +func growWork_faststr(t *maptype, h *hmap, bucket uintptr) { + // make sure we evacuate the oldbucket corresponding + // to the bucket we're about to use + evacuate_faststr(t, h, bucket&h.oldbucketmask()) + + // evacuate one more oldbucket to make progress on growing + if h.growing() { + evacuate_faststr(t, h, h.nevacuate) + } +} + +func evacuate_faststr(t *maptype, h *hmap, oldbucket uintptr) { + b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) + newbit := h.noldbuckets() + if !evacuated(b) { + // TODO: reuse overflow buckets instead of using new ones, if there + // is no iterator using the old buckets. (If !oldIterator.) + + // xy contains the x and y (low and high) evacuation destinations. + var xy [2]evacDst + x := &xy[0] + x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) + x.k = add(unsafe.Pointer(x.b), dataOffset) + x.v = add(x.k, bucketCnt*2*sys.PtrSize) + + if !h.sameSizeGrow() { + // Only calculate y pointers if we're growing bigger. + // Otherwise GC can see bad pointers. + y := &xy[1] + y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) + y.k = add(unsafe.Pointer(y.b), dataOffset) + y.v = add(y.k, bucketCnt*2*sys.PtrSize) + } + + for ; b != nil; b = b.overflow(t) { + k := add(unsafe.Pointer(b), dataOffset) + v := add(k, bucketCnt*2*sys.PtrSize) + for i := 0; i < bucketCnt; i, k, v = i+1, add(k, 2*sys.PtrSize), add(v, uintptr(t.valuesize)) { + top := b.tophash[i] + if top == empty { + b.tophash[i] = evacuatedEmpty + continue + } + if top < minTopHash { + throw("bad map state") + } + var useY uint8 + if !h.sameSizeGrow() { + // Compute hash to make our evacuation decision (whether we need + // to send this key/value to bucket x or bucket y). + hash := t.key.hashfn(k, uintptr(h.hash0)) + if hash&newbit != 0 { + useY = 1 + } + } + + b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY, enforced in makemap + dst := &xy[useY] // evacuation destination + + if dst.i == bucketCnt { + dst.b = h.newoverflow(t, dst.b) + dst.i = 0 + dst.k = add(unsafe.Pointer(dst.b), dataOffset) + dst.v = add(dst.k, bucketCnt*2*sys.PtrSize) + } + dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check + + // Copy key. + *(*string)(dst.k) = *(*string)(k) + + typedmemmove(t.elem, dst.v, v) + dst.i++ + // These updates might push these pointers past the end of the + // key or value arrays. That's ok, as we have the overflow pointer + // at the end of the bucket to protect against pointing past the + // end of the bucket. + dst.k = add(dst.k, 2*sys.PtrSize) + dst.v = add(dst.v, uintptr(t.valuesize)) + } + } + // Unlink the overflow buckets & clear key/value to help GC. + // Unlink the overflow buckets & clear key/value to help GC. + if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 { + b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) + // Preserve b.tophash because the evacuation + // state is maintained there. 
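// Editor's sketch of the X/Y split decided in evacuate_faststr above: when
// the table doubles from 2^B to 2^(B+1) buckets, an entry in old bucket i
// either stays at index i ("X") or moves to i+2^B ("Y"), chosen by a single
// extra hash bit. Hypothetical helper, not part of this change.
func evacDestSketch(hash, oldbucket, newbit uintptr) uintptr {
	if hash&newbit != 0 {
		return oldbucket + newbit // Y: upper half of the grown table
	}
	return oldbucket // X: same index as before
}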
+ ptr := add(b, dataOffset) + n := uintptr(t.bucketsize) - dataOffset + memclrHasPointers(ptr, n) + } + } + + if oldbucket == h.nevacuate { + advanceEvacuationMark(h, t, newbit) + } +} diff --git a/libgo/go/runtime/map_test.go b/libgo/go/runtime/map_test.go index 6d7097e..13f1d2e 100644 --- a/libgo/go/runtime/map_test.go +++ b/libgo/go/runtime/map_test.go @@ -9,6 +9,7 @@ import ( "math" "reflect" "runtime" + "runtime/internal/sys" "sort" "strconv" "strings" @@ -16,6 +17,17 @@ import ( "testing" ) +func TestHmapSize(t *testing.T) { + // The structure of hmap is defined in runtime/map.go + // and in cmd/compile/internal/gc/reflect.go and must be in sync. + // The size of hmap should be 48 bytes on 64 bit and 28 bytes on 32 bit platforms. + var hmapSize = uintptr(8 + 5*sys.PtrSize) + if runtime.RuntimeHmapSize != hmapSize { + t.Errorf("sizeof(runtime.hmap{})==%d, want %d", runtime.RuntimeHmapSize, hmapSize) + } + +} + // negative zero is a good test because: // 1) 0 and -0 are equal, yet have distinct representations. // 2) 0 is represented as all zeros, -0 isn't. @@ -52,14 +64,7 @@ func TestNegativeZero(t *testing.T) { } } -// nan is a good test because nan != nan, and nan has -// a randomized hash value. -func TestNan(t *testing.T) { - m := make(map[float64]int, 0) - nan := math.NaN() - m[nan] = 1 - m[nan] = 2 - m[nan] = 4 +func testMapNan(t *testing.T, m map[float64]int) { if len(m) != 3 { t.Error("length wrong") } @@ -78,6 +83,67 @@ func TestNan(t *testing.T) { } } +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment. + m[nan] = 1 + m[nan] = 2 + m[nan] = 4 + testMapNan(t, m) +} + +// nan is a good test because nan != nan, and nan has +// a randomized hash value. +func TestMapOperatorAssignmentNan(t *testing.T) { + m := make(map[float64]int, 0) + nan := math.NaN() + + // Test assignment operations. + m[nan] += 1 + m[nan] += 2 + m[nan] += 4 + testMapNan(t, m) +} + +func TestMapOperatorAssignment(t *testing.T) { + m := make(map[int]int, 0) + + // "m[k] op= x" is rewritten into "m[k] = m[k] op x" + // differently when op is / or % than when it isn't. + // Simple test to make sure they all work as expected. + m[0] = 12345 + m[0] += 67890 + m[0] /= 123 + m[0] %= 456 + + const want = (12345 + 67890) / 123 % 456 + if got := m[0]; got != want { + t.Errorf("got %d, want %d", got, want) + } +} + +var sinkAppend bool + +func TestMapAppendAssignment(t *testing.T) { + m := make(map[int][]int, 0) + + m[0] = nil + m[0] = append(m[0], 12345) + m[0] = append(m[0], 67890) + sinkAppend, m[0] = !sinkAppend, append(m[0], 123, 456) + a := []int{7, 8, 9, 0} + m[0] = append(m[0], a...) + + want := []int{12345, 67890, 123, 456, 7, 8, 9, 0} + if got := m[0]; !reflect.DeepEqual(got, want) { + t.Errorf("got %v, want %v", got, want) + } +} + // Maps aren't actually copied on assignment. func TestAlias(t *testing.T) { m := make(map[int]int, 0) @@ -92,18 +158,25 @@ func TestAlias(t *testing.T) { func TestGrowWithNaN(t *testing.T) { m := make(map[float64]int, 4) nan := math.NaN() + + // Use both assignment and assignment operations as they may + // behave differently. 
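// Editor's sketch of why NaN keys exercise interesting paths in the tests
// around here (assumes import "math"; not part of this change): NaN != NaN
// and NaN hashes randomly, so every store under a NaN key creates a fresh
// entry that can only ever be reached by iteration, never by lookup.
func nanKeySketch() (entries int, lookupHits bool) {
	m := make(map[float64]int)
	nan := math.NaN()
	m[nan] = 1
	m[nan] = 2
	m[nan] = 3
	_, lookupHits = m[nan]    // always false
	return len(m), lookupHits // 3, false
}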
m[nan] = 1 m[nan] = 2 - m[nan] = 4 + m[nan] += 4 + cnt := 0 s := 0 growflag := true for k, v := range m { if growflag { // force a hashtable resize - for i := 0; i < 100; i++ { + for i := 0; i < 50; i++ { m[float64(i)] = i } + for i := 50; i < 100; i++ { + m[float64(i)] += i + } growflag = false } if k != k { @@ -128,8 +201,8 @@ func TestGrowWithNegativeZero(t *testing.T) { negzero := math.Copysign(0.0, -1.0) m := make(map[FloatInt]int, 4) m[FloatInt{0.0, 0}] = 1 - m[FloatInt{0.0, 1}] = 2 - m[FloatInt{0.0, 2}] = 4 + m[FloatInt{0.0, 1}] += 2 + m[FloatInt{0.0, 2}] += 4 m[FloatInt{0.0, 3}] = 8 growflag := true s := 0 @@ -211,9 +284,12 @@ func TestIterGrowAndDelete(t *testing.T) { // an iterator is still using them. func TestIterGrowWithGC(t *testing.T) { m := make(map[int]int, 4) - for i := 0; i < 16; i++ { + for i := 0; i < 8; i++ { m[i] = i } + for i := 8; i < 16; i++ { + m[i] += i + } growflag := true bitmask := 0 for k := range m { @@ -364,11 +440,11 @@ func TestEmptyKeyAndValue(t *testing.T) { // ("quick keys") as well as long keys. func TestSingleBucketMapStringKeys_DupLen(t *testing.T) { testMapLookups(t, map[string]string{ - "x": "x1val", - "xx": "x2val", - "foo": "fooval", - "bar": "barval", // same key length as "foo" - "xxxx": "x4val", + "x": "x1val", + "xx": "x2val", + "foo": "fooval", + "bar": "barval", // same key length as "foo" + "xxxx": "x4val", strings.Repeat("x", 128): "longval1", strings.Repeat("y", 128): "longval2", }) @@ -627,7 +703,7 @@ func TestMapBuckets(t *testing.T) { // have a nil bucket pointer due to starting with preallocated buckets // on the stack. Escaping maps start with a non-nil bucket pointer if // hint size is above bucketCnt and thereby have more than one bucket. - // These tests depend on bucketCnt and loadFactor* in hashmap.go. + // These tests depend on bucketCnt and loadFactor* in map.go. 
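// Editor's sketch of the relationship the bucket-count tests around here
// probe (a rough approximation of makemap's sizing loop; bucketCnt = 8 and
// the 6.5 load factor are assumed to match map.go, and the helper itself is
// hypothetical):
func bucketsForHintSketch(hint int) int {
	B := uint(0)
	for hint > 8 && uint64(hint) > 13*(uint64(1)<<B)/2 {
		B++
	}
	return 1 << B // e.g. hints 0..8 -> 1 bucket, 9..13 -> 2, 14..26 -> 4
}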
t.Run("mapliteral", func(t *testing.T) { for _, tt := range mapBucketTests { localMap := map[int]int{} @@ -802,6 +878,23 @@ func benchmarkMapAssignInt32(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt32(b *testing.B, n int) { + a := make(map[int32]int) + for i := 0; i < b.N; i++ { + a[int32(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt32(b *testing.B, n int) { + a := make(map[int32][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int32(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt32(b *testing.B, n int) { a := make(map[int32]int, n) b.ResetTimer() @@ -824,6 +917,23 @@ func benchmarkMapAssignInt64(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignInt64(b *testing.B, n int) { + a := make(map[int64]int) + for i := 0; i < b.N; i++ { + a[int64(i&(n-1))] += i + } +} + +func benchmarkMapAppendAssignInt64(b *testing.B, n int) { + a := make(map[int64][]int) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := int64(i & (n - 1)) + a[key] = append(a[key], i) + } +} + func benchmarkMapDeleteInt64(b *testing.B, n int) { a := make(map[int64]int, n) b.ResetTimer() @@ -851,6 +961,33 @@ func benchmarkMapAssignStr(b *testing.B, n int) { } } +func benchmarkMapOperatorAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + b.ResetTimer() + a := make(map[string]string) + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] += key + } +} + +func benchmarkMapAppendAssignStr(b *testing.B, n int) { + k := make([]string, n) + for i := 0; i < len(k); i++ { + k[i] = strconv.Itoa(i) + } + a := make(map[string][]string) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := k[i&(n-1)] + a[key] = append(a[key], key) + } +} + func benchmarkMapDeleteStr(b *testing.B, n int) { i2s := make([]string, n) for i := 0; i < n; i++ { @@ -886,8 +1023,127 @@ func BenchmarkMapAssign(b *testing.B) { b.Run("Str", runWith(benchmarkMapAssignStr, 1<<8, 1<<16)) } +func BenchmarkMapOperatorAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapOperatorAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapOperatorAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapOperatorAssignStr, 1<<8, 1<<16)) +} + +func BenchmarkMapAppendAssign(b *testing.B) { + b.Run("Int32", runWith(benchmarkMapAppendAssignInt32, 1<<8, 1<<16)) + b.Run("Int64", runWith(benchmarkMapAppendAssignInt64, 1<<8, 1<<16)) + b.Run("Str", runWith(benchmarkMapAppendAssignStr, 1<<8, 1<<16)) +} + func BenchmarkMapDelete(b *testing.B) { b.Run("Int32", runWith(benchmarkMapDeleteInt32, 100, 1000, 10000)) b.Run("Int64", runWith(benchmarkMapDeleteInt64, 100, 1000, 10000)) b.Run("Str", runWith(benchmarkMapDeleteStr, 100, 1000, 10000)) } + +func TestDeferDeleteSlow(t *testing.T) { + ks := []complex128{0, 1, 2, 3} + + m := make(map[interface{}]int) + for i, k := range ks { + m[k] = i + } + if len(m) != len(ks) { + t.Errorf("want %d elements, got %d", len(ks), len(m)) + } + + func() { + for _, k := range ks { + defer delete(m, k) + } + }() + if len(m) != 0 { + t.Errorf("want 0 elements, got %d", len(m)) + } +} + +// TestIncrementAfterDeleteValueInt and other test Issue 25936. +// Value types int, int32, int64 are affected. Value type string +// works as expected. 
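// Editor's sketch of the user-visible property the Issue 25936 tests below
// pin down (not part of this change): once a key has been deleted, an
// increment of a key that reuses the freed slot must start from the zero
// value rather than from stale data left behind in the bucket.
func incrementAfterDeleteSketch() int {
	m := map[string]int{"gone": 99}
	delete(m, "gone")
	m["x"]++      // must behave as m["x"] = m["x"] + 1 with m["x"] == 0
	return m["x"] // 1
}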
+func TestIncrementAfterDeleteValueInt(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt32(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int32) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteValueInt64(t *testing.T) { + const key1 = 12 + const key2 = 13 + + m := make(map[int]int64) + m[key1] = 99 + delete(m, key1) + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + delete(m, key1) + m[key2] += 1 + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} + +func TestIncrementAfterDeleteKeyValueString(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]string) + m[key1] = "99" + delete(m, key1) + m[key2] += "1" + if n2 := m[key2]; n2 != "1" { + t.Errorf("appended '1' to empty (nil) string, got %s", n2) + } +} + +// TestIncrementAfterBulkClearKeyStringValueInt tests that map bulk +// deletion (mapclear) still works as expected. Note that it was not +// affected by Issue 25936. +func TestIncrementAfterBulkClearKeyStringValueInt(t *testing.T) { + const key1 = "" + const key2 = "x" + + m := make(map[string]int) + m[key1] = 99 + for k := range m { + delete(m, k) + } + m[key2]++ + if n2 := m[key2]; n2 != 1 { + t.Errorf("incremented 0 to %d", n2) + } +} diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go index 3b8f714..24e5865 100644 --- a/libgo/go/runtime/mbarrier.go +++ b/libgo/go/runtime/mbarrier.go @@ -6,10 +6,10 @@ // // For the concurrent garbage collector, the Go compiler implements // updates to pointer-valued fields that may be in heap objects by -// emitting calls to write barriers. This file contains the actual write barrier -// implementation, gcmarkwb_m, and the various wrappers called by the -// compiler to implement pointer assignment, slice assignment, -// typed memmove, and so on. +// emitting calls to write barriers. The main write barrier for +// individual pointer writes is gcWriteBarrier and is implemented in +// assembly. This file contains write barrier entry points for bulk +// operations. See also mwbbuf.go. package runtime @@ -21,14 +21,10 @@ import ( // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. // -//go:linkname writebarrierptr runtime.writebarrierptr //go:linkname typedmemmove runtime.typedmemmove //go:linkname typedslicecopy runtime.typedslicecopy -// gcmarkwb_m is the mark-phase write barrier, the only barrier we have. -// The rest of this file exists only to make calls to this function. -// -// This is a hybrid barrier that combines a Yuasa-style deletion +// Go uses a hybrid barrier that combines a Yuasa-style deletion // barrier—which shades the object whose reference is being // overwritten—with Dijkstra insertion barrier—which shades the object // whose reference is being written. The insertion part of the barrier @@ -144,105 +140,17 @@ import ( // reachable by some goroutine that currently cannot reach it. 
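// Editor's aside, summarizing the hybrid barrier described above in the
// pseudocode form used by the Go 1.8 hybrid-barrier design document (the
// per-pointer barrier itself now lives in assembly as gcWriteBarrier):
//
//	writePointer(slot, ptr):
//		shade(*slot)                  // Yuasa-style deletion barrier
//		if current stack is grey:
//			shade(ptr)            // Dijkstra-style insertion barrier
//		*slot = ptr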
// // -//go:nowritebarrierrec -//go:systemstack -func gcmarkwb_m(slot *uintptr, ptr uintptr) { - if writeBarrier.needed { - // Note: This turns bad pointer writes into bad - // pointer reads, which could be confusing. We avoid - // reading from obviously bad pointers, which should - // take care of the vast majority of these. We could - // patch this up in the signal handler, or use XCHG to - // combine the read and the write. Checking inheap is - // insufficient since we need to track changes to - // roots outside the heap. - // - // Note: profbuf.go omits a barrier during signal handler - // profile logging; that's safe only because this deletion barrier exists. - // If we remove the deletion barrier, we'll have to work out - // a new way to handle the profile logging. - if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize { - if optr := *slot; optr != 0 { - shade(optr) - } - } - // TODO: Make this conditional on the caller's stack color. - if ptr != 0 && inheap(ptr) { - shade(ptr) - } - } -} - -// writebarrierptr_prewrite1 invokes a write barrier for *dst = src -// prior to the write happening. -// -// Write barrier calls must not happen during critical GC and scheduler -// related operations. In particular there are times when the GC assumes -// that the world is stopped but scheduler related code is still being -// executed, dealing with syscalls, dealing with putting gs on runnable -// queues and so forth. This code cannot execute write barriers because -// the GC might drop them on the floor. Stopping the world involves removing -// the p associated with an m. We use the fact that m.p == nil to indicate -// that we are in one these critical section and throw if the write is of -// a pointer to a heap object. -//go:nosplit -func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { - mp := acquirem() - if mp.inwb || mp.dying > 0 { - // We explicitly allow write barriers in startpanic_m, - // since we're going down anyway. Ignore them here. - releasem(mp) - return - } - systemstack(func() { - if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) { - throw("writebarrierptr_prewrite1 called with mp.p == nil") - } - mp.inwb = true - gcmarkwb_m(dst, src) - }) - mp.inwb = false - releasem(mp) -} - -// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer, -// but if we do that, Go inserts a write barrier on *dst = src. -//go:nosplit -func writebarrierptr(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - *dst = src - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { - print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n") - throw("bad pointer in write barrier") - }) - } - writebarrierptr_prewrite1(dst, src) - *dst = src -} - -// writebarrierptr_prewrite is like writebarrierptr, but the store -// will be performed by the caller after this call. The caller must -// not allow preemption between this call and the write. +// Signal handler pointer writes: // -//go:nosplit -func writebarrierptr_prewrite(dst *uintptr, src uintptr) { - if writeBarrier.cgo { - cgoCheckWriteBarrier(dst, src) - } - if !writeBarrier.needed { - return - } - if src != 0 && src < minPhysPageSize { - systemstack(func() { throw("bad pointer in write barrier") }) - } - writebarrierptr_prewrite1(dst, src) -} +// In general, the signal handler cannot safely invoke the write +// barrier because it may run without a P or even during the write +// barrier. 
+// +// There is exactly one exception: profbuf.go omits a barrier during +// signal handler profile logging. That's safe only because of the +// deletion barrier. See profbuf.go for a detailed argument. If we +// remove the deletion barrier, we'll have to work out a new way to +// handle the profile logging. // typedmemmove copies a value of type t to dst from src. // Must be nosplit, see #16026. @@ -252,6 +160,9 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) { // //go:nosplit func typedmemmove(typ *_type, dst, src unsafe.Pointer) { + if dst == src { + return + } if typ.kind&kindNoPointers == 0 { bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size) } @@ -335,6 +246,10 @@ func typedslicecopy(typ *_type, dst, src slice) int { cgoCheckSliceCopy(typ, dst, src, n) } + if dstp == srcp { + return n + } + // Note: No point in checking typ.kind&kindNoPointers here: // compiler only emits calls to typedslicecopy for types with pointers, // and growslice and reflect_typedslicecopy check for pointers diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index c6c8e6a..42c2015 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -13,12 +13,11 @@ // // Heap bitmap // -// The allocated heap comes from a subset of the memory in the range [start, used), -// where start == mheap_.arena_start and used == mheap_.arena_used. -// The heap bitmap comprises 2 bits for each pointer-sized word in that range, -// stored in bytes indexed backward in memory from start. -// That is, the byte at address start-1 holds the 2-bit entries for the four words -// start through start+3*ptrSize, the byte at start-2 holds the entries for +// The heap bitmap comprises 2 bits for each pointer-sized word in the heap, +// stored in the heapArena metadata backing each heap arena. +// That is, if ha is the heapArena for the arena starting a start, +// then ha.bitmap[0] holds the 2-bit entries for the four words start +// through start+3*ptrSize, ha.bitmap[1] holds the entries for // start+4*ptrSize through start+7*ptrSize, and so on. // // In each 2-bit entry, the lower bit holds the same information as in the 1-bit @@ -85,8 +84,8 @@ const ( bitPointer = 1 << 0 bitScan = 1 << 4 - heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries - heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte + heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries + wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte // all scan/pointer bits in a byte bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) @@ -104,8 +103,6 @@ func addb(p *byte, n uintptr) *byte { } // subtractb returns the byte pointer p-n. -// subtractb is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier //go:nosplit func subtractb(p *byte, n uintptr) *byte { @@ -126,8 +123,6 @@ func add1(p *byte) *byte { } // subtract1 returns the byte pointer p-1. -// subtract1 is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier // // nosplit because it is used during write barriers and must not be preempted. @@ -139,28 +134,6 @@ func subtract1(p *byte) *byte { return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) } -// mapBits maps any additional bitmap memory needed for the new arena memory. 
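// Editor's sketch of the per-arena bitmap addressing described in the new
// heap bitmap comment above, assuming a 64-bit target (ptrSize = 8). The
// helper is hypothetical; heapBitsForAddr below is the real thing.
func bitmapIndexSketch(addr, arenaStart uintptr) (byteIndex uintptr, shift uint) {
	const ptrSize = 8
	word := (addr - arenaStart) / ptrSize // heap word within the arena
	return word / 4, uint(word % 4)       // 4 two-bit entries per bitmap byte
}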
-// -// Don't call this directly. Call mheap.setArenaUsed. -// -//go:nowritebarrier -func (h *mheap) mapBits(arena_used uintptr) { - // Caller has added extra mappings to the arena. - // Add extra mappings of bitmap words as needed. - // We allocate extra bitmap pieces in chunks of bitmapChunk. - const bitmapChunk = 8192 - - n := (arena_used - mheap_.arena_start) / heapBitmapScale - n = round(n, bitmapChunk) - n = round(n, physPageSize) - if h.bitmap_mapped >= n { - return - } - - sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) - h.bitmap_mapped = n -} - // heapBits provides access to the bitmap bits for a single heap word. // The methods on heapBits take value receivers so that the compiler // can more easily inline calls to those methods and registerize the @@ -168,8 +141,14 @@ func (h *mheap) mapBits(arena_used uintptr) { type heapBits struct { bitp *uint8 shift uint32 + arena uint32 // Index of heap arena containing bitp + last *uint8 // Last byte arena's bitmap } +// Make the compiler check that heapBits.arena is large enough to hold +// the maximum arena frame number. +var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1} + // markBits provides access to the mark bit for an object in the heap. // bytep points to the byte holding the mark bit. // mask is a byte with a single bit set that can be &ed with *bytep @@ -191,7 +170,7 @@ func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { return markBits{bytep, mask, allocBitIndex} } -// refillaCache takes 8 bytes s.allocBits starting at whichByte +// refillAllocCache takes 8 bytes s.allocBits starting at whichByte // and negates them so that ctz (count trailing zeros) instructions // can be used. It then places these 8 bytes into the cached 64 bit // s.allocCache. @@ -278,7 +257,7 @@ func (s *mspan) objIndex(p uintptr) uintptr { return 0 } if s.baseMask != 0 { - // s.baseMask is 0, elemsize is a power of two, so shift by s.divShift + // s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift return byteOffset >> s.divShift } return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) @@ -329,9 +308,6 @@ func (m markBits) clearMarked() { // markBitsForSpan returns the markBits for the span base address base. func markBitsForSpan(base uintptr) (mbits markBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - throw("markBitsForSpan: base out of range") - } mbits = markBitsForAddr(base) if mbits.mask != 1 { throw("markBitsForSpan: unaligned start") @@ -351,31 +327,36 @@ func (m *markBits) advance() { } // heapBitsForAddr returns the heapBits for the address addr. -// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). +// The caller must ensure addr is in an allocated span. +// In particular, be careful not to point past the end of an object. // // nosplit because it is used during write barriers and must not be preempted. //go:nosplit -func heapBitsForAddr(addr uintptr) heapBits { - // 2 bits per work, 4 pairs per byte, and a mask is hard coded. - off := (addr - mheap_.arena_start) / sys.PtrSize - return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} -} - -// heapBitsForSpan returns the heapBits for the span base address base. 
-func heapBitsForSpan(base uintptr) (hbits heapBits) { - if base < mheap_.arena_start || base >= mheap_.arena_used { - print("runtime: base ", hex(base), " not in range [", hex(mheap_.arena_start), ",", hex(mheap_.arena_used), ")\n") - throw("heapBitsForSpan: base out of range") +func heapBitsForAddr(addr uintptr) (h heapBits) { + // 2 bits per word, 4 pairs per byte, and a mask is hard coded. + arena := arenaIndex(addr) + ha := mheap_.arenas[arena.l1()][arena.l2()] + // The compiler uses a load for nil checking ha, but in this + // case we'll almost never hit that cache line again, so it + // makes more sense to do a value check. + if ha == nil { + // addr is not in the heap. Return nil heapBits, which + // we expect to crash in the caller. + return } - return heapBitsForAddr(base) + h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] + h.shift = uint32((addr / sys.PtrSize) & 3) + h.arena = uint32(arena) + h.last = &ha.bitmap[len(ha.bitmap)-1] + return } -// heapBitsForObject returns the base address for the heap object -// containing the address p, the heapBits for base, -// the object's span, and of the index of the object in s. -// If p does not point into a heap object, -// return base == 0 -// otherwise return the base of the object. +// findObject returns the base address for the heap object containing +// the address p, the object's span, and the index of the object in s. +// If p does not point into a heap object, it returns base == 0. +// +// If p points is an invalid heap pointer and debug.invalidptr != 0, +// findObject panics. // // For gccgo, the forStack parameter is true if the value came from the stack. // The stack is collected conservatively and may contain invalid pointers. @@ -383,16 +364,9 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) { // refBase and refOff optionally give the base address of the object // in which the pointer p was found and the byte offset at which it // was found. These are used for error reporting. -func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { - arenaStart := mheap_.arena_start - if p < arenaStart || p >= mheap_.arena_used { - return - } - off := p - arenaStart - idx := off >> _PageShift - // p points into the heap, but possibly to the middle of an object. - // Consult the span table to find the block beginning. - s = mheap_.spans[idx] +func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) { + s = spanOf(p) + // If p is a bad pointer, it may not be in s's bounds. if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { if s == nil || s.state == _MSpanManual || forStack { // If s is nil, the virtual address has never been part of the heap. @@ -419,7 +393,7 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, } else { print(" to unused region of span") } - print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") + print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") if refBase != 0 { print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") gcDumpObject("object", refBase, refOff) @@ -458,8 +432,6 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, base += objIndex * s.elemsize } } - // Now that we know the actual base, compute heapBits to return to caller. 
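// Editor's sketch of how a caller uses the new findObject API shown above
// (hypothetical helper; the heapBits that heapBitsForObject used to return
// are now obtained separately via heapBitsForAddr):
func objectBaseSketch(p uintptr) (base, size uintptr) {
	base, s, _ := findObject(p, 0, 0, false)
	if base == 0 {
		return 0, 0 // p does not point into the heap
	}
	return base, s.elemsize
}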
- hbits = heapBitsForAddr(base) return } @@ -471,9 +443,42 @@ func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, //go:nosplit func (h heapBits) next() heapBits { if h.shift < 3*heapBitsShift { - return heapBits{h.bitp, h.shift + heapBitsShift} + h.shift += heapBitsShift + } else if h.bitp != h.last { + h.bitp, h.shift = add1(h.bitp), 0 + } else { + // Move to the next arena. + return h.nextArena() } - return heapBits{subtract1(h.bitp), 0} + return h +} + +// nextArena advances h to the beginning of the next heap arena. +// +// This is a slow-path helper to next. gc's inliner knows that +// heapBits.next can be inlined even though it calls this. This is +// marked noinline so it doesn't get inlined into next and cause next +// to be too big to inline. +// +//go:nosplit +//go:noinline +func (h heapBits) nextArena() heapBits { + h.arena++ + ai := arenaIdx(h.arena) + l2 := mheap_.arenas[ai.l1()] + if l2 == nil { + // We just passed the end of the object, which + // was also the end of the heap. Poison h. It + // should never be dereferenced at this point. + return heapBits{} + } + ha := l2[ai.l2()] + if ha == nil { + return heapBits{} + } + h.bitp, h.shift = &ha.bitmap[0], 0 + h.last = &ha.bitmap[len(ha.bitmap)-1] + return h } // forward returns the heapBits describing n pointer-sized words ahead of h in memory. @@ -481,9 +486,39 @@ func (h heapBits) next() heapBits { // h.forward(1) is equivalent to h.next(), just slower. // Note that forward does not modify h. The caller must record the result. // bits returns the heap bits for the current word. +//go:nosplit func (h heapBits) forward(n uintptr) heapBits { n += uintptr(h.shift) / heapBitsShift - return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} + nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4 + h.shift = uint32(n%4) * heapBitsShift + if nbitp <= uintptr(unsafe.Pointer(h.last)) { + h.bitp = (*uint8)(unsafe.Pointer(nbitp)) + return h + } + + // We're in a new heap arena. + past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1) + h.arena += 1 + uint32(past/heapArenaBitmapBytes) + ai := arenaIdx(h.arena) + if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil { + a := l2[ai.l2()] + h.bitp = &a.bitmap[past%heapArenaBitmapBytes] + h.last = &a.bitmap[len(a.bitmap)-1] + } else { + h.bitp, h.last = nil, nil + } + return h +} + +// forwardOrBoundary is like forward, but stops at boundaries between +// contiguous sections of the bitmap. It returns the number of words +// advanced over, which will be <= n. +func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) { + maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp))) + if n > maxn { + n = maxn + } + return h.forward(n), n } // The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. @@ -564,6 +599,8 @@ func (h heapBits) setCheckmarked(size uintptr) { // make sure the underlying allocation contains pointers, usually // by checking typ.kind&kindNoPointers. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func bulkBarrierPreWrite(dst, src, size uintptr) { if (dst|src|size)&(sys.PtrSize-1) != 0 { @@ -572,7 +609,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if !writeBarrier.needed { return } - if !inheap(dst) { + if s := spanOf(dst); s == nil { // If dst is a global, use the data or BSS bitmaps to // execute write barriers. 
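// Editor's sketch of the bitmap arithmetic behind heapBits.forward above:
// advancing n heap words moves n/4 bitmap bytes plus a 2-bit entry within a
// byte (heapBitsShift == 1, four entries per byte). Hypothetical helper; the
// real code must additionally hop across arena boundaries.
func forwardSketch(byteIndex uintptr, shift uint32, n uintptr) (uintptr, uint32) {
	n += uintptr(shift)
	return byteIndex + n/4, uint32(n % 4)
}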
lo := 0 @@ -594,6 +631,14 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { } } return + } else if s.state != _MSpanInUse || dst < s.base() || s.limit <= dst { + // dst was heap memory at some point, but isn't now. + // It can't be a global. It must be either our stack, + // or in the case of direct channel sends, it could be + // another stack. Either way, no need for barriers. + // This will also catch if dst is in a freed span, + // though that should never have. + return } buf := &getg().m.p.ptr().wbBuf @@ -663,7 +708,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { } } -// typeBitsBulkBarrier executes writebarrierptr_prewrite for every +// typeBitsBulkBarrier executes a write barrier for every // pointer that would be copied from [src, src+size) to [dst, // dst+size) by a memmove using the type bitmap to locate those // pointer slots. @@ -677,23 +722,26 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { // Must not be preempted because it typically runs right before memmove, // and the GC must observe them as an atomic action. // +// Callers must perform cgo checks if writeBarrier.cgo. +// //go:nosplit func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if typ == nil { throw("runtime: typeBitsBulkBarrier without type") } if typ.size != size { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " of size ", typ.size, " but memory size", size) + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size) throw("runtime: invalid typeBitsBulkBarrier") } if typ.kind&kindGCProg != 0 { - println("runtime: typeBitsBulkBarrier with type ", *typ.string, " with GC prog") + println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog") throw("runtime: invalid typeBitsBulkBarrier") } if !writeBarrier.needed { return } ptrmask := typ.gcdata + buf := &getg().m.p.ptr().wbBuf var bits uint32 for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { if i&(sys.PtrSize*8-1) == 0 { @@ -705,7 +753,9 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { if bits&1 != 0 { dstx := (*uintptr)(unsafe.Pointer(dst + i)) srcx := (*uintptr)(unsafe.Pointer(src + i)) - writebarrierptr_prewrite(dstx, *srcx) + if !buf.putFast(*dstx, *srcx) { + wbBufFlush(nil, 0) + } } } } @@ -736,23 +786,28 @@ func (h heapBits) initSpan(s *mspan) { s.allocBits = newAllocBits(s.nelems) // Clear bits corresponding to objects. - if total%heapBitmapScale != 0 { + nw := total / sys.PtrSize + if nw%wordsPerBitmapByte != 0 { throw("initSpan: unaligned length") } - nbyte := total / heapBitmapScale - if sys.PtrSize == 8 && size == sys.PtrSize { - end := h.bitp - bitp := subtractb(end, nbyte-1) - for { - *bitp = bitPointerAll | bitScanAll - if bitp == end { - break + if h.shift != 0 { + throw("initSpan: unaligned base") + } + for nw > 0 { + hNext, anw := h.forwardOrBoundary(nw) + nbyte := anw / wordsPerBitmapByte + if sys.PtrSize == 8 && size == sys.PtrSize { + bitp := h.bitp + for i := uintptr(0); i < nbyte; i++ { + *bitp = bitPointerAll | bitScanAll + bitp = add1(bitp) } - bitp = add1(bitp) + } else { + memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) } - return + h = hNext + nw -= anw } - memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) } // initCheckmarkSpan initializes a span for being checkmarked. @@ -764,10 +819,9 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. 
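// Editor's aside on the buffered barrier now used by typeBitsBulkBarrier
// above (see mwbbuf.go): pointer pairs are appended to the per-P write
// barrier buffer, and the slow path only drains the buffer into the GC work
// queues once it fills up. The pattern, as it appears in the new code:
//
//	buf := &getg().m.p.ptr().wbBuf
//	if !buf.putFast(*dstx, *srcx) { // records the pair; false means the buffer is now full
//		wbBufFlush(nil, 0)      // drain to the GC work queues
//	}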
// Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp &^= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp &^= bitPointerAll + h = h.forward(wordsPerBitmapByte) } return } @@ -788,10 +842,9 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { // Only possible on 64-bit system, since minimum size is 8. // Must clear type bit (checkmark bit) of every word. // The type bit is the lower of every two-bit pair. - bitp := h.bitp - for i := uintptr(0); i < n; i += 4 { - *bitp |= bitPointerAll - bitp = subtract1(bitp) + for i := uintptr(0); i < n; i += wordsPerBitmapByte { + *h.bitp |= bitPointerAll + h = h.forward(wordsPerBitmapByte) } } } @@ -958,6 +1011,19 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // This is a lot of lines of code, but it compiles into relatively few // machine instructions. + outOfPlace := false + if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrand()%2 == 0) { + // This object spans heap arenas, so the bitmap may be + // discontiguous. Unroll it into the object instead + // and then copy it out. + // + // In doubleCheck mode, we randomly do this anyway to + // stress test the bitmap copying path. + outOfPlace = true + h.bitp = (*uint8)(unsafe.Pointer(x)) + h.last = nil + } + var ( // Ptrmask input. p *byte // last ptrmask byte read @@ -996,9 +1062,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } ptrmask = debugPtrmask.data runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) - goto Phase4 } - return + goto Phase4 } // Note about sizes: @@ -1106,7 +1171,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { } if nw == 0 { // No pointers! Caller was supposed to check. - println("runtime: invalid type ", *typ.string) + println("runtime: invalid type ", typ.string()) throw("heapBitsSetType: called with non-pointer type") return } @@ -1116,7 +1181,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { nw = 2 } - // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). + // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). // The leading byte is special because it contains the bits for word 1, // which does not have the scan bit set. // The leading half-byte is special because it's a half a byte, @@ -1146,7 +1211,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { goto Phase3 } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 nb -= 4 @@ -1167,7 +1232,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // the checkmark. *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) if w += 2; w >= nw { // We know that there is more data, because we handled 2-word objects above. // This must be at least a 6-word object. If we're out of pointer words, @@ -1197,7 +1262,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 // Load more bits. b has nb right now. @@ -1245,7 +1310,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 } @@ -1266,11 +1331,11 @@ Phase3: // The first is hb, the rest are zero. 
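// Editor's sketch of the 2-bit encoding that heapBitsSetType writes: within
// one bitmap byte, entry i (0..3) keeps its pointer bit at bit i and its
// scan bit at bit i+4. The lower bit says whether the word holds a live
// pointer; the upper bit's meaning depends on the word's position in the
// object (checkmark for the second word, "description continues" elsewhere).
// Hypothetical helper, not part of this change.
func heapBitsDecodeSketch(b uint8, i uint) (isPointer, scan bool) {
	const bitPointer, bitScan = uint8(1) << 0, uint8(1) << 4
	return b>>i&bitPointer != 0, b>>i&bitScan != 0
}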
if w <= nw { *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) hb = 0 // for possible final half-byte below for w += 4; w <= nw; w += 4 { *hbitp = 0 - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) } } @@ -1287,11 +1352,91 @@ Phase3: } Phase4: - // Phase 4: all done, but perhaps double check. + // Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary. + if outOfPlace { + // TODO: We could probably make this faster by + // handling [x+dataSize, x+size) specially. + h := heapBitsForAddr(x) + // cnw is the number of heap words, or bit pairs + // remaining (like nw above). + cnw := size / sys.PtrSize + src := (*uint8)(unsafe.Pointer(x)) + // We know the first and last byte of the bitmap are + // not the same, but it's still possible for small + // objects span arenas, so it may share bitmap bytes + // with neighboring objects. + // + // Handle the first byte specially if it's shared. See + // Phase 1 for why this is the only special case we need. + if doubleCheck { + if !(h.shift == 0 || (sys.PtrSize == 8 && h.shift == 2)) { + print("x=", x, " size=", size, " cnw=", h.shift, "\n") + throw("bad start shift") + } + } + if sys.PtrSize == 8 && h.shift == 2 { + *h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src + h = h.next().next() + cnw -= 2 + src = addb(src, 1) + } + // We're now byte aligned. Copy out to per-arena + // bitmaps until the last byte (which may again be + // partial). + for cnw >= 4 { + // This loop processes four words at a time, + // so round cnw down accordingly. + hNext, words := h.forwardOrBoundary(cnw / 4 * 4) + + // n is the number of bitmap bytes to copy. + n := words / 4 + memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n) + cnw -= words + h = hNext + src = addb(src, n) + } + if doubleCheck && h.shift != 0 { + print("cnw=", cnw, " h.shift=", h.shift, "\n") + throw("bad shift after block copy") + } + // Handle the last byte if it's shared. + if cnw == 2 { + *h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src + src = addb(src, 1) + h = h.next().next() + } + if doubleCheck { + if uintptr(unsafe.Pointer(src)) > x+size { + throw("copy exceeded object size") + } + if !(cnw == 0 || cnw == 2) { + print("x=", x, " size=", size, " cnw=", cnw, "\n") + throw("bad number of remaining words") + } + // Set up hbitp so doubleCheck code below can check it. + hbitp = h.bitp + } + // Zero the object where we wrote the bitmap. + memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x) + } + + // Double check the whole bitmap. if doubleCheck { - end := heapBitsForAddr(x + size) + // x+size may not point to the heap, so back up one + // word and then call next(). + end := heapBitsForAddr(x + size - sys.PtrSize).next() + endAI := arenaIdx(end.arena) + if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { + // The unrolling code above walks hbitp just + // past the bitmap without moving to the next + // arena. Synthesize this for end.bitp. 
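// Editor's sketch of the masked merge used by the out-of-place copy above:
// when an object's first or last bitmap byte is shared with a neighbouring
// object, only this object's 2-bit entries may be overwritten, so the copy
// keeps the neighbour's bits and ORs in the freshly unrolled ones.
// Hypothetical helper generalizing the two in-line expressions.
func mergeSharedBitmapByteSketch(dst, src, ownMask uint8) uint8 {
	return dst&^ownMask | src&ownMask
}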
+ end.arena-- + endAI = arenaIdx(end.arena) + end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) + end.last = nil + } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { - println("ended at wrong bitmap byte for", *typ.string, "x", dataSize/typ.size) + println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) @@ -1327,15 +1472,15 @@ Phase4: } } if have != want { - println("mismatch writing bits for", *typ.string, "x", dataSize/typ.size) + println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size) print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") - print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") + print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n") print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") h0 := heapBitsForAddr(x) print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") - println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want) + println("at word", i, "offset", i*sys.PtrSize, "have", hex(have), "want", hex(want)) if typ.kind&kindGCProg != 0 { println("GC program:") dumpGCProg(addb(typ.gcdata, 4)) @@ -1436,9 +1581,9 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u // so that scanobject can stop early in the final element. totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize } - endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) - endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) - memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) + endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/sys.PtrSize/wordsPerBitmapByte)) + memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg)) } // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. @@ -1497,11 +1642,11 @@ Run: } else { v := bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1535,11 +1680,11 @@ Run: } else { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1599,11 +1744,11 @@ Run: npattern += 8 } } else { - src = add1(src) + src = subtract1(src) for npattern < n { pattern <<= 4 pattern |= uintptr(*src) & 0xf - src = add1(src) + src = subtract1(src) npattern += 4 } } @@ -1665,7 +1810,7 @@ Run: } else { for nbits >= 4 { *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 nbits -= 4 } @@ -1710,10 +1855,10 @@ Run: } } else { // Leading src fragment. 
- src = addb(src, (off+3)/4) + src = subtractb(src, (off+3)/4) if frag := off & 3; frag != 0 { bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits - src = subtract1(src) + src = add1(src) nbits += frag c -= frag } @@ -1721,9 +1866,9 @@ Run: // The bits are rotating through the bit buffer. for i := c / 4; i > 0; i-- { bits |= (uintptr(*src) & 0xf) << nbits - src = subtract1(src) + src = add1(src) *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } // Final src fragment. @@ -1745,12 +1890,12 @@ Run: bits >>= 8 } } else { - totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits nbits += -nbits & 3 for ; nbits > 0; nbits -= 4 { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1839,12 +1984,11 @@ func getgcmask(ep interface{}) (mask []byte) { } // heap - var n uintptr - var base uintptr - if mlookup(uintptr(p), &base, &n, nil) != 0 { + if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 { + hbits := heapBitsForAddr(base) + n := s.elemsize mask = make([]byte, n/sys.PtrSize) for i := uintptr(0); i < n; i += sys.PtrSize { - hbits := heapBitsForAddr(base + i) if hbits.isPointer() { mask[i/sys.PtrSize] = 1 } @@ -1852,6 +1996,7 @@ func getgcmask(ep interface{}) (mask []byte) { mask = mask[:i/sys.PtrSize] break } + hbits = hbits.next() } return } diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index 766cfd1..3dacf96 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -37,7 +37,6 @@ type mcache struct { alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass // Local allocator stats, flushed during GC. - local_nlookup uintptr // number of pointer lookups local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go index 150f4fd..50a4791 100644 --- a/libgo/go/runtime/mcentral.go +++ b/libgo/go/runtime/mcentral.go @@ -246,6 +246,6 @@ func (c *mcentral) grow() *mspan { p := s.base() s.limit = p + size*n - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go index a087945..44f4648 100644 --- a/libgo/go/runtime/mem_gccgo.go +++ b/libgo/go/runtime/mem_gccgo.go @@ -21,9 +21,6 @@ func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) //extern munmap func munmap(addr unsafe.Pointer, length uintptr) int32 -//extern mincore -func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 - //extern madvise func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32 @@ -49,54 +46,6 @@ func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (u return p, 0 } -// NOTE: vec must be just 1 byte long here. -// Mincore returns ENOMEM if any of the pages are unmapped, -// but we want to know that all of the pages are unmapped. -// To make these the same, we can only ask about one page -// at a time. See golang.org/issue/7476. 
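// Editor's aside on the deletions that follow: with the Go 1.11 arena
// allocator, heap address space is reserved explicitly, so the
// mincore/addrspace_free probing and the mmap_fixed retry dance are no
// longer needed. The simplified flow, using sysReserve/sysMap as revised
// below (sketch; POSIX mmap semantics assumed):
//
//	p := sysReserve(v, n)             // mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE)
//	sysMap(p, n, &memstats.heap_sys)  // mmap(p, n, PROT_READ|PROT_WRITE, ...|MAP_FIXED)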
-var addrspace_vec [1]byte - -func addrspace_free(v unsafe.Pointer, n uintptr) bool { - for off := uintptr(0); off < n; off += physPageSize { - // Use a length of 1 byte, which the kernel will round - // up to one physical page regardless of the true - // physical page size. - errval := 0 - if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 { - errval = errno() - } - if errval == _ENOSYS { - // mincore is not available on this system. - // Assume the address is available. - return true - } - if errval == _EINVAL { - // Address is not a multiple of the physical - // page size. Shouldn't happen, but just ignore it. - continue - } - // ENOMEM means unmapped, which is what we want. - // Anything else we assume means the pages are mapped. - if errval != _ENOMEM { - return false - } - } - return true -} - -func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) { - p, err := mmap(v, n, prot, flags, fd, offset) - // On some systems, mmap ignores v without - // MAP_FIXED, so retry if the address space is free. - if p != v && addrspace_free(v, n) { - if err == 0 { - munmap(p, n) - } - p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) - } - return p, err -} - // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit @@ -227,62 +176,17 @@ func sysFault(v unsafe.Pointer, n uintptr) { mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0) } -func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { - // On 64-bit, people with ulimit -v set complain if we reserve too - // much address space. Instead, assume that the reservation is okay - // if we can reserve at least 64K and check the assumption in SysMap. - // Only user-mode Linux (UML) rejects these requests. - if sys.PtrSize == 8 && uint64(n) > 1<<32 { - p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) - if p != v || err != 0 { - if err == 0 { - munmap(p, 64<<10) - } - return nil - } - munmap(p, 64<<10) - *reserved = false - return v - } - +func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) if err != 0 { return nil } - *reserved = true return p } -func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { +func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { mSysStatInc(sysStat, n) - // On 64-bit, we don't actually have v reserved, so tread carefully. - if !reserved { - flags := int32(_MAP_ANON | _MAP_PRIVATE) - if GOOS == "dragonfly" { - // TODO(jsing): For some reason DragonFly seems to return - // memory at a different address than we requested, even when - // there should be no reason for it to do so. This can be - // avoided by using MAP_FIXED, but I'm not sure we should need - // to do this - we do not on other platforms. - flags |= _MAP_FIXED - } - p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) - if err == _ENOMEM { - throw("runtime: out of memory") - } - if p != v || err != 0 { - print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n") - throw("runtime: address space conflict") - } - return - } - - if GOOS == "aix" { - // AIX does not allow mapping a range that is already mapped. - // So always unmap first even if it is already unmapped. 
- munmap(v, n) - } p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) if err == _ENOMEM { throw("runtime: out of memory") diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go index 62de604..b490cd8 100644 --- a/libgo/go/runtime/memmove_test.go +++ b/libgo/go/runtime/memmove_test.go @@ -450,6 +450,13 @@ func BenchmarkCopyFat512(b *testing.B) { _ = y } } +func BenchmarkCopyFat520(b *testing.B) { + var x [520 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} func BenchmarkCopyFat1024(b *testing.B) { var x [1024 / 4]uint32 for i := 0; i < b.N; i++ { diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go index 19573d8..1a7792c 100644 --- a/libgo/go/runtime/mfinal.go +++ b/libgo/go/runtime/mfinal.go @@ -142,7 +142,7 @@ func runfinq() { if fb == nil { fing = gp fingwait = true - goparkunlock(&finlock, "finalizer wait", traceEvGoBlock, 1) + goparkunlock(&finlock, waitReasonFinalizerWait, traceEvGoBlock, 1) continue } unlock(&finlock) @@ -233,8 +233,8 @@ func runfinq() { // is not guaranteed to run, because there is no ordering that // respects the dependencies. // -// The finalizer for obj is scheduled to run at some arbitrary time after -// obj becomes unreachable. +// The finalizer is scheduled to run at some arbitrary time after the +// program can no longer reach the object to which obj points. // There is no guarantee that finalizers will run before a program exits, // so typically they are useful only for releasing non-memory resources // associated with an object during a long-running program. @@ -284,7 +284,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { throw("runtime.SetFinalizer: first argument is nil") } if etyp.kind&kindMask != kindPtr { - throw("runtime.SetFinalizer: first argument is " + *etyp.string + ", not pointer") + throw("runtime.SetFinalizer: first argument is " + etyp.string() + ", not pointer") } ot := (*ptrtype)(unsafe.Pointer(etyp)) if ot.elem == nil { @@ -292,9 +292,9 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } // find the containing object - _, base, _ := findObject(e.data) + base, _, _ := findObject(uintptr(e.data), 0, 0, false) - if base == nil { + if base == 0 { // 0-length objects are okay. if e.data == unsafe.Pointer(&zerobase) { return @@ -314,7 +314,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { return } - if e.data != base { + if uintptr(e.data) != base { // As an implementation detail we allow to set finalizers for an inner byte // of an object if it could come from tiny alloc (see mallocgc for details). 
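// Editor's sketch of the usage these SetFinalizer checks guard (illustrative
// only; openFD and closeFD are hypothetical, and the example assumes
// import "runtime" on the caller's side):
//
//	type resource struct{ fd int }
//
//	r := &resource{fd: openFD()}
//	runtime.SetFinalizer(r, func(r *resource) { closeFD(r.fd) })
//	// The finalizer runs at some arbitrary time after the program can no
//	// longer reach the object r points to, and is not guaranteed to run
//	// before the program exits.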
if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize { @@ -333,14 +333,14 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } if ftyp.kind&kindMask != kindFunc { - throw("runtime.SetFinalizer: second argument is " + *ftyp.string + ", not a function") + throw("runtime.SetFinalizer: second argument is " + ftyp.string() + ", not a function") } ft := (*functype)(unsafe.Pointer(ftyp)) if ft.dotdotdot { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string + " because dotdotdot") + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string() + " because dotdotdot") } if len(ft.in) != 1 { - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) } fint := ft.in[0] switch { @@ -363,7 +363,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { goto okarg } } - throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + throw("runtime.SetFinalizer: cannot pass " + etyp.string() + " to finalizer " + ftyp.string()) okarg: // make sure we have a finalizer goroutine createfing() @@ -379,46 +379,6 @@ okarg: }) } -// Look up pointer v in heap. Return the span containing the object, -// the start of the object, and the size of the object. If the object -// does not exist, return nil, nil, 0. -func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { - c := gomcache() - c.local_nlookup++ - if sys.PtrSize == 4 && c.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(c) - unlock(&mheap_.lock) - } - - // find span - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - if uintptr(v) < arena_start || uintptr(v) >= arena_used { - return - } - p := uintptr(v) >> pageShift - q := p - arena_start>>pageShift - s = mheap_.spans[q] - if s == nil { - return - } - x = unsafe.Pointer(s.base()) - - if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse { - s = nil - x = nil - return - } - - n = s.elemsize - if s.spanclass.sizeclass() != 0 { - x = add(x, (uintptr(v)-uintptr(x))/n*n) - } - return -} - // Mark KeepAlive as noinline so that it is easily detectable as an intrinsic. //go:noinline diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go index 7496671..1febe78 100644 --- a/libgo/go/runtime/mfixalloc.go +++ b/libgo/go/runtime/mfixalloc.go @@ -11,7 +11,7 @@ package runtime import "unsafe" // FixAlloc is a simple free-list allocator for fixed size objects. -// Malloc uses a FixAlloc wrapped around sysAlloc to manages its +// Malloc uses a FixAlloc wrapped around sysAlloc to manage its // MCache and MSpan objects. // // Memory returned by fixalloc.alloc is zeroed by default, but the diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go index 626f088..4ef982d 100644 --- a/libgo/go/runtime/mgc.go +++ b/libgo/go/runtime/mgc.go @@ -232,21 +232,10 @@ func setGCPercent(in int32) (out int32) { gcSetTriggerRatio(memstats.triggerRatio) unlock(&mheap_.lock) - // If we just disabled GC, wait for any concurrent GC to + // If we just disabled GC, wait for any concurrent GC mark to // finish so we always return with no GC running. if in < 0 { - // Disable phase transitions. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark { - // GC is active. Wait until we reach sweeping. 
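// Editor's aside on the change above (assumes import "runtime/debug" on the
// caller's side): the contract being preserved is that disabling GC returns
// only once no collection is running, now implemented by waiting on the
// current mark phase via gcWaitOnMark below.
//
//	old := debug.SetGCPercent(-1) // returns with no GC in flight
//	defer debug.SetGCPercent(old)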
- gp := getg() - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // GC isn't active. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(atomic.Load(&work.cycles)) } return out @@ -1091,21 +1080,10 @@ func GC() { // GC may move ahead on its own. For example, when we block // until mark termination N, we may wake up in cycle N+2. - gp := getg() - - // Prevent the GC phase or cycle count from changing. - lock(&work.sweepWaiters.lock) + // Wait until the current sweep termination, mark, and mark + // termination complete. n := atomic.Load(&work.cycles) - if gcphase == _GCmark { - // Wait until sweep termination, mark, and mark - // termination of cycle N complete. - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - // We're in sweep N already. - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n) // We're now in sweep N or later. Trigger GC cycle N+1, which // will first finish sweep N if necessary and then enter sweep @@ -1113,14 +1091,7 @@ func GC() { gcStart(gcBackgroundMode, gcTrigger{kind: gcTriggerCycle, n: n + 1}) // Wait for mark termination N+1 to complete. - lock(&work.sweepWaiters.lock) - if gcphase == _GCmark && atomic.Load(&work.cycles) == n+1 { - gp.schedlink = work.sweepWaiters.head - work.sweepWaiters.head.set(gp) - goparkunlock(&work.sweepWaiters.lock, "wait for GC cycle", traceEvGoBlock, 1) - } else { - unlock(&work.sweepWaiters.lock) - } + gcWaitOnMark(n + 1) // Finish sweep N+1 before returning. We do this both to // complete the cycle and because runtime.GC() is often used @@ -1157,6 +1128,32 @@ func GC() { releasem(mp) } +// gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has +// already completed this mark phase, it returns immediately. +func gcWaitOnMark(n uint32) { + for { + // Disable phase transitions. + lock(&work.sweepWaiters.lock) + nMarks := atomic.Load(&work.cycles) + if gcphase != _GCmark { + // We've already completed this cycle's mark. + nMarks++ + } + if nMarks > n { + // We're done. + unlock(&work.sweepWaiters.lock) + return + } + + // Wait until sweep termination, mark, and mark + // termination of cycle N complete. + gp := getg() + gp.schedlink = work.sweepWaiters.head + work.sweepWaiters.head.set(gp) + goparkunlock(&work.sweepWaiters.lock, waitReasonWaitForGCCycle, traceEvGoBlock, 1) + } +} + // gcMode indicates how concurrent a GC cycle should be. type gcMode int @@ -1531,7 +1528,7 @@ func gcMarkTermination(nextTriggerRatio float64) { _g_.m.traceback = 2 gp := _g_.m.curg casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "garbage collection" + gp.waitreason = waitReasonGarbageCollection // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. Cuts @@ -1800,7 +1797,7 @@ func gcBgMarkWorker(_p_ *p) { } } return true - }, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0) + }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) // Loop until the P dies and disassociates this // worker (the P may later be reused, in which case diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go index fe437bf..e7fa831 100644 --- a/libgo/go/runtime/mgclarge.go +++ b/libgo/go/runtime/mgclarge.go @@ -9,7 +9,7 @@ // Large spans are the subject of this file. 
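The new gcWaitOnMark helper in the mgc.go hunks above centralizes the check/park/recheck idiom that setGCPercent and GC used to open-code: take the sweepWaiters lock, see whether the mark phase of the requested cycle has already finished, and otherwise park on the waiter list and recheck after being woken. A minimal, self-contained analogue of that loop, using sync.Cond in place of goparkunlock and the waiter list, and ignoring gcWaitOnMark's adjustment for whether a mark is currently in progress (an illustrative sketch only, not runtime code):

package main

import (
	"fmt"
	"sync"
	"time"
)

// cycleTracker stands in for work.cycles plus the sweepWaiters list:
// a monotonically increasing count of completed mark phases and a way
// to block until a given cycle's mark is done.
type cycleTracker struct {
	mu     sync.Mutex
	cond   *sync.Cond
	cycles uint32 // completed mark phases
}

func newCycleTracker() *cycleTracker {
	t := &cycleTracker{}
	t.cond = sync.NewCond(&t.mu)
	return t
}

// waitOnMark blocks until more than n mark phases have completed,
// rechecking the count after every wakeup, just as gcWaitOnMark
// re-takes the lock and re-reads work.cycles after each park.
func (t *cycleTracker) waitOnMark(n uint32) {
	t.mu.Lock()
	for t.cycles <= n {
		t.cond.Wait()
	}
	t.mu.Unlock()
}

// finishMark plays the role of mark termination waking the waiters.
func (t *cycleTracker) finishMark() {
	t.mu.Lock()
	t.cycles++
	t.cond.Broadcast()
	t.mu.Unlock()
}

func main() {
	t := newCycleTracker()
	go func() {
		for i := 0; i < 3; i++ {
			time.Sleep(10 * time.Millisecond)
			t.finishMark()
		}
	}()
	t.waitOnMark(2) // returns once the third mark phase has completed
	fmt.Println("cycle 2 marked")
}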
Spans consisting of less than // _MaxMHeapLists are held in lists of like sized spans. Larger spans // are held in a treap. See https://en.wikipedia.org/wiki/Treap or -// http://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. +// https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview. // sema.go also holds an implementation of a treap. // // Each treapNode holds a single span. The treap is sorted by page size @@ -43,7 +43,7 @@ type treapNode struct { parent *treapNode // direct parent of this node, nil if root npagesKey uintptr // number of pages in spanKey, used as primary sort key spanKey *mspan // span of size npagesKey, used as secondary sort key - priority uint32 // random number used by treap algorithm keep tree probablistically balanced + priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced } func (t *treapNode) init() { @@ -137,7 +137,7 @@ func (root *mTreap) insert(span *mspan) { // npagesKeys, it is kept balanced on average by maintaining a heap ordering // on the priority: s.priority <= both s.right.priority and s.right.priority. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf t := (*treapNode)(mheap_.treapalloc.alloc()) t.init() diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go index 7297fcb..88cae41 100644 --- a/libgo/go/runtime/mgcmark.go +++ b/libgo/go/runtime/mgcmark.go @@ -232,7 +232,7 @@ func markroot(gcw *gcWork, i uint32) { selfScan := gp == userG && readgstatus(userG) == _Grunning if selfScan { casgstatus(userG, _Grunning, _Gwaiting) - userG.waitreason = "garbage collection scan" + userG.waitreason = waitReasonGarbageCollectionScan } // TODO: scang blocks until gp's stack has @@ -467,7 +467,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // store that clears it but an atomic check in every malloc // would be a performance hit. // Instead we recheck it here on the non-preemptable system - // stack to determine if we should preform an assist. + // stack to determine if we should perform an assist. // GC is done, so ignore any remaining debt. gp.gcAssistBytes = 0 @@ -486,7 +486,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // gcDrainN requires the caller to be preemptible. casgstatus(gp, _Grunning, _Gwaiting) - gp.waitreason = "GC assist marking" + gp.waitreason = waitReasonGCAssistMarking // drain own cached work first in the hopes that it // will be more cache friendly. @@ -585,7 +585,7 @@ func gcParkAssist() bool { return false } // Park. - goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2) + goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceEvGoBlockGC, 2) return true } @@ -934,9 +934,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { b := b0 n := n0 - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; { // Find bits for the next word. bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) @@ -948,9 +945,9 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { if bits&1 != 0 { // Same work as in scanobject; see comments there. 
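The mgclarge.go comments above describe the large-span treap: nodes are kept in symmetric order by page count and in min-heap order by a random priority, which keeps the tree balanced in expectation. A compact stand-alone treap with the same two invariants, using plain integer keys instead of *mspan and recursion instead of the runtime's parent pointers (an illustrative sketch, not the runtime's implementation):

package main

import (
	"fmt"
	"math/rand"
)

type node struct {
	npages   uintptr // sort key, like treapNode.npagesKey
	priority uint32  // random, heap-ordered, like treapNode.priority
	left     *node
	right    *node
}

// insert preserves symmetric order on npages and min-heap order on
// priority by rotating the new node up while its priority is smaller
// than its parent's.
func insert(t *node, npages uintptr) *node {
	if t == nil {
		return &node{npages: npages, priority: rand.Uint32()}
	}
	if npages < t.npages {
		t.left = insert(t.left, npages)
		if t.left.priority < t.priority {
			t = rotateRight(t)
		}
	} else {
		t.right = insert(t.right, npages)
		if t.right.priority < t.priority {
			t = rotateLeft(t)
		}
	}
	return t
}

func rotateRight(t *node) *node {
	l := t.left
	t.left, l.right = l.right, t
	return l
}

func rotateLeft(t *node) *node {
	r := t.right
	t.right, r.left = r.left, t
	return r
}

func walk(t *node, visit func(uintptr)) {
	if t == nil {
		return
	}
	walk(t.left, visit)
	visit(t.npages)
	walk(t.right, visit)
}

func main() {
	var root *node
	for _, n := range []uintptr{129, 200, 130, 512, 384} {
		root = insert(root, n)
	}
	// An in-order walk yields the page counts in sorted order.
	walk(root, func(n uintptr) { fmt.Println(n) })
}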
obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + if obj != 0 { + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -967,18 +964,6 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { // //go:nowritebarrier func scanobject(b uintptr, gcw *gcWork) { - // Note that arena_used may change concurrently during - // scanobject and hence scanobject may encounter a pointer to - // a newly allocated heap object that is *not* in - // [start,used). It will not mark this object; however, we - // know that it was just installed by a mutator, which means - // that mutator will execute a write barrier and take care of - // marking it. This is even more pronounced on relaxed memory - // architectures since we access arena_used without barriers - // or synchronization, but the same logic applies. - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - // Find the bits for b and the size of the object at b. // // b is either the beginning of an object, in which case this @@ -1052,11 +1037,19 @@ func scanobject(b uintptr, gcw *gcWork) { obj := *(*uintptr)(unsafe.Pointer(b + i)) // At this point we have extracted the next potential pointer. - // Check if it points into heap and not back at the current object. - if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n { - // Mark the object. - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + // Quickly filter out nil and pointers back to the current object. + if obj != 0 && obj-b >= n { + // Test if obj points into the Go heap and, if so, + // mark the object. + // + // Note that it's possible for findObject to + // fail if obj points to a just-allocated heap + // object because of a race with growing the + // heap. In this case, we know the object was + // just allocated and hence will be marked by + // allocation itself. + if obj, span, objIndex := findObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, false) } } } @@ -1071,16 +1064,11 @@ func scanobject(b uintptr, gcw *gcWork) { // scanblock, but we scan the stack conservatively, so there is no // bitmask of pointers. func scanstackblock(b, n uintptr, gcw *gcWork) { - arena_start := mheap_.arena_start - arena_used := mheap_.arena_used - for i := uintptr(0); i < n; i += sys.PtrSize { // Same work as in scanobject; see comments there. obj := *(*uintptr)(unsafe.Pointer(b + i)) - if obj != 0 && arena_start <= obj && obj < arena_used { - if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 { - greyobject(obj, b, i, hbits, span, gcw, objIndex, true) - } + if obj, span, objIndex := findObject(obj, b, i, true); obj != 0 { + greyobject(obj, b, i, span, gcw, objIndex, true) } } } @@ -1090,11 +1078,9 @@ func scanstackblock(b, n uintptr, gcw *gcWork) { // Preemption must be disabled. //go:nowritebarrier func shade(b uintptr) { - // shade can be called to shade a pointer found on the stack, - // so pass forStack as true to heapBitsForObject and greyobject. 
- if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 { + if obj, span, objIndex := findObject(b, 0, 0, true); obj != 0 { gcw := &getg().m.p.ptr().gcw - greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true) + greyobject(obj, 0, 0, span, gcw, objIndex, true) if gcphase == _GCmarktermination || gcBlackenPromptly { // Ps aren't allowed to cache work during mark // termination. @@ -1110,7 +1096,7 @@ func shade(b uintptr) { // See also wbBufFlush1, which partially duplicates this logic. // //go:nowritebarrierrec -func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { +func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { // obj should be start of allocation, and so must be at least pointer-aligned. if obj&(sys.PtrSize-1) != 0 { throw("greyobject: obj not pointer-aligned") @@ -1139,6 +1125,7 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork getg().m.traceback = 2 throw("checkmark found unmarked object") } + hbits := heapBitsForAddr(obj) if hbits.isCheckmarked(span.elemsize) { return } @@ -1190,15 +1177,8 @@ func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork // gcDumpObject dumps the contents of obj for debugging and marks the // field at byte offset off in obj. func gcDumpObject(label string, obj, off uintptr) { - if obj < mheap_.arena_start || obj >= mheap_.arena_used { - print(label, "=", hex(obj), " is not in the Go heap\n") - return - } - k := obj >> _PageShift - x := k - x -= mheap_.arena_start >> _PageShift - s := mheap_.spans[x] - print(label, "=", hex(obj), " k=", hex(k)) + s := spanOf(obj) + print(label, "=", hex(obj)) if s == nil { print(" s=nil\n") return @@ -1272,9 +1252,9 @@ func gcMarkTinyAllocs() { if c == nil || c.tiny == 0 { continue } - _, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false) + _, span, objIndex := findObject(c.tiny, 0, 0, false) gcw := &p.gcw - greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false) + greyobject(c.tiny, 0, 0, span, gcw, objIndex, false) if gcBlackenPromptly { gcw.dispose() } @@ -1309,7 +1289,7 @@ func initCheckmarks() { useCheckmark = true for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout()) } } } @@ -1318,7 +1298,7 @@ func clearCheckmarks() { useCheckmark = false for _, s := range mheap_.allspans { if s.state == _MSpanInUse { - heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout()) + heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout()) } } } diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go index d6be349..39dd54e 100644 --- a/libgo/go/runtime/mgcsweep.go +++ b/libgo/go/runtime/mgcsweep.go @@ -51,7 +51,7 @@ func bgsweep(c chan int) { lock(&sweep.lock) sweep.parked = true c <- 1 - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) for { for gosweepone() != ^uintptr(0) { @@ -70,7 +70,7 @@ func bgsweep(c chan int) { continue } sweep.parked = true - goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + goparkunlock(&sweep.lock, waitReasonGCSweepWait, traceEvGoBlock, 1) } } diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go index c6634fc..99771e2 100644 --- a/libgo/go/runtime/mgcwork.go +++ b/libgo/go/runtime/mgcwork.go @@ -400,6 +400,7 @@ func getempty() *workbuf { for i := 
uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize { newb := (*workbuf)(unsafe.Pointer(s.base() + i)) newb.nobj = 0 + lfnodeValidate(&newb.node) if i == 0 { b = newb } else { diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go index d971bfe..65622f4 100644 --- a/libgo/go/runtime/mheap.go +++ b/libgo/go/runtime/mheap.go @@ -50,23 +50,6 @@ type mheap struct { // access (since that may free the backing store). allspans []*mspan // all spans out there - // spans is a lookup table to map virtual address page IDs to *mspan. - // For allocated spans, their pages map to the span itself. - // For free spans, only the lowest and highest pages map to the span itself. - // Internal pages map to an arbitrary span. - // For pages that have never been allocated, spans entries are nil. - // - // Modifications are protected by mheap.lock. Reads can be - // performed without locking, but ONLY from indexes that are - // known to contain in-use or stack spans. This means there - // must not be a safe-point between establishing that an - // address is live and looking it up in the spans array. - // - // This is backed by a reserved region of the address space so - // it can grow without moving. The memory up to len(spans) is - // mapped. cap(spans) indicates the total reserved memory. - spans []*mspan - // sweepSpans contains two mspan stacks: one of swept in-use // spans, and one of unswept in-use spans. These two trade // roles on each GC cycle. Since the sweepgen increases by 2 @@ -78,7 +61,7 @@ type mheap struct { // on the swept stack. sweepSpans [2]gcSweepBuf - _ uint32 // align uint64 fields on 32-bit for atomics + //_ uint32 // align uint64 fields on 32-bit for atomics // Proportional sweep // @@ -113,36 +96,44 @@ type mheap struct { nlargefree uint64 // number of frees for large objects (>maxsmallsize) nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) - // range of addresses we might see in the heap - bitmap uintptr // Points to one byte past the end of the bitmap - bitmap_mapped uintptr - - // The arena_* fields indicate the addresses of the Go heap. + // arenas is the heap arena map. It points to the metadata for + // the heap for every arena frame of the entire usable virtual + // address space. + // + // Use arenaIndex to compute indexes into this array. // - // The maximum range of the Go heap is - // [arena_start, arena_start+_MaxMem+1). + // For regions of the address space that are not backed by the + // Go heap, the arena map contains nil. // - // The range of the current Go heap is - // [arena_start, arena_used). Parts of this range may not be - // mapped, but the metadata structures are always mapped for - // the full range. - arena_start uintptr - arena_used uintptr // Set with setArenaUsed. - - // The heap is grown using a linear allocator that allocates - // from the block [arena_alloc, arena_end). arena_alloc is - // often, but *not always* equal to arena_used. - arena_alloc uintptr - arena_end uintptr - - // arena_reserved indicates that the memory [arena_alloc, - // arena_end) is reserved (e.g., mapped PROT_NONE). If this is - // false, we have to be careful not to clobber existing - // mappings here. If this is true, then we own the mapping - // here and *must* clobber it to use it. - arena_reserved bool - - _ uint32 // ensure 64-bit alignment + // Modifications are protected by mheap_.lock. 
Reads can be + // performed without locking; however, a given entry can + // transition from nil to non-nil at any time when the lock + // isn't held. (Entries never transitions back to nil.) + // + // In general, this is a two-level mapping consisting of an L1 + // map and possibly many L2 maps. This saves space when there + // are a huge number of arena frames. However, on many + // platforms (even 64-bit), arenaL1Bits is 0, making this + // effectively a single-level map. In this case, arenas[0] + // will never be nil. + arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena + + // heapArenaAlloc is pre-reserved space for allocating heapArena + // objects. This is only used on 32-bit, where we pre-reserve + // this space to avoid interleaving it with the heap itself. + heapArenaAlloc linearAlloc + + // arenaHints is a list of addresses at which to attempt to + // add more heap arenas. This is initially populated with a + // set of general hint addresses, and grown with the bounds of + // actual heap arena ranges. + arenaHints *arenaHint + + // arena is a pre-reserved space for allocating heap arenas + // (the actual arenas). This is only used on 32-bit. + arena linearAlloc + + //_ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. // the padding makes sure that the MCentrals are @@ -160,12 +151,51 @@ type mheap struct { specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. + arenaHintAlloc fixalloc // allocator for arenaHints unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF } var mheap_ mheap +// A heapArena stores metadata for a heap arena. heapArenas are stored +// outside of the Go heap and accessed via the mheap_.arenas index. +// +// This gets allocated directly from the OS, so ideally it should be a +// multiple of the system page size. For example, avoid adding small +// fields. +// +//go:notinheap +type heapArena struct { + // bitmap stores the pointer/scalar bitmap for the words in + // this arena. See mbitmap.go for a description. Use the + // heapBits type to access this. + bitmap [heapArenaBitmapBytes]byte + + // spans maps from virtual address page ID within this arena to *mspan. + // For allocated spans, their pages map to the span itself. + // For free spans, only the lowest and highest pages map to the span itself. + // Internal pages map to an arbitrary span. + // For pages that have never been allocated, spans entries are nil. + // + // Modifications are protected by mheap.lock. Reads can be + // performed without locking, but ONLY from indexes that are + // known to contain in-use or stack spans. This means there + // must not be a safe-point between establishing that an + // address is live and looking it up in the spans array. + spans [pagesPerArena]*mspan +} + +// arenaHint is a hint for where to grow the heap arenas. See +// mheap_.arenaHints. +// +//go:notinheap +type arenaHint struct { + addr uintptr + down bool + next *arenaHint +} + // An MSpan is a run of pages. // // When a MSpan is in the heap free list, state == MSpanFree @@ -384,21 +414,55 @@ func (sc spanClass) noscan() bool { return sc&1 != 0 } +// arenaIndex returns the index into mheap_.arenas of the arena +// containing metadata for p. This index combines of an index into the +// L1 map and an index into the L2 map and should be used as +// mheap_.arenas[ai.l1()][ai.l2()]. 
+// +// If p is outside the range of valid heap addresses, either l1() or +// l2() will be out of bounds. +// +// It is nosplit because it's called by spanOf and several other +// nosplit functions. +// +//go:nosplit +func arenaIndex(p uintptr) arenaIdx { + return arenaIdx((p + arenaBaseOffset) / heapArenaBytes) +} + +// arenaBase returns the low address of the region covered by heap +// arena i. +func arenaBase(i arenaIdx) uintptr { + return uintptr(i)*heapArenaBytes - arenaBaseOffset +} + +type arenaIdx uint + +func (i arenaIdx) l1() uint { + if arenaL1Bits == 0 { + // Let the compiler optimize this away if there's no + // L1 map. + return 0 + } else { + return uint(i) >> arenaL1Shift + } +} + +func (i arenaIdx) l2() uint { + if arenaL1Bits == 0 { + return uint(i) + } else { + return uint(i) & (1<<arenaL2Bits - 1) + } +} + // inheap reports whether b is a pointer into a (potentially dead) heap object. // It returns false for pointers into _MSpanManual spans. // Non-preemptible because it is used by write barriers. //go:nowritebarrier //go:nosplit func inheap(b uintptr) bool { - if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. - s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] - if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse { - return false - } - return true + return spanOfHeap(b) != nil } // inHeapOrStack is a variant of inheap that returns true for pointers @@ -407,11 +471,7 @@ func inheap(b uintptr) bool { //go:nowritebarrier //go:nosplit func inHeapOrStack(b uintptr) bool { - if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { - return false - } - // Not a beginning of a block, consult span table to find the block beginning. - s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] + s := spanOf(b) if s == nil || b < s.base() { return false } @@ -423,81 +483,81 @@ func inHeapOrStack(b uintptr) bool { } } -// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places. -// Use the functions instead. - -// spanOf returns the span of p. If p does not point into the heap or -// no span contains p, spanOf returns nil. +// spanOf returns the span of p. If p does not point into the heap +// arena or no span has ever contained p, spanOf returns nil. +// +// If p does not point to allocated memory, this may return a non-nil +// span that does *not* contain p. If this is a possibility, the +// caller should either call spanOfHeap or check the span bounds +// explicitly. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOf(p uintptr) *mspan { - if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used { + // This function looks big, but we use a lot of constant + // folding around arenaL1Bits to get it under the inlining + // budget. Also, many of the checks here are safety checks + // that Go needs to do anyway, so the generated code is quite + // short. + ri := arenaIndex(p) + if arenaL1Bits == 0 { + // If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can. + if ri.l2() >= uint(len(mheap_.arenas[0])) { + return nil + } + } else { + // If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't. + if ri.l1() >= uint(len(mheap_.arenas)) { + return nil + } + } + l2 := mheap_.arenas[ri.l1()] + if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1. 
return nil } - return spanOfUnchecked(p) + ha := l2[ri.l2()] + if ha == nil { + return nil + } + return ha.spans[(p/pageSize)%pagesPerArena] } // spanOfUnchecked is equivalent to spanOf, but the caller must ensure -// that p points into the heap (that is, mheap_.arena_start <= p < -// mheap_.arena_used). +// that p points into an allocated heap arena. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit func spanOfUnchecked(p uintptr) *mspan { - return mheap_.spans[(p-mheap_.arena_start)>>_PageShift] + ai := arenaIndex(p) + return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena] } -func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 { - _g_ := getg() - - _g_.m.mcache.local_nlookup++ - if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 { - // purge cache stats to prevent overflow - lock(&mheap_.lock) - purgecachedstats(_g_.m.mcache) - unlock(&mheap_.lock) - } - - s := mheap_.lookupMaybe(unsafe.Pointer(v)) - if sp != nil { - *sp = s - } - if s == nil { - if base != nil { - *base = 0 - } - if size != nil { - *size = 0 - } - return 0 - } - - p := s.base() - if s.spanclass.sizeclass() == 0 { - // Large object. - if base != nil { - *base = p - } - if size != nil { - *size = s.npages << _PageShift - } - return 1 - } - - n := s.elemsize - if base != nil { - i := (v - p) / n - *base = p + i*n - } - if size != nil { - *size = n +// spanOfHeap is like spanOf, but returns nil if p does not point to a +// heap object. +// +// Must be nosplit because it has callers that are nosplit. +// +//go:nosplit +func spanOfHeap(p uintptr) *mspan { + s := spanOf(p) + // If p is not allocated, it may point to a stale span, so we + // have to check the span's bounds and state. + if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { + return nil } - - return 1 + return s } // Initialize the heap. -func (h *mheap) init(spansStart, spansBytes uintptr) { +func (h *mheap) init() { h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys) + h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys) // Don't zero mspan allocations. Background sweeping can // inspect a span concurrently with allocating it, so it's @@ -518,60 +578,6 @@ func (h *mheap) init(spansStart, spansBytes uintptr) { for i := range h.central { h.central[i].mcentral.init(spanClass(i)) } - - sp := (*slice)(unsafe.Pointer(&h.spans)) - sp.array = unsafe.Pointer(spansStart) - sp.len = 0 - sp.cap = int(spansBytes / sys.PtrSize) - - // Map metadata structures. But don't map race detector memory - // since we're not actually growing the arena here (and TSAN - // gets mad if you map 0 bytes). - h.setArenaUsed(h.arena_used, false) -} - -// setArenaUsed extends the usable arena to address arena_used and -// maps auxiliary VM regions for any newly usable arena space. -// -// racemap indicates that this memory should be managed by the race -// detector. racemap should be true unless this is covering a VM hole. -func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) { - // Map auxiliary structures *before* h.arena_used is updated. 
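The spanOf, spanOfUnchecked and spanOfHeap lookups above all reduce to the same arithmetic: offset the address, divide by the arena size to get an arena index, split that index into L1/L2 parts, and finally index the per-arena spans array by page. A stand-alone illustration of that arithmetic with made-up constants (the real heapArenaBytes, arenaL2Bits, arenaBaseOffset and friends are platform-dependent and defined in malloc.go, outside this diff; on most 64-bit targets arenaL1Bits is 0 and l1() is always 0):

package main

import "fmt"

// Illustrative constants only; they do not match any real platform.
const (
	pageSize        = 8192
	heapArenaBytes  = 1 << 22 // 4 MiB arenas for the example
	pagesPerArena   = heapArenaBytes / pageSize
	arenaL2Bits     = 8
	arenaBaseOffset = 0
)

type arenaIdx uint

// arenaIndex mirrors the function of the same name above.
func arenaIndex(p uintptr) arenaIdx {
	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
}

// l1/l2 split the arena index into the two map levels (the runtime
// names this shift arenaL1Shift).
func (i arenaIdx) l1() uint { return uint(i) >> arenaL2Bits }
func (i arenaIdx) l2() uint { return uint(i) & (1<<arenaL2Bits - 1) }

func main() {
	p := uintptr(49*heapArenaBytes + 3*pageSize + 100)
	ai := arenaIndex(p)
	fmt.Printf("arena index %d -> L1 %d, L2 %d, page-in-arena %d\n",
		uint(ai), ai.l1(), ai.l2(), (p/pageSize)%pagesPerArena)
	// Output: arena index 49 -> L1 0, L2 49, page-in-arena 3
}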
- // Waiting to update arena_used until after the memory has been mapped - // avoids faults when other threads try access these regions immediately - // after observing the change to arena_used. - - // Map the bitmap. - h.mapBits(arena_used) - - // Map spans array. - h.mapSpans(arena_used) - - // Tell the race detector about the new heap memory. - if racemap && raceenabled { - racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used) - } - - h.arena_used = arena_used -} - -// mapSpans makes sure that the spans are mapped -// up to the new value of arena_used. -// -// Don't call this directly. Call mheap.setArenaUsed. -func (h *mheap) mapSpans(arena_used uintptr) { - // Map spans array, PageSize at a time. - n := arena_used - n -= h.arena_start - n = n / _PageSize * sys.PtrSize - n = round(n, physPageSize) - need := n / unsafe.Sizeof(h.spans[0]) - have := uintptr(len(h.spans)) - if have >= need { - return - } - h.spans = h.spans[:need] - sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys) } // Sweeps spans in list until reclaims at least npages into heap. @@ -598,7 +604,7 @@ retry: goto retry } if s.sweepgen == sg-1 { - // the span is being sweept by background sweeper, skip + // the span is being swept by background sweeper, skip continue } // already swept empty span, @@ -785,7 +791,7 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { s.nelems = 0 s.elemsize = 0 s.limit = s.base() + s.npages<<_PageShift - // Manually manged memory doesn't count toward heap_sys. + // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys -= uint64(s.npages << _PageShift) } @@ -795,6 +801,28 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan { return s } +// setSpan modifies the span map so spanOf(base) is s. +func (h *mheap) setSpan(base uintptr, s *mspan) { + ai := arenaIndex(base) + h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s +} + +// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize)) +// is s. +func (h *mheap) setSpans(base, npage uintptr, s *mspan) { + p := base / pageSize + ai := arenaIndex(base) + ha := h.arenas[ai.l1()][ai.l2()] + for n := uintptr(0); n < npage; n++ { + i := (p + n) % pagesPerArena + if i == 0 { + ai = arenaIndex(base + n*pageSize) + ha = h.arenas[ai.l1()][ai.l2()] + } + ha.spans[i] = s + } +} + // Allocates a span of the given size. h must be locked. // The returned span has been removed from the // free list, but its state is still MSpanFree. @@ -842,12 +870,9 @@ HaveSpan: t := (*mspan)(h.spanalloc.alloc()) t.init(s.base()+npage<<_PageShift, s.npages-npage) s.npages = npage - p := (t.base() - h.arena_start) >> _PageShift - if p > 0 { - h.spans[p-1] = s - } - h.spans[p] = t - h.spans[p+t.npages-1] = t + h.setSpan(t.base()-1, s) + h.setSpan(t.base(), t) + h.setSpan(t.base()+t.npages*pageSize-1, t) t.needzero = s.needzero s.state = _MSpanManual // prevent coalescing with s t.state = _MSpanManual @@ -856,10 +881,7 @@ HaveSpan: } s.unusedsince = 0 - p := (s.base() - h.arena_start) >> _PageShift - for n := uintptr(0); n < npage; n++ { - h.spans[p+n] = s - } + h.setSpans(s.base(), npage, s) *stat += uint64(npage << _PageShift) memstats.heap_idle -= uint64(npage << _PageShift) @@ -891,36 +913,18 @@ func (h *mheap) allocLarge(npage uintptr) *mspan { // // h must be locked. 
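One subtlety in the new setSpans above: a multi-page span can straddle an arena boundary, so the loop re-fetches the heapArena whenever the per-arena page index wraps to zero. A toy version of that loop over a map of miniature arenas (names and sizes invented for the illustration):

package main

import "fmt"

const (
	pageSize      = 8192
	pagesPerArena = 4 // tiny on purpose, to force a boundary crossing
)

type span struct{ id int }

// arena holds the per-arena page-to-span table, like heapArena.spans.
type arena struct {
	spans [pagesPerArena]*span
}

// arenas stands in for the runtime's two-level mheap_.arenas table.
var arenas = map[uintptr]*arena{}

func arenaFor(i uintptr) *arena {
	a := arenas[i]
	if a == nil {
		a = new(arena)
		arenas[i] = a
	}
	return a
}

// setSpans mirrors (*mheap).setSpans: point every page in
// [base, base+npage*pageSize) at s, switching arenas whenever the
// in-arena index wraps around.
func setSpans(base, npage uintptr, s *span) {
	p := base / pageSize
	ha := arenaFor(p / pagesPerArena)
	for n := uintptr(0); n < npage; n++ {
		i := (p + n) % pagesPerArena
		if i == 0 {
			ha = arenaFor((p + n) / pagesPerArena)
		}
		ha.spans[i] = s
	}
}

func main() {
	// Six pages starting at page 2 cross from arena 0 into arena 1.
	setSpans(2*pageSize, 6, &span{id: 7})
	for ai := uintptr(0); ai < 2; ai++ {
		for page, sp := range arenas[ai].spans {
			if sp != nil {
				fmt.Printf("arena %d page %d -> span %d\n", ai, page, sp.id)
			}
		}
	}
}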
func (h *mheap) grow(npage uintptr) bool { - // Ask for a big chunk, to reduce the number of mappings - // the operating system needs to track; also amortizes - // the overhead of an operating system mapping. - // Allocate a multiple of 64kB. - npage = round(npage, (64<<10)/_PageSize) ask := npage << _PageShift - if ask < _HeapAllocChunk { - ask = _HeapAllocChunk - } - - v := h.sysAlloc(ask) + v, size := h.sysAlloc(ask) if v == nil { - if ask > npage<<_PageShift { - ask = npage << _PageShift - v = h.sysAlloc(ask) - } - if v == nil { - print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") - return false - } + print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") + return false } // Create a fake "in use" span and free it, so that the // right coalescing happens. s := (*mspan)(h.spanalloc.alloc()) - s.init(uintptr(v), ask>>_PageShift) - p := (s.base() - h.arena_start) >> _PageShift - for i := p; i < p+s.npages; i++ { - h.spans[i] = s - } + s.init(uintptr(v), size/pageSize) + h.setSpans(s.base(), s.npages, s) atomic.Store(&s.sweepgen, h.sweepgen) s.state = _MSpanInUse h.pagesInUse += uint64(s.npages) @@ -928,33 +932,6 @@ func (h *mheap) grow(npage uintptr) bool { return true } -// Look up the span at the given address. -// Address is guaranteed to be in map -// and is guaranteed to be start or end of span. -func (h *mheap) lookup(v unsafe.Pointer) *mspan { - p := uintptr(v) - p -= h.arena_start - return h.spans[p>>_PageShift] -} - -// Look up the span at the given address. -// Address is *not* guaranteed to be in map -// and may be anywhere in the span. -// Map entries for the middle of a span are only -// valid for allocated spans. Free spans may have -// other garbage in their middles, so we have to -// check for that. -func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan { - if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used { - return nil - } - s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift] - if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse { - return nil - } - return s -} - // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan, acct int32) { systemstack(func() { @@ -1039,46 +1016,38 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i s.npreleased = 0 // Coalesce with earlier, later spans. - p := (s.base() - h.arena_start) >> _PageShift - if p > 0 { - before := h.spans[p-1] - if before != nil && before.state == _MSpanFree { - // Now adjust s. - s.startAddr = before.startAddr - s.npages += before.npages - s.npreleased = before.npreleased // absorb released pages - s.needzero |= before.needzero - p -= before.npages - h.spans[p] = s - // The size is potentially changing so the treap needs to delete adjacent nodes and - // insert back as a combined node. - if h.isLargeSpan(before.npages) { - // We have a t, it is large so it has to be in the treap so we can remove it. - h.freelarge.removeSpan(before) - } else { - h.freeList(before.npages).remove(before) - } - before.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(before)) + if before := spanOf(s.base() - 1); before != nil && before.state == _MSpanFree { + // Now adjust s. 
+ s.startAddr = before.startAddr + s.npages += before.npages + s.npreleased = before.npreleased // absorb released pages + s.needzero |= before.needzero + h.setSpan(before.base(), s) + // The size is potentially changing so the treap needs to delete adjacent nodes and + // insert back as a combined node. + if h.isLargeSpan(before.npages) { + // We have a t, it is large so it has to be in the treap so we can remove it. + h.freelarge.removeSpan(before) + } else { + h.freeList(before.npages).remove(before) } + before.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(before)) } // Now check to see if next (greater addresses) span is free and can be coalesced. - if (p + s.npages) < uintptr(len(h.spans)) { - after := h.spans[p+s.npages] - if after != nil && after.state == _MSpanFree { - s.npages += after.npages - s.npreleased += after.npreleased - s.needzero |= after.needzero - h.spans[p+s.npages-1] = s - if h.isLargeSpan(after.npages) { - h.freelarge.removeSpan(after) - } else { - h.freeList(after.npages).remove(after) - } - after.state = _MSpanDead - h.spanalloc.free(unsafe.Pointer(after)) + if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == _MSpanFree { + s.npages += after.npages + s.npreleased += after.npreleased + s.needzero |= after.needzero + h.setSpan(s.base()+s.npages*pageSize-1, s) + if h.isLargeSpan(after.npages) { + h.freelarge.removeSpan(after) + } else { + h.freeList(after.npages).remove(after) } + after.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(after)) } // Insert s into appropriate list or treap. @@ -1343,7 +1312,7 @@ type special struct { // (The add will fail only if a record with the same p and s->kind // already exists.) func addspecial(p unsafe.Pointer, s *special) bool { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("addspecial on invalid pointer") } @@ -1391,7 +1360,7 @@ func addspecial(p unsafe.Pointer, s *special) bool { // Returns the record if the record existed, nil otherwise. // The caller must FixAlloc_Free the result. func removespecial(p unsafe.Pointer, kind uint8) *special { - span := mheap_.lookupMaybe(p) + span := spanOfHeap(uintptr(p)) if span == nil { throw("removespecial on invalid pointer") } @@ -1454,12 +1423,12 @@ func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool // situation where it's possible that markrootSpans // has already run but mark termination hasn't yet. if gcphase != _GCoff { - _, base, _ := findObject(p) + base, _, _ := findObject(uintptr(p), 0, 0, false) mp := acquirem() gcw := &mp.p.ptr().gcw // Mark everything reachable from the object // so it's retained for the finalizer. - scanobject(uintptr(base), gcw) + scanobject(base, gcw) // Mark the finalizer itself, since the // special isn't part of the GC'd heap. scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw) @@ -1643,7 +1612,7 @@ func newMarkBits(nelems uintptr) *gcBits { // to be used for this span's alloc bits. // newAllocBits is used to provide newly initialized spans // allocation bits. For spans not being initialized the -// the mark bits are repurposed as allocation bits when +// mark bits are repurposed as allocation bits when // the span is swept. 
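In the freeSpanLocked hunks above, the span being freed now locates its neighbors with spanOf(s.base()-1) and spanOf(s.base()+s.npages*pageSize) instead of indexing the old global spans slice; the merge itself is ordinary boundary-tag coalescing. A toy version over page ranges, with a flat page-to-block map standing in for the arena span tables (names invented; the real code also moves the absorbed span between free lists and releases its mspan):

package main

import "fmt"

// block is a run of pages, identified by its start page and length.
type block struct {
	start, npages int
	free          bool
}

// byPage maps the first and last page of each block to the block,
// the way the arena span maps do for free spans; entries for interior
// pages may be stale, which is why only the boundaries are consulted.
var byPage = map[int]*block{}

func register(b *block) {
	byPage[b.start] = b
	byPage[b.start+b.npages-1] = b
}

// release marks b free and coalesces it with free neighbors, mirroring
// the before/after merging in freeSpanLocked.
func release(b *block) {
	b.free = true
	if before := byPage[b.start-1]; before != nil && before.free {
		b.start = before.start
		b.npages += before.npages
	}
	if after := byPage[b.start+b.npages]; after != nil && after.free {
		b.npages += after.npages
	}
	register(b) // re-point both boundaries at the merged block
}

func main() {
	left := &block{start: 0, npages: 4}
	mid := &block{start: 4, npages: 2}
	right := &block{start: 6, npages: 8}
	for _, b := range []*block{left, mid, right} {
		register(b)
	}
	release(left)
	release(right)
	release(mid) // merges with both neighbors
	merged := byPage[0]
	fmt.Println(merged.start, merged.npages, merged.free) // 0 14 true
}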
func newAllocBits(nelems uintptr) *gcBits { return newMarkBits(nelems) diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index f31c88c..2bbf37a 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -436,7 +436,7 @@ var mutexprofilerate uint64 // fraction sampled // reported. The previous rate is returned. // // To turn off profiling entirely, pass rate 0. -// To just read the current rate, pass rate -1. +// To just read the current rate, pass rate < 0. // (For n>1 the details of sampling may change.) func SetMutexProfileFraction(rate int) int { if rate < 0 { @@ -833,7 +833,7 @@ func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) { if typ == nil { print("tracealloc(", p, ", ", hex(size), ")\n") } else { - print("tracealloc(", p, ", ", hex(size), ", ", *typ.string, ")\n") + print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n") } if gp.m.curg == nil || gp == gp.m.curg { goroutineheader(gp) diff --git a/libgo/go/runtime/msan/msan.go b/libgo/go/runtime/msan/msan.go index b6ea3f0..c81577d 100644 --- a/libgo/go/runtime/msan/msan.go +++ b/libgo/go/runtime/msan/msan.go @@ -2,7 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build msan,linux,amd64 +// +build msan,linux +// +build amd64 arm64 package msan diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 095a0de..f54ce9d 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -26,7 +26,7 @@ type mstats struct { alloc uint64 // bytes allocated and not yet freed total_alloc uint64 // bytes allocated (even if freed) sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate) - nlookup uint64 // number of pointer lookups + nlookup uint64 // number of pointer lookups (unused) nmalloc uint64 // number of mallocs nfree uint64 // number of frees @@ -637,8 +637,6 @@ func purgecachedstats(c *mcache) { c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - memstats.nlookup += uint64(c.local_nlookup) - c.local_nlookup = 0 h.largefree += uint64(c.local_largefree) c.local_largefree = 0 h.nlargefree += uint64(c.local_nlargefree) @@ -663,6 +661,9 @@ func purgecachedstats(c *mcache) { // overflow errors. //go:nosplit func mSysStatInc(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, int64(n)) return @@ -677,6 +678,9 @@ func mSysStatInc(sysStat *uint64, n uintptr) { // mSysStatInc apply. //go:nosplit func mSysStatDec(sysStat *uint64, n uintptr) { + if sysStat == nil { + return + } if sys.BigEndian { atomic.Xadd64(sysStat, -int64(n)) return diff --git a/libgo/go/runtime/mwbbuf.go b/libgo/go/runtime/mwbbuf.go index 7e88463..39d1370 100644 --- a/libgo/go/runtime/mwbbuf.go +++ b/libgo/go/runtime/mwbbuf.go @@ -5,6 +5,9 @@ // This implements the write barrier buffer. The write barrier itself // is gcWriteBarrier and is implemented in assembly. // +// See mbarrier.go for algorithmic details on the write barrier. This +// file deals only with the buffer. +// // The write barrier has a fast path and a slow path. The fast path // simply enqueues to a per-P write barrier buffer. It's written in // assembly and doesn't clobber any general purpose registers, so it @@ -111,16 +114,21 @@ func (b *wbBuf) discard() { // if !buf.putFast(old, new) { // wbBufFlush(...) // } +// ... actual memory write ... 
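The usage sequence quoted in the comment above — put into the per-P buffer with putFast, flush when it reports the buffer is full, then perform the actual memory write — is the whole fast-path contract of the write barrier buffer. A self-contained sketch of a bounded "put fast or flush" buffer in that style (illustrative only; the real wbBuf records old/new pointer pairs, is flushed on the system stack, and feeds the flushed pointers to findObject/greyobject as shown in wbBufFlush1 below):

package main

import "fmt"

// wbBuf is a toy version of the per-P write barrier buffer: a small
// fixed array plus a next index.
type wbBuf struct {
	next int
	buf  [8]uintptr // tiny capacity so the demo flushes often
}

// putFast records the old and new pointer values and reports whether
// the buffer still has room, mirroring the
// "if !buf.putFast(old, new) { wbBufFlush(...) }" sequence above.
func (b *wbBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += 2
	return b.next < len(b.buf)
}

// flush drains the buffer through process and resets it.
func (b *wbBuf) flush(process func(uintptr)) {
	for _, p := range b.buf[:b.next] {
		process(p)
	}
	b.next = 0
}

func main() {
	var b wbBuf
	greyed := 0
	grey := func(p uintptr) {
		if p != 0 { // like the minLegalPointer filter in wbBufFlush1
			greyed++ // stand-in for findObject + greyobject
		}
	}
	for i := uintptr(1); i <= 10; i++ {
		if !b.putFast(0, i) { // old value elided as 0 in the demo
			b.flush(grey)
		}
	}
	b.flush(grey)
	fmt.Println("greyed", greyed, "pointers")
}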
// // The arguments to wbBufFlush depend on whether the caller is doing // its own cgo pointer checks. If it is, then this can be // wbBufFlush(nil, 0). Otherwise, it must pass the slot address and // new. // -// Since buf is a per-P resource, the caller must ensure there are no -// preemption points while buf is in use. +// The caller must ensure there are no preemption points during the +// above sequence. There must be no preemption points while buf is in +// use because it is a per-P resource. There must be no preemption +// points between the buffer put and the write to memory because this +// could allow a GC phase change, which could result in missed write +// barriers. // -// It must be nowritebarrierrec to because write barriers here would +// putFast must be nowritebarrierrec to because write barriers here would // corrupt the write barrier buffer. It (and everything it calls, if // it called anything) has to be nosplit to avoid scheduling on to a // different P and a different buffer. @@ -155,6 +163,13 @@ func wbBufFlush(dst *uintptr, src uintptr) { // Note: Every possible return from this function must reset // the buffer's next pointer to prevent buffer overflow. + // This *must not* modify its arguments because this + // function's argument slots do double duty in gcWriteBarrier + // as register spill slots. Currently, not modifying the + // arguments is sufficient to keep the spill slots unmodified + // (which seems unlikely to change since it costs little and + // helps with debugging). + if getg().m.dying > 0 { // We're going down. Not much point in write barriers // and this way we can allow write barriers in the @@ -214,11 +229,18 @@ func wbBufFlush1(_p_ *p) { // // TODO: Should scanobject/scanblock just stuff pointers into // the wbBuf? Then this would become the sole greying path. + // + // TODO: We could avoid shading any of the "new" pointers in + // the buffer if the stack has been shaded, or even avoid + // putting them in the buffer at all (which would double its + // capacity). This is slightly complicated with the buffer; we + // could track whether any un-shaded goroutine has used the + // buffer, or just track globally whether there are any + // un-shaded stacks and flush after each stack scan. gcw := &_p_.gcw pos := 0 - arenaStart := mheap_.arena_start for _, ptr := range ptrs { - if ptr < arenaStart { + if ptr < minLegalPointer { // nil pointers are very common, especially // for the "old" values. Filter out these and // other "obvious" non-heap pointers ASAP. @@ -227,11 +249,7 @@ func wbBufFlush1(_p_ *p) { // path to reduce the rate of flushes? continue } - // TODO: This doesn't use hbits, so calling - // heapBitsForObject seems a little silly. We could - // easily separate this out since heapBitsForObject - // just calls heapBitsForAddr(obj) to get hbits. - obj, _, span, objIndex := heapBitsForObject(ptr, 0, 0, false) + obj, span, objIndex := findObject(ptr, 0, 0, false) if obj == 0 { continue } diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 3aeb1f6..ab3d14d 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd js,wasm linux nacl netbsd openbsd solaris windows package runtime @@ -366,7 +366,7 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool { // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg if waitio || netpollcheckerr(pd, mode) == 0 { - gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait", traceEvGoBlockNet, 5) + gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5) } // be careful to not lose concurrent READY notification old := atomic.Xchguintptr(gpp, 0) diff --git a/libgo/go/runtime/netpoll_nacl.go b/libgo/go/runtime/netpoll_fake.go index dc5a55e..aab18dc 100644 --- a/libgo/go/runtime/netpoll_nacl.go +++ b/libgo/go/runtime/netpoll_fake.go @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Fake network poller for NaCl. -// Should never be used, because NaCl network connections do not honor "SetNonblock". +// Fake network poller for NaCl and wasm/js. +// Should never be used, because NaCl and wasm/js network connections do not honor "SetNonblock". + +// +build nacl js,wasm package runtime diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go index ec92370..9597633 100644 --- a/libgo/go/runtime/os_darwin.go +++ b/libgo/go/runtime/os_darwin.go @@ -7,323 +7,61 @@ package runtime import "unsafe" type mOS struct { - machport uint32 // return address for mach ipc - waitsema uint32 // semaphore for parking on locks + initialized bool + mutex pthreadmutex + cond pthreadcond + count int } -//go:noescape -//extern mach_msg_trap -func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 - -//extern mach_reply_port -func mach_reply_port() uint32 - -//extern mach_task_self -func mach_task_self() uint32 - func unimplemented(name string) { println(name, "not implemented") *(*int)(unsafe.Pointer(uintptr(1231))) = 1231 } //go:nosplit -func semawakeup(mp *m) { - mach_semrelease(mp.mos.waitsema) -} - -//go:nosplit func semacreate(mp *m) { - if mp.mos.waitsema != 0 { + if mp.initialized { return } - systemstack(func() { - mp.mos.waitsema = mach_semcreate() - }) -} - -// Mach IPC, to get at semaphores -// Definitions are in /usr/include/mach on a Mac. - -func macherror(r int32, fn string) { - print("mach error ", fn, ": ", r, "\n") - throw("mach error") -} - -const _DebugMach = false - -var zerondr machndr - -func mach_msgh_bits(a, b uint32) uint32 { - return a | b<<8 -} - -func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 { - // TODO: Loop on interrupt. 
- return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify) -} - -// Mach RPC (MIG) -const ( - _MinMachMsg = 48 - _MachReply = 100 -) - -type codemsg struct { - h machheader - ndr machndr - code int32 -} - -func machcall(h *machheader, maxsize int32, rxsize int32) int32 { - _g_ := getg() - port := _g_.m.mos.machport - if port == 0 { - port = mach_reply_port() - _g_.m.mos.machport = port - } - - h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE) - h.msgh_local_port = port - h.msgh_reserved = 0 - id := h.msgh_id - - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - print("send:\t") - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0) - if ret != 0 { - if _DebugMach { - print("mach_msg error ", ret, "\n") - } - return ret - } - if _DebugMach { - p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h)) - var i uint32 - for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ { - print(" ", p[i]) - if i%8 == 7 { - print("\n\t") - } - } - if i%8 != 0 { - print("\n") - } - } - if h.msgh_id != id+_MachReply { - if _DebugMach { - print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n") - } - return -303 // MIG_REPLY_MISMATCH - } - // Look for a response giving the return value. - // Any call can send this back with an error, - // and some calls only have return values so they - // send it back on success too. I don't quite see how - // you know it's one of these and not the full response - // format, so just look if the message is right. - c := (*codemsg)(unsafe.Pointer(h)) - if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 { - if _DebugMach { - print("mig result ", c.code, "\n") - } - return c.code - } - if h.msgh_size != uint32(rxsize) { - if _DebugMach { - print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n") - } - return -307 // MIG_ARRAY_TOO_LARGE - } - return 0 -} - -// Semaphores! 
- -const ( - tmach_semcreate = 3418 - rmach_semcreate = tmach_semcreate + _MachReply - - tmach_semdestroy = 3419 - rmach_semdestroy = tmach_semdestroy + _MachReply - - _KERN_ABORTED = 14 - _KERN_OPERATION_TIMED_OUT = 49 -) - -type tmach_semcreatemsg struct { - h machheader - ndr machndr - policy int32 - value int32 -} - -type rmach_semcreatemsg struct { - h machheader - body machbody - semaphore machport -} - -type tmach_semdestroymsg struct { - h machheader - body machbody - semaphore machport -} - -func mach_semcreate() uint32 { - var m [256]uint8 - tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m)) - rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = 0 - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semcreate - tx.ndr = zerondr - - tx.policy = 0 // 0 = SYNC_POLICY_FIFO - tx.value = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx))) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_create") - } - if rx.body.msgh_descriptor_count != 1 { - unimplemented("mach_semcreate desc count") - } - return rx.semaphore.name -} - -func mach_semdestroy(sem uint32) { - var m [256]uint8 - tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m)) - - tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX - tx.h.msgh_size = uint32(unsafe.Sizeof(*tx)) - tx.h.msgh_remote_port = mach_task_self() - tx.h.msgh_id = tmach_semdestroy - tx.body.msgh_descriptor_count = 1 - tx.semaphore.name = sem - tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND - tx.semaphore._type = 0 - - for { - r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue - } - macherror(r, "semaphore_destroy") - } -} - -//extern semaphore_wait -func mach_semaphore_wait(sema uint32) int32 - -//extern semaphore_timedwait -func mach_semaphore_timedwait(sema, sec, nsec uint32) int32 - -//extern semaphore_signal -func mach_semaphore_signal(sema uint32) int32 - -//extern semaphore_signal_all -func mach_semaphore_signal_all(sema uint32) int32 - -func semasleep1(ns int64) int32 { - _g_ := getg() - - if ns >= 0 { - var nsecs int32 - secs := timediv(ns, 1000000000, &nsecs) - r := mach_semaphore_timedwait(_g_.m.mos.waitsema, uint32(secs), uint32(nsecs)) - if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { - return -1 - } - if r != 0 { - macherror(r, "semaphore_wait") - } - return 0 + mp.initialized = true + if err := pthread_mutex_init(&mp.mutex, nil); err != 0 { + throw("pthread_mutex_init") } - - for { - r := mach_semaphore_wait(_g_.m.mos.waitsema) - if r == 0 { - break - } - // Note: We don't know how this call (with no timeout) can get _KERN_OPERATION_TIMED_OUT, - // but it does reliably, though at a very low rate, on OS X 10.8, 10.9, 10.10, and 10.11. - // See golang.org/issue/17161. 
- if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT { // interrupted - continue - } - macherror(r, "semaphore_wait") + if err := pthread_cond_init(&mp.cond, nil); err != 0 { + throw("pthread_cond_init") } - return 0 } //go:nosplit func semasleep(ns int64) int32 { - var r int32 - systemstack(func() { - r = semasleep1(ns) - }) - return r -} - -//go:nosplit -func mach_semrelease(sem uint32) { + mp := getg().m + pthread_mutex_lock(&mp.mutex) for { - r := mach_semaphore_signal(sem) - if r == 0 { - break - } - if r == _KERN_ABORTED { // interrupted - continue + if mp.count > 0 { + mp.count-- + pthread_mutex_unlock(&mp.mutex) + return 0 + } + if ns >= 0 { + var t timespec + t.set_nsec(ns) + err := pthread_cond_timedwait_relative_np(&mp.cond, &mp.mutex, &t) + if err == _ETIMEDOUT { + pthread_mutex_unlock(&mp.mutex) + return -1 + } + } else { + pthread_cond_wait(&mp.cond, &mp.mutex) } - - // mach_semrelease must be completely nosplit, - // because it is called from Go code. - // If we're going to die, start that process on the system stack - // to avoid a Go stack split. - systemstack(func() { macherror(r, "semaphore_signal") }) } } -type machheader struct { - msgh_bits uint32 - msgh_size uint32 - msgh_remote_port uint32 - msgh_local_port uint32 - msgh_reserved uint32 - msgh_id int32 -} - -type machndr struct { - mig_vers uint8 - if_vers uint8 - reserved1 uint8 - mig_encoding uint8 - int_rep uint8 - char_rep uint8 - float_rep uint8 - reserved2 uint8 +//go:nosplit +func semawakeup(mp *m) { + pthread_mutex_lock(&mp.mutex) + mp.count++ + if mp.count > 0 { + pthread_cond_signal(&mp.cond) + } + pthread_mutex_unlock(&mp.mutex) } diff --git a/libgo/go/runtime/os_dragonfly.go b/libgo/go/runtime/os_dragonfly.go index 6452984..abcad72 100644 --- a/libgo/go/runtime/os_dragonfly.go +++ b/libgo/go/runtime/os_dragonfly.go @@ -4,11 +4,12 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern umtx_sleep diff --git a/libgo/go/runtime/os_freebsd.go b/libgo/go/runtime/os_freebsd.go index 8c3535b..34939c5 100644 --- a/libgo/go/runtime/os_freebsd.go +++ b/libgo/go/runtime/os_freebsd.go @@ -8,9 +8,7 @@ import ( "unsafe" ) -type mOS struct { - unused byte -} +type mOS struct{} //go:noescape //extern _umtx_op diff --git a/libgo/go/runtime/os_js.go b/libgo/go/runtime/os_js.go new file mode 100644 index 0000000..ad6db18 --- /dev/null +++ b/libgo/go/runtime/os_js.go @@ -0,0 +1,145 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build js,wasm + +package runtime + +import ( + "unsafe" +) + +func exit(code int32) + +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + if fd > 2 { + throw("runtime.write to fd > 2 is unsupported") + } + wasmWrite(fd, p, n) + return n +} + +// Stubs so tests can link correctly. These should never be called. 
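The rewritten semasleep/semawakeup in the os_darwin.go hunks above build a counting semaphore out of a pthread mutex, a condition variable, and a counter in mOS. The same shape in portable Go, using sync.Cond (illustration only: the runtime cannot depend on package sync, and the real semasleep also supports a relative timeout via pthread_cond_timedwait_relative_np):

package main

import (
	"fmt"
	"sync"
	"time"
)

// sema mirrors the new mOS fields: a mutex, a condition variable and
// a count of pending wakeups.
type sema struct {
	mu    sync.Mutex
	cond  *sync.Cond
	count int
}

func newSema() *sema {
	s := &sema{}
	s.cond = sync.NewCond(&s.mu)
	return s
}

// sleep blocks until a wakeup is available, like semasleep with a
// negative (infinite) timeout: re-check the count after every wait.
func (s *sema) sleep() {
	s.mu.Lock()
	for s.count == 0 {
		s.cond.Wait()
	}
	s.count--
	s.mu.Unlock()
}

// wakeup makes one wakeup available and signals a sleeper, like
// semawakeup.
func (s *sema) wakeup() {
	s.mu.Lock()
	s.count++
	s.cond.Signal()
	s.mu.Unlock()
}

func main() {
	s := newSema()
	go func() {
		time.Sleep(10 * time.Millisecond)
		s.wakeup()
	}()
	s.sleep()
	fmt.Println("woke up")
}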
+func open(name *byte, mode, perm int32) int32 { panic("not implemented") } +func closefd(fd int32) int32 { panic("not implemented") } +func read(fd int32, p unsafe.Pointer, n int32) int32 { panic("not implemented") } + +//go:noescape +func wasmWrite(fd uintptr, p unsafe.Pointer, n int32) + +func usleep(usec uint32) + +func exitThread(wait *uint32) + +type mOS struct{} + +func osyield() + +const _SIGSEGV = 0xb + +func sigpanic() { + g := getg() + if !canpanic(g) { + throw("unexpected signal during runtime execution") + } + + // js only invokes the exception handler for memory faults. + g.sig = _SIGSEGV + panicmem() +} + +type sigset struct{} + +// Called to initialize a new m (including the bootstrap m). +// Called on the parent thread (main thread in case of bootstrap), can allocate memory. +func mpreinit(mp *m) { + mp.gsignal = malg(32 * 1024) + mp.gsignal.m = mp +} + +//go:nosplit +func msigsave(mp *m) { +} + +//go:nosplit +func msigrestore(sigmask sigset) { +} + +//go:nosplit +//go:nowritebarrierrec +func clearSignalHandlers() { +} + +//go:nosplit +func sigblock() { +} + +// Called to initialize a new m (including the bootstrap m). +// Called on the new thread, cannot allocate memory. +func minit() { +} + +// Called from dropm to undo the effect of an minit. +func unminit() { +} + +func osinit() { + ncpu = 1 + getg().m.procid = 2 + physPageSize = 64 * 1024 +} + +// wasm has no signals +const _NSIG = 0 + +func signame(sig uint32) string { + return "" +} + +func crash() { + *(*int32)(nil) = 0 +} + +func getRandomData(r []byte) + +func goenvs() { + goenvs_unix() +} + +func initsig(preinit bool) { +} + +// May run with m.p==nil, so write barriers are not allowed. +//go:nowritebarrier +func newosproc(mp *m) { + panic("newosproc: not implemented") +} + +func setProcessCPUProfiler(hz int32) {} +func setThreadCPUProfiler(hz int32) {} +func sigdisable(uint32) {} +func sigenable(uint32) {} +func sigignore(uint32) {} + +//go:linkname os_sigpipe os.sigpipe +func os_sigpipe() { + throw("too many writes on closed pipe") +} + +//go:nosplit +func cputicks() int64 { + // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). + // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. + // TODO: need more entropy to better seed fastrand. + return nanotime() +} + +//go:linkname syscall_now syscall.now +func syscall_now() (sec int64, nsec int32) { + sec, nsec, _ = time_now() + return +} + +// gsignalStack is unused on js. +type gsignalStack struct{} diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index 816327e..04314bd 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -27,8 +27,9 @@ func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, // Futexsleep is allowed to wake up spuriously. const ( - _FUTEX_WAIT = 0 - _FUTEX_WAKE = 1 + _FUTEX_PRIVATE_FLAG = 128 + _FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG + _FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG ) // Atomically, @@ -45,7 +46,7 @@ func futexsleep(addr *uint32, val uint32, ns int64) { // here, and so can we: as it says a few lines up, // spurious wakeups are allowed. 
if ns < 0 { - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0) return } @@ -62,13 +63,13 @@ func futexsleep(addr *uint32, val uint32, ns int64) { ts.tv_nsec = 0 ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec))))) } - futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0) + futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0) } // If any procs are sleeping on addr, wake up at most cnt. //go:nosplit func futexwakeup(addr *uint32, cnt uint32) { - ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0) + ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0) if ret >= 0 { return } @@ -93,6 +94,11 @@ const ( var procAuxv = []byte("/proc/self/auxv\x00") +var addrspace_vec [1]byte + +//extern mincore +func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + func sysargs(argc int32, argv **byte) { n := argc + 1 @@ -158,12 +164,17 @@ func sysauxv(auxv []uintptr) int { // worth of random data. startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:] + setRandomNumber(uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24) + case _AT_PAGESZ: physPageSize = val } + archauxv(tag, val) + // Commented out for gccgo for now. - // archauxv(tag, val) + // vdsoauxv(tag, val) } return i / 2 } diff --git a/libgo/go/runtime/os_linux_arm.go b/libgo/go/runtime/os_linux_arm.go new file mode 100644 index 0000000..42c2839 --- /dev/null +++ b/libgo/go/runtime/os_linux_arm.go @@ -0,0 +1,60 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +const ( + _AT_PLATFORM = 15 // introduced in at least 2.6.11 + + _HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11 + _HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30 + _HWCAP_IDIVA = 1 << 17 +) + +var randomNumber uint32 +var armArch uint8 = 6 // we default to ARMv6 +var hwcap uint32 // set by archauxv +var hardDiv bool // set if a hardware divider is available + +func checkgoarm() { + // On Android, /proc/self/auxv might be unreadable and hwcap won't + // reflect the CPU capabilities. Assume that every Android arm device + // has the necessary floating point hardware available. + if GOOS == "android" { + return + } + if goarm > 5 && hwcap&_HWCAP_VFP == 0 { + print("runtime: this CPU has no floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5.\n") + exit(1) + } + if goarm > 6 && hwcap&_HWCAP_VFPv3 == 0 { + print("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n") + print("this GOARM=", goarm, " binary. Recompile using GOARM=5 or GOARM=6.\n") + exit(1) + } +} + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. 
+ randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_PLATFORM: // v5l, v6l, v7l + t := *(*uint8)(unsafe.Pointer(val + 1)) + if '5' <= t && t <= '7' { + armArch = t - '0' + } + + case _AT_HWCAP: // CPU capability bit flags + hwcap = uint32(val) + hardDiv = (hwcap & _HWCAP_IDIVA) != 0 + } +} diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go new file mode 100644 index 0000000..013e7ae --- /dev/null +++ b/libgo/go/runtime/os_linux_arm64.go @@ -0,0 +1,29 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm64 + +package runtime + +import "internal/cpu" + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + + case _AT_HWCAP: + // arm64 doesn't have a 'cpuid' instruction equivalent and relies on + // HWCAP/HWCAP2 bits for hardware capabilities. + cpu.HWCap = uint(val) + case _AT_HWCAP2: + cpu.HWCap2 = uint(val) + } +} diff --git a/libgo/go/runtime/os_linux_mips64x.go b/libgo/go/runtime/os_linux_mips64x.go new file mode 100644 index 0000000..b7f737f --- /dev/null +++ b/libgo/go/runtime/os_linux_mips64x.go @@ -0,0 +1,21 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips64 mips64le + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_mipsx.go b/libgo/go/runtime/os_linux_mipsx.go new file mode 100644 index 0000000..a2696de --- /dev/null +++ b/libgo/go/runtime/os_linux_mipsx.go @@ -0,0 +1,21 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux +// +build mips mipsle + +package runtime + +var randomNumber uint32 + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_RANDOM: + // sysargs filled in startupRandomData, but that + // pointer may not be word aligned, so we must treat + // it as a byte array. + randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 | + uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24 + } +} diff --git a/libgo/go/runtime/os_linux_noauxv.go b/libgo/go/runtime/os_linux_noauxv.go new file mode 100644 index 0000000..895b4cd --- /dev/null +++ b/libgo/go/runtime/os_linux_noauxv.go @@ -0,0 +1,11 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
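Before the noauxv fallback's build tags below, a note on the archauxv hooks added above: they consume (tag, value) machine-word pairs that sysauxv reads from the kernel's auxiliary vector. The standalone sketch below decodes /proc/self/auxv directly to show that layout; it is not runtime code, it assumes a little-endian Linux machine, and the tag constants are the standard Linux values.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"unsafe"
)

const (
	_AT_PAGESZ = 6  // system page size
	_AT_HWCAP  = 16 // CPU capability bit flags
)

func main() {
	data, err := ioutil.ReadFile("/proc/self/auxv")
	if err != nil {
		fmt.Println("auxv not readable:", err)
		return
	}
	wordSize := int(unsafe.Sizeof(uintptr(0)))
	word := func(b []byte) uint64 { // little-endian assumption
		if wordSize == 8 {
			return binary.LittleEndian.Uint64(b)
		}
		return uint64(binary.LittleEndian.Uint32(b))
	}
	// The vector is a flat list of (tag, value) pairs, terminated by AT_NULL.
	for i := 0; i+2*wordSize <= len(data); i += 2 * wordSize {
		tag, val := word(data[i:]), word(data[i+wordSize:])
		switch tag {
		case 0: // AT_NULL
			return
		case _AT_HWCAP:
			fmt.Printf("AT_HWCAP  = %#x\n", val)
		case _AT_PAGESZ:
			fmt.Printf("AT_PAGESZ = %d\n", val)
		}
	}
}
```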
+ +// +build linux +// +build !arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le + +package runtime + +func archauxv(tag, val uintptr) { +} diff --git a/libgo/go/runtime/os_linux_ppc64x.go b/libgo/go/runtime/os_linux_ppc64x.go index d27902d..cc79cc4 100644 --- a/libgo/go/runtime/os_linux_ppc64x.go +++ b/libgo/go/runtime/os_linux_ppc64x.go @@ -2,27 +2,21 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore_for_gccgo +// +build linux // +build ppc64 ppc64le package runtime -// For go:linkname -import _ "unsafe" - -// ppc64x doesn't have a 'cpuid' instruction equivalent and relies on -// HWCAP/HWCAP2 bits for hardware capabilities. - -//go:linkname cpu_hwcap internal/cpu.ppc64x_hwcap -//go:linkname cpu_hwcap2 internal/cpu.ppc64x_hwcap2 -var cpu_hwcap uint -var cpu_hwcap2 uint +import "internal/cpu" func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - cpu_hwcap = uint(val) + // ppc64x doesn't have a 'cpuid' instruction + // equivalent and relies on HWCAP/HWCAP2 bits for + // hardware capabilities. + cpu.HWCap = uint(val) case _AT_HWCAP2: - cpu_hwcap2 = uint(val) + cpu.HWCap2 = uint(val) } } diff --git a/libgo/go/runtime/os_linux_s390x.go b/libgo/go/runtime/os_linux_s390x.go new file mode 100644 index 0000000..55d35c7 --- /dev/null +++ b/libgo/go/runtime/os_linux_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "internal/cpu" + +const ( + // bit masks taken from bits/hwcap.h + _HWCAP_S390_VX = 2048 // vector facility +) + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_HWCAP: // CPU capability bit flags + cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0 + } +} diff --git a/libgo/go/runtime/os_netbsd.go b/libgo/go/runtime/os_netbsd.go index 81ebe76..ea47e5c 100644 --- a/libgo/go/runtime/os_netbsd.go +++ b/libgo/go/runtime/os_netbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/os_openbsd.go b/libgo/go/runtime/os_openbsd.go index b64d3af..4f05665 100644 --- a/libgo/go/runtime/os_openbsd.go +++ b/libgo/go/runtime/os_openbsd.go @@ -6,6 +6,7 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index 6b490b7..752bf71 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -39,7 +39,24 @@ func panicCheckMalloc(err error) { var indexError = error(errorString("index out of range")) +// The panicindex, panicslice, and panicdivide functions are called by +// code generated by the compiler for out of bounds index expressions, +// out of bounds slice expressions, and division by zero. The +// panicdivide (again), panicoverflow, panicfloat, and panicmem +// functions are called by the signal handler when a signal occurs +// indicating the respective problem. +// +// Since panicindex and panicslice are never called directly, and +// since the runtime package should never have an out of bounds slice +// or array reference, if we see those functions called from the +// runtime package we turn the panic into a throw. That will dump the +// entire runtime stack for easier debugging. 
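The comment above explains that, inside the runtime package itself, out-of-range panics are escalated to throws so the whole runtime stack gets dumped. The sketch below imitates that caller-sensitive escalation in ordinary user code; it is only an analogue of the idea (the real functions use the gccgo-specific funcfileline and throw), and names such as trustedLookup are made up for illustration.

```go
package main

import (
	"fmt"
	"os"
	"runtime"
	"strings"
)

var errIndex = fmt.Errorf("index out of range")

// panicIndex mimics the pattern described above: if the immediate caller
// is inside a "trusted" prefix (a stand-in for the runtime package), the
// error is escalated to an unrecoverable failure that dumps all goroutine
// stacks, instead of a normal panic that a deferred recover could swallow.
func panicIndex() {
	pc, _, _, _ := runtime.Caller(1)
	name := runtime.FuncForPC(pc).Name()
	if strings.HasPrefix(name, "main.trusted") {
		buf := make([]byte, 1<<16)
		n := runtime.Stack(buf, true) // all goroutines, like throw
		fmt.Fprintf(os.Stderr, "fatal error: %v\n\n%s", errIndex, buf[:n])
		os.Exit(2)
	}
	panic(errIndex)
}

func trustedLookup() { panicIndex() }

func untrustedLookup() { panicIndex() }

func main() {
	// The ordinary path panics and can be recovered.
	func() {
		defer func() { fmt.Println("recovered:", recover()) }()
		untrustedLookup()
	}()

	// The "trusted" path escalates: stacks are dumped and the process exits.
	trustedLookup()
}
```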
+ func panicindex() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(indexError.(errorString))) + } panicCheckMalloc(indexError) panic(indexError) } @@ -47,6 +64,10 @@ func panicindex() { var sliceError = error(errorString("slice bounds out of range")) func panicslice() { + name, _, _ := funcfileline(getcallerpc(), -1) + if hasprefix(name, "runtime.") { + throw(string(sliceError.(errorString))) + } panicCheckMalloc(sliceError) panic(sliceError) } @@ -144,6 +165,12 @@ func newdefer() *_defer { // //go:nosplit func freedefer(d *_defer) { + if d._panic != nil { + freedeferpanic() + } + if d.pfn != 0 { + freedeferfn() + } pp := getg().m.p.ptr() if len(pp.deferpool) == cap(pp.deferpool) { // Transfer half of local cache to the central cache. @@ -176,15 +203,28 @@ func freedefer(d *_defer) { d.link = nil d.frame = nil d.panicStack = nil - d._panic = nil - d.pfn = 0 d.arg = nil d.retaddr = 0 d.makefunccanrecover = false + // d._panic and d.pfn must be nil already. + // If not, we would have called freedeferpanic or freedeferfn above, + // both of which throw. pp.deferpool = append(pp.deferpool, d) } +// Separate function so that it can split stack. +// Windows otherwise runs out of stack space. +func freedeferpanic() { + // _panic must be cleared before d is unlinked from gp. + throw("freedefer with d._panic != nil") +} + +func freedeferfn() { + // fn must be cleared before d is unlinked from gp. + throw("freedefer with d.fn != nil") +} + // deferreturn is called to undefer the stack. // The compiler inserts a call to this function as a finally clause // wrapped around the body of any function that calls defer. @@ -544,15 +584,9 @@ func gopanic(e interface{}) { // the world, we call preprintpanics to invoke all necessary Error // and String methods to prepare the panic strings before startpanic. preprintpanics(gp._panic) - startpanic() - - // startpanic set panicking, which will block main from exiting, - // so now OK to decrement runningPanicDefers. - atomic.Xadd(&runningPanicDefers, -1) - printpanics(gp._panic) - dopanic(0) // should not return - *(*int)(nil) = 0 // not reached + fatalpanic(gp._panic) // should not return + *(*int)(nil) = 0 // not reached } // currentDefer returns the top of the defer stack if it can be recovered. @@ -810,13 +844,16 @@ func sync_throw(s string) { //go:nosplit func throw(s string) { - print("fatal error: ", s, "\n") + // Everything throw does should be recursively nosplit so it + // can be called even when it's unsafe to grow the stack. + systemstack(func() { + print("fatal error: ", s, "\n") + }) gp := getg() if gp.m.throwing == 0 { gp.m.throwing = 1 } - startpanic() - dopanic(0) + fatalthrow() *(*int)(nil) = 0 // not reached } @@ -833,13 +870,76 @@ var panicking uint32 // so that two concurrent panics don't overlap their output. var paniclk mutex +// fatalthrow implements an unrecoverable runtime throw. It freezes the +// system, prints stack traces starting from its caller, and terminates the +// process. +// +//go:nosplit +func fatalthrow() { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + + startpanic_m() + + if dopanic_m(gp, pc, sp) { + crash() + } + + exit(2) + + *(*int)(nil) = 0 // not reached +} + +// fatalpanic implements an unrecoverable panic. It is like fatalthrow, except +// that if msgs != nil, fatalpanic also prints panic messages and decrements +// runningPanicDefers once main is blocked from exiting. 
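The fatalthrow/fatalpanic split above centralizes how the runtime dies. From a user's point of view the practical difference is that a panic unwinds and can be recovered, while a throw cannot. The deliberately crashing program below demonstrates that, using the sync_throw path visible in this hunk (an unlock of an unlocked sync.Mutex); run it expecting a "fatal error" and a non-zero exit.

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	// An ordinary panic unwinds the stack and can be caught by recover.
	func() {
		defer func() { fmt.Println("recovered:", recover()) }()
		panic("ordinary panic")
	}()

	// A runtime throw cannot be caught: no deferred functions run, the
	// runtime prints "fatal error: ..." plus goroutine stacks, and the
	// process normally exits with status 2 (the fatalthrow path above).
	defer func() { fmt.Println("never printed:", recover()) }()
	var mu sync.Mutex
	mu.Unlock() // provokes throw("sync: unlock of unlocked mutex")
}
```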
+// +//go:nosplit +func fatalpanic(msgs *_panic) { + pc := getcallerpc() + sp := getcallersp() + gp := getg() + var docrash bool + + if startpanic_m() && msgs != nil { + // There were panic messages and startpanic_m + // says it's okay to try to print them. + + // startpanic_m set panicking, which will + // block main from exiting, so now OK to + // decrement runningPanicDefers. + atomic.Xadd(&runningPanicDefers, -1) + + printpanics(msgs) + } + + docrash = dopanic_m(gp, pc, sp) + + if docrash { + // By crashing outside the above systemstack call, debuggers + // will not be confused when generating a backtrace. + // Function crash is marked nosplit to avoid stack growth. + crash() + } + + systemstack(func() { + exit(2) + }) + + *(*int)(nil) = 0 // not reached +} + // startpanic_m prepares for an unrecoverable panic. // +// It returns true if panic messages should be printed, or false if +// the runtime is in bad shape and should just print stacks. +// // It can have write barriers because the write barrier explicitly // ignores writes once dying > 0. // //go:yeswritebarrierrec -func startpanic() { +func startpanic_m() bool { _g_ := getg() if mheap_.cachealloc.size == 0 { // very early print("runtime: panic before malloc heap initialized\n") @@ -850,6 +950,12 @@ func startpanic() { // happen (even if we're not in one of these situations). _g_.m.mallocing++ + // If we're dying because of a bad lock count, set it to a + // good lock count so we don't recursively panic below. + if _g_.m.locks < 0 { + _g_.m.locks = 1 + } + switch _g_.m.dying { case 0: _g_.m.dying = 1 @@ -860,15 +966,13 @@ func startpanic() { schedtrace(true) } freezetheworld() - return + return true case 1: - // Something failed while panicking, probably the print of the - // argument to panic(). Just print a stack trace and exit. + // Something failed while panicking. + // Just print a stack trace and exit. _g_.m.dying = 2 print("panic during panic\n") - dopanic(0) - exit(3) - fallthrough + return false case 2: // This is a genuine bug in the runtime, we couldn't even // print the stack trace successfully. @@ -879,14 +983,14 @@ func startpanic() { default: // Can't even print! Just exit. exit(5) + return false // Need to return something. } } var didothers bool var deadlock mutex -func dopanic(unused int) { - gp := getg() +func dopanic_m(gp *g, pc, sp uintptr) bool { if gp.sig != 0 { signame := signame(gp.sig) if signame != "" { @@ -927,11 +1031,7 @@ func dopanic(unused int) { lock(&deadlock) } - if docrash { - crash() - } - - exit(2) + return docrash } // canpanic returns false if a signal should throw instead of @@ -951,7 +1051,7 @@ func canpanic(gp *g) bool { if gp == nil || gp != _m_.curg { return false } - if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { + if _m_.locks != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 { return false } status := readgstatus(gp) @@ -960,3 +1060,14 @@ func canpanic(gp *g) bool { } return true } + +// isAbortPC returns true if pc is the program counter at which +// runtime.abort raises a signal. +// +// It is nosplit because it's part of the isgoexception +// implementation. 
+// +//go:nosplit +func isAbortPC(pc uintptr) bool { + return false +} diff --git a/libgo/go/runtime/pprof/internal/profile/encode.go b/libgo/go/runtime/pprof/internal/profile/encode.go index 6b879a8..af31933 100644 --- a/libgo/go/runtime/pprof/internal/profile/encode.go +++ b/libgo/go/runtime/pprof/internal/profile/encode.go @@ -197,6 +197,10 @@ var profileDecoder = []decoder{ }, // repeated int64 period = 12 func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) }, + // repeated int64 comment = 13 + func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) }, + // int64 defaultSampleType = 14 + func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) }, } // postDecode takes the unexported fields populated by decode (with @@ -278,6 +282,14 @@ func (p *Profile) postDecode() error { pt.Type, err = getString(p.stringTable, &pt.typeX, err) pt.Unit, err = getString(p.stringTable, &pt.unitX, err) } + for _, i := range p.commentX { + var c string + c, err = getString(p.stringTable, &i, err) + p.Comments = append(p.Comments, c) + } + + p.commentX = nil + p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err) p.stringTable = nil return nil } diff --git a/libgo/go/runtime/pprof/internal/profile/profile.go b/libgo/go/runtime/pprof/internal/profile/profile.go index 9b6a6f9..64c3e3f 100644 --- a/libgo/go/runtime/pprof/internal/profile/profile.go +++ b/libgo/go/runtime/pprof/internal/profile/profile.go @@ -22,11 +22,13 @@ import ( // Profile is an in-memory representation of profile.proto. type Profile struct { - SampleType []*ValueType - Sample []*Sample - Mapping []*Mapping - Location []*Location - Function []*Function + SampleType []*ValueType + DefaultSampleType string + Sample []*Sample + Mapping []*Mapping + Location []*Location + Function []*Function + Comments []string DropFrames string KeepFrames string @@ -36,9 +38,11 @@ type Profile struct { PeriodType *ValueType Period int64 - dropFramesX int64 - keepFramesX int64 - stringTable []string + commentX []int64 + dropFramesX int64 + keepFramesX int64 + stringTable []string + defaultSampleTypeX int64 } // ValueType corresponds to Profile.ValueType diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go index be4e869..5128c22 100644 --- a/libgo/go/runtime/pprof/pprof.go +++ b/libgo/go/runtime/pprof/pprof.go @@ -68,7 +68,7 @@ // all pprof commands. // // For more information about pprof, see -// https://github.com/google/pprof/blob/master/doc/pprof.md. +// https://github.com/google/pprof/blob/master/doc/README.md. package pprof import ( @@ -99,7 +99,8 @@ import ( // Each Profile has a unique name. A few profiles are predefined: // // goroutine - stack traces of all current goroutines -// heap - a sampling of all heap allocations +// heap - a sampling of memory allocations of live objects +// allocs - a sampling of all past memory allocations // threadcreate - stack traces that led to the creation of new OS threads // block - stack traces that led to blocking on synchronization primitives // mutex - stack traces of holders of contended mutexes @@ -114,6 +115,16 @@ import ( // all known allocations. This exception helps mainly in programs running // without garbage collection enabled, usually for debugging purposes. // +// The heap profile tracks both the allocation sites for all live objects in +// the application memory and for all objects allocated since the program start. 
+// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects +// flags select which to display, defaulting to -inuse_space (live objects, +// scaled by size). +// +// The allocs profile is the same as the heap profile but changes the default +// pprof display to -alloc_space, the total number of bytes allocated since +// the program began (including garbage-collected bytes). +// // The CPU profile is not available as a Profile. It has a special API, // the StartCPUProfile and StopCPUProfile functions, because it streams // output to a writer during profiling. @@ -150,6 +161,12 @@ var heapProfile = &Profile{ write: writeHeap, } +var allocsProfile = &Profile{ + name: "allocs", + count: countHeap, // identical to heap profile + write: writeAlloc, +} + var blockProfile = &Profile{ name: "block", count: countBlock, @@ -170,6 +187,7 @@ func lockProfiles() { "goroutine": goroutineProfile, "threadcreate": threadcreateProfile, "heap": heapProfile, + "allocs": allocsProfile, "block": blockProfile, "mutex": mutexProfile, } @@ -525,6 +543,16 @@ func countHeap() int { // writeHeap writes the current runtime heap profile to w. func writeHeap(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "") +} + +// writeAlloc writes the current runtime heap profile to w +// with the total allocation space as the default sample type. +func writeAlloc(w io.Writer, debug int) error { + return writeHeapInternal(w, debug, "alloc_space") +} + +func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error { var memStats *runtime.MemStats if debug != 0 { // Read mem stats first, so that our other allocations @@ -555,7 +583,7 @@ func writeHeap(w io.Writer, debug int) error { } if debug == 0 { - return writeHeapProto(w, p, int64(runtime.MemProfileRate)) + return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType) } sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() }) diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go index 02d99f5..74a7777 100644 --- a/libgo/go/runtime/pprof/pprof_test.go +++ b/libgo/go/runtime/pprof/pprof_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !nacl +// +build !nacl,!js package pprof @@ -732,7 +732,7 @@ func TestMutexProfile(t *testing.T) { return } // checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931" - r2 := `^\d+ 1 @(?: 0x[[:xdigit:]]+)+` + r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+` //r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$" if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok { t.Errorf("%q didn't match %q", lines[3], r2) @@ -862,16 +862,22 @@ func containsCounts(prof *profile.Profile, counts []int64) bool { return true } +var emptyCallStackTestRun int64 + // Issue 18836. 
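Returning to the new "allocs" profile registered above: it shares writeHeapInternal with "heap" and differs only in the default sample type pprof displays. A short usage sketch, assuming a runtime that includes this change (the file name and error handling are arbitrary):

```go
package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("allocs.pb.gz")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Lookup returns nil for unknown profile names, so this doubles as a
	// check that the runtime actually provides the new "allocs" profile.
	p := pprof.Lookup("allocs")
	if p == nil {
		log.Fatal("allocs profile not available")
	}
	// debug=0 writes the gzipped protobuf form that pprof consumes; the
	// embedded default sample type will be alloc_space.
	if err := p.WriteTo(f, 0); err != nil {
		log.Fatal(err)
	}
}
```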
func TestEmptyCallStack(t *testing.T) { + name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun) + emptyCallStackTestRun++ + t.Parallel() var buf bytes.Buffer - p := NewProfile("test18836") + p := NewProfile(name) + p.Add("foo", 47674) p.WriteTo(&buf, 1) p.Remove("foo") got := buf.String() - prefix := "test18836 profile: total 1\n" + prefix := name + " profile: total 1\n" if !strings.HasPrefix(got, prefix) { t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix) } diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go index 793be44..d8456be 100644 --- a/libgo/go/runtime/pprof/proto.go +++ b/libgo/go/runtime/pprof/proto.go @@ -11,7 +11,6 @@ import ( "io" "io/ioutil" "runtime" - "sort" "strconv" "time" "unsafe" @@ -53,24 +52,43 @@ type profileBuilder struct { } type memMap struct { - start uintptr - end uintptr + // initialized as reading mapping + start uintptr + end uintptr + offset uint64 + file, buildID string + + funcs symbolizeFlag + fake bool // map entry was faked; /proc/self/maps wasn't available } +// symbolizeFlag keeps track of symbolization result. +// 0 : no symbol lookup was performed +// 1<<0 (lookupTried) : symbol lookup was performed +// 1<<1 (lookupFailed): symbol lookup was performed but failed +type symbolizeFlag uint8 + +const ( + lookupTried symbolizeFlag = 1 << iota + lookupFailed symbolizeFlag = 1 << iota +) + const ( // message Profile - tagProfile_SampleType = 1 // repeated ValueType - tagProfile_Sample = 2 // repeated Sample - tagProfile_Mapping = 3 // repeated Mapping - tagProfile_Location = 4 // repeated Location - tagProfile_Function = 5 // repeated Function - tagProfile_StringTable = 6 // repeated string - tagProfile_DropFrames = 7 // int64 (string table index) - tagProfile_KeepFrames = 8 // int64 (string table index) - tagProfile_TimeNanos = 9 // int64 - tagProfile_DurationNanos = 10 // int64 - tagProfile_PeriodType = 11 // ValueType (really optional string???) - tagProfile_Period = 12 // int64 + tagProfile_SampleType = 1 // repeated ValueType + tagProfile_Sample = 2 // repeated Sample + tagProfile_Mapping = 3 // repeated Mapping + tagProfile_Location = 4 // repeated Location + tagProfile_Function = 5 // repeated Function + tagProfile_StringTable = 6 // repeated string + tagProfile_DropFrames = 7 // int64 (string table index) + tagProfile_KeepFrames = 8 // int64 (string table index) + tagProfile_TimeNanos = 9 // int64 + tagProfile_DurationNanos = 10 // int64 + tagProfile_PeriodType = 11 // ValueType (really optional string???) + tagProfile_Period = 12 // int64 + tagProfile_Comment = 13 // repeated int64 + tagProfile_DefaultSampleType = 14 // int64 // message ValueType tagValueType_Type = 1 // int64 (string table index) @@ -174,7 +192,7 @@ func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) { } // pbMapping encodes a Mapping message to b.pb. -func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string) { +func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) { start := b.pb.startMessage() b.pb.uint64Opt(tagMapping_ID, id) b.pb.uint64Opt(tagMapping_Start, base) @@ -182,8 +200,15 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file b.pb.uint64Opt(tagMapping_Offset, offset) b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file)) b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID)) - // TODO: Set any of HasInlineFrames, HasFunctions, HasFilenames, HasLineNumbers? 
- // It seems like they should all be true, but they've never been set. + // TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs). + // Decide what to do about HasInlineFrames and HasLineNumbers. + // Also, another approach to handle the mapping entry with + // incomplete symbolization results is to dupliace the mapping + // entry (but with different Has* fields values) and use + // different entries for symbolized locations and unsymbolized locations. + if hasFuncs { + b.pb.bool(tagMapping_HasFunctions, true) + } b.pb.endMessage(tag, start) } @@ -208,6 +233,11 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { return 0 } + symbolizeResult := lookupTried + if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 { + symbolizeResult |= lookupFailed + } + if frame.PC == 0 { // If we failed to resolve the frame, at least make up // a reasonable call PC. This mostly happens in tests. @@ -242,12 +272,14 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 { } frame, more = frames.Next() } - if len(b.mem) > 0 { - i := sort.Search(len(b.mem), func(i int) bool { - return b.mem[i].end > addr - }) - if i < len(b.mem) && b.mem[i].start <= addr && addr < b.mem[i].end { + for i := range b.mem { + if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake { b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1)) + + m := b.mem[i] + m.funcs |= symbolizeResult + b.mem[i] = m + break } } b.pb.endMessage(tagProfile_Location, start) @@ -348,7 +380,7 @@ func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error } // build completes and returns the constructed profile. -func (b *profileBuilder) build() error { +func (b *profileBuilder) build() { b.end = time.Now() b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano()) @@ -395,13 +427,17 @@ func (b *profileBuilder) build() error { b.pbSample(values, locs, labels) } + for i, m := range b.mem { + hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed + b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions) + } + // TODO: Anything for tagProfile_DropFrames? // TODO: Anything for tagProfile_KeepFrames? b.pb.strings(tagProfile_StringTable, b.strings) b.zw.Write(b.pb.data) b.zw.Close() - return nil } // readMapping reads /proc/self/maps and writes mappings to b.pb. @@ -410,6 +446,12 @@ func (b *profileBuilder) build() error { func (b *profileBuilder) readMapping() { data, _ := ioutil.ReadFile("/proc/self/maps") parseProcSelfMaps(data, b.addMapping) + if len(b.mem) == 0 { // pprof expects a map entry, so fake one. + b.addMappingEntry(0, 0, 0, "", "", true) + // TODO(hyangah): make addMapping return *memMap or + // take a memMap struct, and get rid of addMappingEntry + // that takes a bunch of positional arguments. 
+ } } func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) { @@ -510,6 +552,16 @@ func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, } func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) { - b.mem = append(b.mem, memMap{uintptr(lo), uintptr(hi)}) - b.pbMapping(tagProfile_Mapping, uint64(len(b.mem)), lo, hi, offset, file, buildID) + b.addMappingEntry(lo, hi, offset, file, buildID, false) +} + +func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) { + b.mem = append(b.mem, memMap{ + start: uintptr(lo), + end: uintptr(hi), + offset: offset, + file: file, + buildID: buildID, + fake: fake, + }) } diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go index a268c3a..604628c 100644 --- a/libgo/go/runtime/pprof/proto_test.go +++ b/libgo/go/runtime/pprof/proto_test.go @@ -8,7 +8,10 @@ import ( "bytes" "encoding/json" "fmt" + "internal/testenv" "io/ioutil" + "os" + "os/exec" "reflect" "runtime" "runtime/pprof/internal/profile" @@ -63,7 +66,7 @@ func TestConvertCPUProfileEmpty(t *testing.T) { {Type: "cpu", Unit: "nanoseconds"}, } - checkProfile(t, p, 2000*1000, periodType, sampleType, nil) + checkProfile(t, p, 2000*1000, periodType, sampleType, nil, "") } // For gccgo make these functions different so that gccgo doesn't @@ -96,9 +99,16 @@ func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) { addr2 = mprof.Mapping[1].Start map2 = mprof.Mapping[1] map2.BuildID, _ = elfBuildID(map2.File) + case "js": + addr1 = uint64(funcPC(f1)) + addr2 = uint64(funcPC(f2)) default: addr1 = uint64(funcPC(f1)) addr2 = uint64(funcPC(f2)) + // Fake mapping - HasFunctions will be true because two PCs from Go + // will be fully symbolized. + fake := &profile.Mapping{ID: 1, HasFunctions: true} + map1, map2 = fake, fake } return } @@ -132,18 +142,23 @@ func TestConvertCPUProfile(t *testing.T) { {ID: 4, Mapping: map2, Address: addr2 + 1}, }}, } - checkProfile(t, p, period, periodType, sampleType, samples) + checkProfile(t, p, period, periodType, sampleType, samples, "") } -func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample) { +func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample, defaultSampleType string) { + t.Helper() + if p.Period != period { - t.Fatalf("p.Period = %d, want %d", p.Period, period) + t.Errorf("p.Period = %d, want %d", p.Period, period) } if !reflect.DeepEqual(p.PeriodType, periodType) { - t.Fatalf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) + t.Errorf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType)) } if !reflect.DeepEqual(p.SampleType, sampleType) { - t.Fatalf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + t.Errorf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType)) + } + if defaultSampleType != p.DefaultSampleType { + t.Errorf("p.DefaultSampleType = %v\nwant = %v", p.DefaultSampleType, defaultSampleType) } // Clear line info since it is not in the expected samples. // If we used f1 and f2 above, then the samples will have line info. 
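The mapping bookkeeping above folds every location's symbolization outcome into per-mapping symbolizeFlag bits, and HasFunctions is emitted only when lookups were attempted and none failed. A minimal standalone illustration of that aggregation (the sample data is invented):

```go
package main

import "fmt"

type symbolizeFlag uint8

const (
	lookupTried  symbolizeFlag = 1 << iota // at least one lookup was attempted
	lookupFailed                           // at least one lookup failed
)

func main() {
	var funcs symbolizeFlag

	// Three locations fall in the same mapping; one fails to symbolize.
	for _, ok := range []bool{true, false, true} {
		res := lookupTried
		if !ok {
			res |= lookupFailed
		}
		funcs |= res
	}

	hasFunctions := funcs == lookupTried       // tried, and nothing failed
	fmt.Println("HasFunctions:", hasFunctions) // false here: one lookup failed
}
```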
@@ -222,3 +237,114 @@ func TestProcSelfMaps(t *testing.T) { } } } + +// TestMapping checkes the mapping section of CPU profiles +// has the HasFunctions field set correctly. If all PCs included +// in the samples are successfully symbolized, the corresponding +// mapping entry (in this test case, only one entry) should have +// its HasFunctions field set true. +// The test generates a CPU profile that includes PCs from C side +// that the runtime can't symbolize. See ./testdata/mappingtest. +func TestMapping(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveCGO(t) + + prog := "./testdata/mappingtest/main.go" + + // GoOnly includes only Go symbols that runtime will symbolize. + // Go+C includes C symbols that runtime will not symbolize. + for _, traceback := range []string{"GoOnly", "Go+C"} { + t.Run("traceback"+traceback, func(t *testing.T) { + cmd := exec.Command(testenv.GoToolPath(t), "run", prog) + if traceback != "GoOnly" { + cmd.Env = append(os.Environ(), "SETCGOTRACEBACK=1") + } + cmd.Stderr = new(bytes.Buffer) + + out, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run the test program %q: %v\n%v", prog, err, cmd.Stderr) + } + + prof, err := profile.Parse(bytes.NewReader(out)) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + if len(miss) == 0 { + t.Log("no location with missing symbol info was sampled") + } + + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } + }) + } +} + +func symbolized(loc *profile.Location) bool { + if len(loc.Line) == 0 { + return false + } + l := loc.Line[0] + f := l.Function + if l.Line == 0 || f == nil || f.Name == "" || f.Filename == "" { + return false + } + return true +} + +// TestFakeMapping tests if at least one mapping exists +// (including a fake mapping), and their HasFunctions bits +// are set correctly. 
+func TestFakeMapping(t *testing.T) { + var buf bytes.Buffer + if err := Lookup("heap").WriteTo(&buf, 0); err != nil { + t.Fatalf("failed to write heap profile: %v", err) + } + prof, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("failed to parse the generated profile data: %v", err) + } + t.Logf("Profile: %s", prof) + if len(prof.Mapping) == 0 { + t.Fatal("want profile with at least one mapping entry, got 0 mapping") + } + + hit := make(map[*profile.Mapping]bool) + miss := make(map[*profile.Mapping]bool) + for _, loc := range prof.Location { + if symbolized(loc) { + hit[loc.Mapping] = true + } else { + miss[loc.Mapping] = true + } + } + for _, m := range prof.Mapping { + if miss[m] && m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=true, but contains locations with failed symbolization", m) + continue + } + if !miss[m] && hit[m] && !m.HasFunctions { + t.Errorf("mapping %+v has HasFunctions=false, but all referenced locations from this lapping were symbolized successfully", m) + continue + } + } +} diff --git a/libgo/go/runtime/pprof/protomem.go b/libgo/go/runtime/pprof/protomem.go index 2756cfd..82565d5 100644 --- a/libgo/go/runtime/pprof/protomem.go +++ b/libgo/go/runtime/pprof/protomem.go @@ -12,7 +12,7 @@ import ( ) // writeHeapProto writes the current heap profile in protobuf format to w. -func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error { +func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defaultSampleType string) error { b := newProfileBuilder(w) b.pbValueType(tagProfile_PeriodType, "space", "bytes") b.pb.int64Opt(tagProfile_Period, rate) @@ -20,6 +20,9 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error b.pbValueType(tagProfile_SampleType, "alloc_space", "bytes") b.pbValueType(tagProfile_SampleType, "inuse_objects", "count") b.pbValueType(tagProfile_SampleType, "inuse_space", "bytes") + if defaultSampleType != "" { + b.pb.int64Opt(tagProfile_DefaultSampleType, b.stringIndex(defaultSampleType)) + } values := []int64{0, 0, 0, 0} var locs []uint64 diff --git a/libgo/go/runtime/pprof/protomem_test.go b/libgo/go/runtime/pprof/protomem_test.go index 1e30ed9..315d5f0 100644 --- a/libgo/go/runtime/pprof/protomem_test.go +++ b/libgo/go/runtime/pprof/protomem_test.go @@ -14,7 +14,6 @@ import ( func TestConvertMemProfile(t *testing.T) { addr1, addr2, map1, map2 := testPCs(t) - var buf bytes.Buffer // MemProfileRecord stacks are return PCs, so add one to the // addresses recorded in the "profile". 
The proto profile // locations are call PCs, so conversion will subtract one @@ -27,15 +26,6 @@ func TestConvertMemProfile(t *testing.T) { {AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}}, } - if err := writeHeapProto(&buf, rec, rate); err != nil { - t.Fatalf("writing profile: %v", err) - } - - p, err := profile.Parse(&buf) - if err != nil { - t.Fatalf("profile.Parse: %v", err) - } - periodType := &profile.ValueType{Type: "space", Unit: "bytes"} sampleType := []*profile.ValueType{ {Type: "alloc_objects", Unit: "count"}, @@ -70,5 +60,25 @@ func TestConvertMemProfile(t *testing.T) { NumLabel: map[string][]int64{"bytes": {829411}}, }, } - checkProfile(t, p, rate, periodType, sampleType, samples) + for _, tc := range []struct { + name string + defaultSampleType string + }{ + {"heap", ""}, + {"allocs", "alloc_space"}, + } { + t.Run(tc.name, func(t *testing.T) { + var buf bytes.Buffer + if err := writeHeapProto(&buf, rec, rate, tc.defaultSampleType); err != nil { + t.Fatalf("writing profile: %v", err) + } + + p, err := profile.Parse(&buf) + if err != nil { + t.Fatalf("profile.Parse: %v", err) + } + + checkProfile(t, p, rate, periodType, sampleType, samples, tc.defaultSampleType) + }) + } } diff --git a/libgo/go/runtime/pprof/testdata/mappingtest/main.go b/libgo/go/runtime/pprof/testdata/mappingtest/main.go new file mode 100644 index 0000000..7850faa --- /dev/null +++ b/libgo/go/runtime/pprof/testdata/mappingtest/main.go @@ -0,0 +1,105 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This program outputs a CPU profile that includes +// both Go and Cgo stacks. This is used by the mapping info +// tests in runtime/pprof. +// +// If SETCGOTRACEBACK=1 is set, the CPU profile will includes +// PCs from C side but they will not be symbolized. +package main + +/* +#include <stdint.h> +#include <stdlib.h> + +int cpuHogCSalt1 = 0; +int cpuHogCSalt2 = 0; + +void CPUHogCFunction() { + int foo = cpuHogCSalt1; + int i; + for (i = 0; i < 100000; i++) { + if (foo > 0) { + foo *= foo; + } else { + foo *= foo + 1; + } + cpuHogCSalt2 = foo; + } +} + +struct CgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t *buf; + uintptr_t max; +}; + +void CollectCgoTraceback(void* parg) { + struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg); + arg->buf[0] = (uintptr_t)(CPUHogCFunction); + arg->buf[1] = 0; +}; +*/ +import "C" + +import ( + "log" + "os" + "runtime" + "runtime/pprof" + "time" + "unsafe" +) + +func init() { + if v := os.Getenv("SETCGOTRACEBACK"); v == "1" { + // Collect some PCs from C-side, but don't symbolize. + runtime.SetCgoTraceback(0, unsafe.Pointer(C.CollectCgoTraceback), nil, nil) + } +} + +func main() { + go cpuHogGoFunction() + go cpuHogCFunction() + runtime.Gosched() + + if err := pprof.StartCPUProfile(os.Stdout); err != nil { + log.Fatal("can't start CPU profile: ", err) + } + time.Sleep(1 * time.Second) + pprof.StopCPUProfile() + + if err := os.Stdout.Close(); err != nil { + log.Fatal("can't write CPU profile: ", err) + } +} + +var salt1 int +var salt2 int + +func cpuHogGoFunction() { + // Generates CPU profile samples including a Go call path. 
+ for { + foo := salt1 + for i := 0; i < 1e5; i++ { + if foo > 0 { + foo *= foo + } else { + foo *= foo + 1 + } + salt2 = foo + } + runtime.Gosched() + } +} + +func cpuHogCFunction() { + // Generates CPU profile samples including a Cgo call path. + for { + C.CPUHogCFunction() + runtime.Gosched() + } +} diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 4fc45dd..77d379b 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -5,6 +5,7 @@ package runtime import ( + "internal/cpu" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -169,9 +170,11 @@ func main() { // Allow newproc to start new Ms. mainStarted = true - systemstack(func() { - newm(sysmon, nil) - }) + if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon + systemstack(func() { + newm(sysmon, nil) + }) + } // Lock the main goroutine onto this, the main OS thread, // during initialization. Most programs won't care, but a few @@ -242,7 +245,7 @@ func main() { } } if atomic.Load(&panicking) != 0 { - gopark(nil, nil, "panicwait", traceEvGoStop, 1) + gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1) } exit(0) @@ -276,7 +279,7 @@ func forcegchelper() { throw("forcegc: phase error") } atomic.Store(&forcegc.idle, 1) - goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1) + goparkunlock(&forcegc.lock, waitReasonForceGGIdle, traceEvGoBlock, 1) // this goroutine is explicitly resumed by sysmon if debug.gctrace > 0 { println("GC forced") @@ -291,6 +294,7 @@ func forcegchelper() { // Gosched yields the processor, allowing other goroutines to run. It does not // suspend the current goroutine, so execution resumes automatically. func Gosched() { + checkTimeouts() mcall(gosched_m) } @@ -305,7 +309,14 @@ func goschedguarded() { // If unlockf returns false, the goroutine is resumed. // unlockf must not access this G's stack, as it may be moved between // the call to gopark and the call to unlockf. -func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) { +// Reason explains why the goroutine has been parked. +// It is displayed in stack traces and heap dumps. +// Reasons should be unique and descriptive. +// Do not re-use reasons, add new ones. +func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) { + if reason != waitReasonSleep { + checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy + } mp := acquirem() gp := mp.curg status := readgstatus(gp) @@ -324,7 +335,7 @@ func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason s // Puts the current goroutine into a waiting state and unlocks the lock. // The goroutine can be made runnable again by calling goready(gp). -func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) { +func goparkunlock(lock *mutex, reason waitReason, traceEv byte, traceskip int) { gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) } @@ -468,6 +479,37 @@ const ( _GoidCacheBatch = 16 ) +// cpuinit extracts the environment variable GODEBUGCPU from the environment on +// Linux and Darwin if the GOEXPERIMENT debugcpu was set and calls internal/cpu.Initialize. +func cpuinit() { + const prefix = "GODEBUGCPU=" + var env string + + if haveexperiment("debugcpu") && (GOOS == "linux" || GOOS == "darwin") { + cpu.DebugOptions = true + + // Similar to goenv_unix but extracts the environment value for + // GODEBUGCPU directly. 
+ // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() + n := int32(0) + for argv_index(argv, argc+1+n) != nil { + n++ + } + + for i := int32(0); i < n; i++ { + p := argv_index(argv, argc+1+i) + s := *(*string)(unsafe.Pointer(&stringStruct{unsafe.Pointer(p), findnull(p)})) + + if hasprefix(s, prefix) { + env = gostring(p)[len(prefix):] + break + } + } + } + + cpu.Initialize(env) +} + // The bootstrap sequence is: // // call osinit @@ -488,6 +530,7 @@ func schedinit() { mallocinit() mcommoninit(_g_.m) + cpuinit() // must run before alginit alginit() // maps must not be used before this call msigsave(_g_.m) @@ -778,7 +821,7 @@ func casgstatus(gp *g, oldval, newval uint32) { }) } - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 5 * 1000 var nextYield int64 @@ -786,9 +829,7 @@ func casgstatus(gp *g, oldval, newval uint32) { // GC time to finish and change the state to oldval. for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ { if oldval == _Gwaiting && gp.atomicstatus == _Grunnable { - systemstack(func() { - throw("casgstatus: waiting for Gwaiting but is Grunnable") - }) + throw("casgstatus: waiting for Gwaiting but is Grunnable") } // Help GC if needed. // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) { @@ -826,7 +867,7 @@ func scang(gp *g, gcw *gcWork) { gp.gcscandone = false - // See http://golang.org/cl/21503 for justification of the yield delay. + // See https://golang.org/cl/21503 for justification of the yield delay. const yieldDelay = 10 * 1000 var nextYield int64 @@ -1212,7 +1253,9 @@ func mstart1() { //go:yeswritebarrierrec func mstartm0() { // Create an extra M for callbacks on threads not created by Go. - if iscgo && !cgoHasExtraM { + // An extra M is also needed on Windows for callbacks created by + // syscall.NewCallback. See issue #6751 for details. + if (iscgo || GOOS == "windows") && !cgoHasExtraM { cgoHasExtraM = true newextram() } @@ -1517,8 +1560,12 @@ func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointe // put the m back on the list. //go:nosplit func needm(x byte) { - if iscgo && !cgoHasExtraM { + if (iscgo || GOOS == "windows") && !cgoHasExtraM { // Can happen if C/C++ code calls Go from a global ctor. + // Can also happen on Windows if a global ctor uses a + // callback created by syscall.NewCallback. See issue #6751 + // for details. + // // Can not throw, because scheduler is not initialized yet. write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback))) exit(1) @@ -1814,13 +1861,16 @@ func newm1(mp *m) { // // The calling thread must itself be in a known-good state. func startTemplateThread() { + if GOARCH == "wasm" { // no threads on wasm yet + return + } if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { return } newm(templateThread, nil) } -// tmeplateThread is a thread in a known-good state that exists solely +// templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread // may not be a a good state. // @@ -2232,6 +2282,14 @@ stop: return gp, false } + // wasm only: + // Check if a goroutine is waiting for a callback from the WebAssembly host. + // If yes, pause the execution until a callback was triggered. + if pauseSchedulerUntilCallback() { + // A callback was triggered and caused at least one goroutine to wake up. 
+ goto top + } + // Before we drop our P, make a snapshot of the allp slice, // which can change underfoot once we no longer block // safe-points. We don't need to snapshot the contents because @@ -2616,7 +2674,7 @@ func goexit0(gp *g) { gp._defer = nil // should be true already but just in case. gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. gp.writebuf = nil - gp.waitreason = "" + gp.waitreason = 0 gp.param = nil gp.labels = nil gp.timer = nil @@ -2635,6 +2693,11 @@ func goexit0(gp *g) { gp.gcscanvalid = true dropg() + if GOARCH == "wasm" { // no threads yet on wasm + gfput(_g_.m.p.ptr(), gp) + schedule() // never returns + } + if _g_.m.lockedInt != 0 { print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n") throw("internal lockOSThread error") @@ -2743,8 +2806,6 @@ func entersyscall_gcwait() { unlock(&sched.lock) } -// The same as reentersyscall(), but with a hint that the syscall is blocking. -//go:nosplit func reentersyscallblock(pc, sp uintptr) { _g_ := getg() @@ -2789,9 +2850,7 @@ func exitsyscall() { oldp := _g_.m.p.ptr() if exitsyscallfast() { if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } if trace.enabled { if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { @@ -2836,9 +2895,7 @@ func exitsyscall() { mcall(exitsyscall0) if _g_.m.mcache == nil { - systemstack(func() { - throw("lost mcache") - }) + throw("lost mcache") } // Scheduler returned, so we're allowed to run now. @@ -3188,6 +3245,42 @@ func setSystemGoroutine() { atomic.Xadd(&expectedSystemGoroutines, -1) } +// saveAncestors copies previous ancestors of the given caller g and +// includes infor for the current caller into a new set of tracebacks for +// a g being created. +func saveAncestors(callergp *g) *[]ancestorInfo { + // Copy all prior info, except for the root goroutine (goid 0). + if debug.tracebackancestors <= 0 || callergp.goid == 0 { + return nil + } + var callerAncestors []ancestorInfo + if callergp.ancestors != nil { + callerAncestors = *callergp.ancestors + } + n := int32(len(callerAncestors)) + 1 + if n > debug.tracebackancestors { + n = debug.tracebackancestors + } + ancestors := make([]ancestorInfo, n) + copy(ancestors[1:], callerAncestors) + + var pcs [_TracebackMaxFrames]uintptr + // FIXME: This should get a traceback of callergp. + // npcs := gcallers(callergp, 0, pcs[:]) + npcs := 0 + ipcs := make([]uintptr, npcs) + copy(ipcs, pcs[:]) + ancestors[0] = ancestorInfo{ + pcs: ipcs, + goid: callergp.goid, + gopc: callergp.gopc, + } + + ancestorsp := new([]ancestorInfo) + *ancestorsp = ancestors + return ancestorsp +} + // Put on gfree list. // If local list is too long, transfer a batch to the global list. func gfput(_p_ *p, gp *g) { @@ -3265,6 +3358,9 @@ func Breakpoint() { // or else the m might be different in this function than in the caller. //go:nosplit func dolockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() _g_.m.lockedg.set(_g_) _g_.lockedm.set(_g_.m) @@ -3280,6 +3376,10 @@ func dolockOSThread() { // If the calling goroutine exits without unlocking the thread, // the thread will be terminated. // +// All init functions are run on the startup thread. Calling LockOSThread +// from an init function will cause the main function to be invoked on +// that thread. +// // A goroutine should call LockOSThread before calling OS services or // non-Go library functions that depend on per-thread state. 
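The expanded LockOSThread documentation above spells out a useful guarantee: locking the OS thread from an init function pins main to the startup thread. A small usage sketch (the GUI-toolkit motivation is just an example):

```go
package main

import (
	"fmt"
	"runtime"
)

func init() {
	// Lock before main runs; per the documentation above, main will then
	// be invoked on this same startup thread, which some C libraries
	// (GUI toolkits, for instance) require.
	runtime.LockOSThread()
}

func main() {
	defer runtime.UnlockOSThread()
	fmt.Println("main is pinned to the startup OS thread")
}
```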
func LockOSThread() { @@ -3309,6 +3409,9 @@ func lockOSThread() { // or else the m might be in different in this function than in the caller. //go:nosplit func dounlockOSThread() { + if GOARCH == "wasm" { + return // no threads on wasm yet + } _g_ := getg() if _g_.m.lockedInt != 0 || _g_.m.lockedExt != 0 { return @@ -3382,6 +3485,7 @@ func _ExternalCode() { _ExternalCode() } func _LostExternalCode() { _LostExternalCode() } func _GC() { _GC() } func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } +func _VDSO() { _VDSO() } // Counts SIGPROFs received while in atomic64 critical section, on mips{,le} var lostAtomic64Count uint64 @@ -3470,7 +3574,7 @@ func sigprof(pc uintptr, gp *g, mp *m) { } if prof.hz != 0 { - if (GOARCH == "mips" || GOARCH == "mipsle") && lostAtomic64Count > 0 { + if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm") && lostAtomic64Count > 0 { cpuprof.addLostAtomic64(lostAtomic64Count) lostAtomic64Count = 0 } @@ -3818,8 +3922,17 @@ func checkdead() { return } + // If we are not running under cgo, but we have an extra M then account + // for it. (It is possible to have an extra M on Windows without cgo to + // accommodate callbacks created by syscall.NewCallback. See issue #6751 + // for details.) + var run0 int32 + if !iscgo && cgoHasExtraM { + run0 = 1 + } + run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys - if run > 0 { + if run > run0 { return } if run < 0 { @@ -4215,7 +4328,7 @@ func schedtrace(detailed bool) { if lockedm != nil { id2 = lockedm.id } - print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n") + print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n") } unlock(&allglock) unlock(&sched.lock) @@ -4375,7 +4488,7 @@ func runqempty(_p_ *p) bool { const randomizeScheduler = raceenabled // runqput tries to put g on the local runnable queue. -// If next if false, runqput adds g to the tail of the runnable queue. +// If next is false, runqput adds g to the tail of the runnable queue. // If next is true, runqput puts g in the _p_.runnext slot. // If the run queue is full, runnext puts g on the global queue. // Executed only by the owner P. @@ -4571,6 +4684,11 @@ func setMaxThreads(in int) (out int) { return } +func haveexperiment(name string) bool { + // The gofrontend does not support experiments. + return false +} + //go:nosplit func procPin() int { _g_ := getg() @@ -4618,7 +4736,7 @@ func sync_runtime_canSpin(i int) bool { // Spin only few times and only if running on a multicore machine and // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. // As opposed to runtime mutex we don't do passive spinning here, - // because there can be work on global runq on on other Ps. + // because there can be work on global runq or on other Ps. 
if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 { return false } diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go index 672e1fa..82a2fe4 100644 --- a/libgo/go/runtime/proc_test.go +++ b/libgo/go/runtime/proc_test.go @@ -28,6 +28,9 @@ func perpetuumMobile() { } func TestStopTheWorldDeadlock(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping during short test") } @@ -230,6 +233,10 @@ func TestBlockLocked(t *testing.T) { } func TestTimerFairness(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -256,6 +263,10 @@ func TestTimerFairness(t *testing.T) { } func TestTimerFairness2(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + done := make(chan bool) c := make(chan bool) for i := 0; i < 2; i++ { @@ -290,7 +301,13 @@ var preempt = func() int { } func TestPreemption(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that goroutines are preempted at function calls. N := 5 if testing.Short() { @@ -314,7 +331,13 @@ func TestPreemption(t *testing.T) { } func TestPreemptionGC(t *testing.T) { - t.Skip("gccgo does not implement preemption") + if runtime.Compiler == "gccgo" { + t.Skip("gccgo does not implement preemption") + } + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } + // Test that pending GC preempts running goroutines. P := 5 N := 10 @@ -387,6 +410,9 @@ func TestNumGoroutine(t *testing.T) { } func TestPingPongHog(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no preemption on wasm yet") + } if testing.Short() { t.Skip("skipping in -short mode") } @@ -837,6 +863,10 @@ func TestStealOrder(t *testing.T) { } func TestLockOSThreadNesting(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("no threads on wasm yet") + } + go func() { e, i := runtime.LockOSCounts() if e != 0 || i != 0 { diff --git a/libgo/go/runtime/rand_test.go b/libgo/go/runtime/rand_test.go index f8831b0..1b84c79 100644 --- a/libgo/go/runtime/rand_test.go +++ b/libgo/go/runtime/rand_test.go @@ -25,7 +25,7 @@ func BenchmarkFastrandHashiter(b *testing.B) { } b.RunParallel(func(pb *testing.PB) { for pb.Next() { - for _ = range m { + for range m { break } } diff --git a/libgo/go/runtime/runtime-lldb_test.go b/libgo/go/runtime/runtime-lldb_test.go index 9a28705..fe3a0eb 100644 --- a/libgo/go/runtime/runtime-lldb_test.go +++ b/libgo/go/runtime/runtime-lldb_test.go @@ -154,7 +154,9 @@ func TestLldbPython(t *testing.T) { t.Fatalf("failed to create file: %v", err) } - cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-o", "a.exe") + // As of 2018-07-17, lldb doesn't support compressed DWARF, so + // disable it for this test. + cmd := exec.Command(testenv.GoToolPath(t), "build", "-gcflags=all=-N -l", "-ldflags=-compressdwarf=false", "-o", "a.exe") cmd.Dir = dir out, err := cmd.CombinedOutput() if err != nil { diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index b617f85..8b1b0a0 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -326,20 +326,21 @@ type dbgVar struct { // existing int var for that value, which may // already have an initial value. 
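saveAncestors above records, when debug.tracebackancestors is set, which goroutines created the current one; the runtime1.go hunk just below wires that up as a GODEBUG option. To try it, build a program like the sketch below and run it with GODEBUG=tracebackancestors=5. With the gc toolchain the crash traceback gains "[originating from goroutine N]:" sections; how much detail this gccgo port prints is limited by the FIXME in saveAncestors (creator stacks are not yet captured). The binary name is a placeholder.

```go
// Run with: GODEBUG=tracebackancestors=5 ./ancestors
package main

func crash() { panic("boom") }

func startWorker() {
	go crash() // the goroutine that will panic
	select {}  // park; the child's panic terminates the process
}

func main() {
	go startWorker() // an extra ancestor level for the traceback
	select {}
}
```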
var debug struct { - allocfreetrace int32 - cgocheck int32 - efence int32 - gccheckmark int32 - gcpacertrace int32 - gcshrinkstackoff int32 - gcrescanstacks int32 - gcstoptheworld int32 - gctrace int32 - invalidptr int32 - sbrk int32 - scavenge int32 - scheddetail int32 - schedtrace int32 + allocfreetrace int32 + cgocheck int32 + efence int32 + gccheckmark int32 + gcpacertrace int32 + gcshrinkstackoff int32 + gcrescanstacks int32 + gcstoptheworld int32 + gctrace int32 + invalidptr int32 + sbrk int32 + scavenge int32 + scheddetail int32 + schedtrace int32 + tracebackancestors int32 } var dbgvars = []dbgVar{ @@ -357,6 +358,7 @@ var dbgvars = []dbgVar{ {"scavenge", &debug.scavenge}, {"scheddetail", &debug.scheddetail}, {"schedtrace", &debug.schedtrace}, + {"tracebackancestors", &debug.tracebackancestors}, } func parsedebugvars() { diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 2de1cc8..e12e832 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -353,28 +353,29 @@ type g struct { atomicstatus uint32 // Not for gccgo: stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus goid int64 - waitsince int64 // approx time when the g become blocked - waitreason string // if status==Gwaiting schedlink guintptr - preempt bool // preemption signal, duplicates stackguard0 = stackpreempt - paniconfault bool // panic (instead of crash) on unexpected fault address - preemptscan bool // preempted g does scan for gc - gcscandone bool // g has scanned stack; protected by _Gscan bit in status - gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? - throwsplit bool // must not split stack - raceignore int8 // ignore race detection events - sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine - sysexitticks int64 // cputicks when syscall has returned (for tracing) - traceseq uint64 // trace event sequencer - tracelastp puintptr // last P emitted an event for this goroutine + waitsince int64 // approx time when the g become blocked + waitreason waitReason // if status==Gwaiting + preempt bool // preemption signal, duplicates stackguard0 = stackpreempt + paniconfault bool // panic (instead of crash) on unexpected fault address + preemptscan bool // preempted g does scan for gc + gcscandone bool // g has scanned stack; protected by _Gscan bit in status + gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove? 
+ throwsplit bool // must not split stack + raceignore int8 // ignore race detection events + sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine + sysexitticks int64 // cputicks when syscall has returned (for tracing) + traceseq uint64 // trace event sequencer + tracelastp puintptr // last P emitted an event for this goroutine lockedm muintptr sig uint32 writebuf []byte sigcode0 uintptr sigcode1 uintptr sigpc uintptr - gopc uintptr // pc of go statement that created this goroutine - startpc uintptr // pc of goroutine function + gopc uintptr // pc of go statement that created this goroutine + ancestors *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors) + startpc uintptr // pc of goroutine function // Not for gccgo: racectx uintptr waiting *sudog // sudog structures this g is waiting on (that have a valid elem ptr); in lock order // Not for gccgo: cgoCtxt []uintptr // cgo traceback context @@ -476,15 +477,12 @@ type m struct { ncgo int32 // number of cgo calls currently in progress // Not for gccgo: cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily // Not for gccgo: cgoCallers *cgoCallers // cgo traceback if crashing in cgo call - park note - alllink *m // on allm - schedlink muintptr - mcache *mcache - lockedg guintptr - createstack [32]location // stack that created this thread. - // Not for gccgo: freglo [16]uint32 // d[i] lsb and f[i] - // Not for gccgo: freghi [16]uint32 // d[i] msb and f[i+16] - // Not for gccgo: fflag uint32 // floating point compare flags + park note + alllink *m // on allm + schedlink muintptr + mcache *mcache + lockedg guintptr + createstack [32]location // stack that created this thread. lockedExt uint32 // tracking for external LockOSThread lockedInt uint32 // tracking for internal lockOSThread nextwaitm muintptr // next m waiting for lock @@ -773,6 +771,13 @@ type _panic struct { aborted bool } +// ancestorInfo records details of where a goroutine was started. +type ancestorInfo struct { + pcs []uintptr // pcs from the stack of this goroutine + goid int64 // goroutine id of this goroutine; original goroutine possibly dead + gopc uintptr // pc of go statement that created this goroutine +} + const ( _TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions. _TraceTrap // the initial PC, SP are from a trap, not a return PC from a call @@ -782,6 +787,71 @@ const ( // The maximum number of frames we print for a traceback const _TracebackMaxFrames = 100 +// A waitReason explains why a goroutine has been stopped. +// See gopark. Do not re-use waitReasons, add new ones. 
+type waitReason uint8 + +const ( + waitReasonZero waitReason = iota // "" + waitReasonGCAssistMarking // "GC assist marking" + waitReasonIOWait // "IO wait" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonDumpingHeap // "dumping heap" + waitReasonGarbageCollection // "garbage collection" + waitReasonGarbageCollectionScan // "garbage collection scan" + waitReasonPanicWait // "panicwait" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonGCAssistWait // "GC assist wait" + waitReasonGCSweepWait // "GC sweep wait" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" + waitReasonFinalizerWait // "finalizer wait" + waitReasonForceGGIdle // "force gc (idle)" + waitReasonSemacquire // "semacquire" + waitReasonSleep // "sleep" + waitReasonSyncCondWait // "sync.Cond.Wait" + waitReasonTimerGoroutineIdle // "timer goroutine (idle)" + waitReasonTraceReaderBlocked // "trace reader (blocked)" + waitReasonWaitForGCCycle // "wait for GC cycle" + waitReasonGCWorkerIdle // "GC worker (idle)" +) + +var waitReasonStrings = [...]string{ + waitReasonZero: "", + waitReasonGCAssistMarking: "GC assist marking", + waitReasonIOWait: "IO wait", + waitReasonChanReceiveNilChan: "chan receive (nil chan)", + waitReasonChanSendNilChan: "chan send (nil chan)", + waitReasonDumpingHeap: "dumping heap", + waitReasonGarbageCollection: "garbage collection", + waitReasonGarbageCollectionScan: "garbage collection scan", + waitReasonPanicWait: "panicwait", + waitReasonSelect: "select", + waitReasonSelectNoCases: "select (no cases)", + waitReasonGCAssistWait: "GC assist wait", + waitReasonGCSweepWait: "GC sweep wait", + waitReasonChanReceive: "chan receive", + waitReasonChanSend: "chan send", + waitReasonFinalizerWait: "finalizer wait", + waitReasonForceGGIdle: "force gc (idle)", + waitReasonSemacquire: "semacquire", + waitReasonSleep: "sleep", + waitReasonSyncCondWait: "sync.Cond.Wait", + waitReasonTimerGoroutineIdle: "timer goroutine (idle)", + waitReasonTraceReaderBlocked: "trace reader (blocked)", + waitReasonWaitForGCCycle: "wait for GC cycle", + waitReasonGCWorkerIdle: "GC worker (idle)", +} + +func (w waitReason) String() string { + if w < 0 || w >= waitReason(len(waitReasonStrings)) { + return "unknown wait reason" + } + return waitReasonStrings[w] +} + var ( allglen uintptr allm *m @@ -793,23 +863,7 @@ var ( sched schedt newprocs int32 - // Information about what cpu features are available. - // Set on startup in asm_{x86,amd64}.s. - // Packages outside the runtime should not use these - // as they are not an external api. - cpuid_ecx uint32 support_aes bool - - // cpuid_edx uint32 - // cpuid_ebx7 uint32 - // lfenceBeforeRdtsc bool - // support_avx bool - // support_avx2 bool - // support_bmi1 bool - // support_bmi2 bool - -// goarm uint8 // set by cmd/link on arm systems -// framepointer_enabled bool // set by cmd/link ) // Set by the linker so the runtime can determine the buildmode. 
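
The runtime2.go hunk above replaces the free-form waitreason string with a one-byte waitReason enum backed by a parallel string table. A minimal standalone sketch of the same pattern, with invented names and values:

package main

import "fmt"

// waitKind follows the same pattern as waitReason above: a compact
// unsigned enum plus a parallel string table indexed by the enum value.
type waitKind uint8

const (
	waitNone waitKind = iota // ""
	waitChanReceive          // "chan receive"
	waitChanSend             // "chan send"
	waitSleep                // "sleep"
)

var waitKindStrings = [...]string{
	waitNone:        "",
	waitChanReceive: "chan receive",
	waitChanSend:    "chan send",
	waitSleep:       "sleep",
}

func (w waitKind) String() string {
	if w >= waitKind(len(waitKindStrings)) {
		return "unknown wait reason"
	}
	return waitKindStrings[w]
}

func main() {
	fmt.Println(waitChanSend)  // chan send
	fmt.Println(waitKind(200)) // unknown wait reason
}

Since the underlying type is unsigned, only the upper-bound check in String can ever fire; the `w < 0` half of the runtime's version is vacuously false.
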
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go index 0231043..995ce25 100644 --- a/libgo/go/runtime/runtime_test.go +++ b/libgo/go/runtime/runtime_test.go @@ -169,6 +169,9 @@ func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) { if GOOS == "nacl" { t.Skip("nacl doesn't seem to fault on high addresses") } + if GOOS == "js" { + t.Skip("js does not support catching faults") + } defer func() { if err := recover(); err != nil { @@ -266,7 +269,7 @@ func TestTrailingZero(t *testing.T) { */ func TestBadOpen(t *testing.T) { - if GOOS == "windows" || GOOS == "nacl" { + if GOOS == "windows" || GOOS == "nacl" || GOOS == "js" { t.Skip("skipping OS that doesn't have open/read/write/close") } // make sure we get the correct error code if open fails. Same for diff --git a/libgo/go/runtime/rwmutex_test.go b/libgo/go/runtime/rwmutex_test.go index 872b3b0..291a32e 100644 --- a/libgo/go/runtime/rwmutex_test.go +++ b/libgo/go/runtime/rwmutex_test.go @@ -47,6 +47,9 @@ func doTestParallelReaders(numReaders int) { } func TestParallelRWMutexReaders(t *testing.T) { + if GOARCH == "wasm" { + t.Skip("wasm has no threads yet") + } defer GOMAXPROCS(GOMAXPROCS(-1)) // If runtime triggers a forced GC during this test then it will deadlock, // since the goroutines can't be stopped/preempted. diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go index 9dab052..39c12da 100644 --- a/libgo/go/runtime/select.go +++ b/libgo/go/runtime/select.go @@ -94,7 +94,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { } func block() { - gopark(nil, nil, "select (no cases)", traceEvGoStop, 1) // forever + gopark(nil, nil, waitReasonSelectNoCases, traceEvGoStop, 1) // forever } // selectgo implements the select statement. @@ -307,7 +307,7 @@ loop: // wait for someone to wake us up gp.param = nil - gopark(selparkcommit, nil, "select", traceEvGoBlockSelect, 1) + gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1) sellock(scases, lockorder) diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go index 6e2beec..cb7d3cd 100644 --- a/libgo/go/runtime/sema.go +++ b/libgo/go/runtime/sema.go @@ -15,7 +15,7 @@ // even if, due to races, the wakeup happens before the sleep. // // See Mullender and Cox, ``Semaphores in Plan 9,'' -// http://swtch.com/semaphore.pdf +// https://swtch.com/semaphore.pdf package runtime @@ -141,7 +141,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) { // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. root.queue(addr, s, lifo) - goparkunlock(&root.lock, "semacquire", traceEvGoBlockSync, 4) + goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4) if s.ticket != 0 || cansemacquire(addr) { break } @@ -274,7 +274,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { // addresses, it is kept balanced on average by maintaining a heap ordering // on the ticket: s.ticket <= both s.prev.ticket and s.next.ticket. // https://en.wikipedia.org/wiki/Treap - // http://faculty.washington.edu/aragon/pubs/rst89.pdf + // https://faculty.washington.edu/aragon/pubs/rst89.pdf // // s.ticket compared with zero in couple of places, therefore set lowest bit. // It will not affect treap's quality noticeably. 
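
Many of the test changes above share one guard shape: consult runtime.GOOS/GOARCH first and skip with a message explaining the missing capability. A sketch of that pattern in an ordinary test (the test name and body are invented):

package example

import (
	"runtime"
	"testing"
)

// TestNeedsThreadsAndPreemption shows the guard style used by the
// runtime tests in this patch: skip early on platforms that lack a
// required capability, with a message saying why.
func TestNeedsThreadsAndPreemption(t *testing.T) {
	if runtime.GOOS == "js" || runtime.GOARCH == "wasm" {
		t.Skip("js/wasm has no threads or preemption yet")
	}
	if testing.Short() {
		t.Skip("skipping in -short mode")
	}
	// ... the real body would exercise preemption or OS threads ...
}
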
@@ -507,7 +507,7 @@ func notifyListWait(l *notifyList, t uint32) { l.tail.next = s } l.tail = s - goparkunlock(&l.lock, "semacquire", traceEvGoBlockCond, 3) + goparkunlock(&l.lock, waitReasonSyncCondWait, traceEvGoBlockCond, 3) if t0 != 0 { blockevent(s.releasetime-t0, 2) } diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 698629d..e4bf7bc 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -14,6 +14,11 @@ import ( // GOTRACEBACK=crash when a signal is received. var crashing int32 +// testSigtrap is used by the runtime tests. If non-nil, it is called +// on SIGTRAP. If it returns true, the normal behavior on SIGTRAP is +// suppressed. +var testSigtrap func(info *_siginfo_t, ctxt *sigctxt, gp *g) bool + // sighandler is invoked when a signal occurs. The global g will be // set to a gsignal goroutine and we will be running on the alternate // signal stack. The parameter g will be the value of the global g @@ -27,7 +32,7 @@ var crashing int32 //go:nowritebarrierrec func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_ := getg() - c := sigctxt{info, ctxt} + c := &sigctxt{info, ctxt} sigfault, sigpc := getSiginfo(info, ctxt) @@ -36,6 +41,10 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { return } + if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) { + return + } + flags := int32(_SigThrow) if sig < uint32(len(sigtable)) { flags = sigtable[sig].flags @@ -45,6 +54,11 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { // stack. Abort in the signal handler instead. flags = (flags &^ _SigPanic) | _SigThrow } + if isAbortPC(sigpc) { + // On many architectures, the abort function just + // causes a memory fault. Don't turn that into a panic. + flags = _SigThrow + } if c.sigcode() != _SI_USER && flags&_SigPanic != 0 { // Emulate gc by passing arguments out of band, // although we don't really have to. @@ -87,7 +101,7 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_.m.caughtsig.set(gp) if crashing == 0 { - startpanic() + startpanic_m() } if sig < uint32(len(sigtable)) { diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index a8f77fa..84623d3 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -112,6 +112,8 @@ func initsig(preinit bool) { // set SA_ONSTACK if necessary. if fwdSig[i] != _SIG_DFL && fwdSig[i] != _SIG_IGN { setsigstack(i) + } else if fwdSig[i] == _SIG_IGN { + sigInitIgnored(i) } continue } @@ -398,14 +400,6 @@ func dieFromSignal(sig uint32) { osyield() osyield() - // On Darwin we may still fail to die, because raise sends the - // signal to the whole process rather than just the current thread, - // and osyield just sleeps briefly rather than letting all other - // threads run. See issue 20315. Sleep longer. - if GOOS == "darwin" { - usleep(100) - } - // If we are still somehow running, just exit with the wrong status. exit(2) } @@ -444,7 +438,10 @@ func raisebadsignal(sig uint32, c *sigctxt) { // re-installing sighandler. At this point we can just // return and the signal will be re-raised and caught by // the default handler with the correct context. - if (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { + // + // On FreeBSD, the libthr sigaction code prevents + // this from working so we fall through to raise. 
+ if GOOS != "freebsd" && (isarchive || islibrary) && handler == _SIG_DFL && c.sigcode() != _SI_USER { return } @@ -464,6 +461,7 @@ func raisebadsignal(sig uint32, c *sigctxt) { setsig(sig, getSigtramp()) } +//go:nosplit func crash() { if GOOS == "darwin" { // OS X core dumps are linear dumps of the mapped memory, diff --git a/libgo/go/runtime/sigqueue.go b/libgo/go/runtime/sigqueue.go index b108c39..cf926a9 100644 --- a/libgo/go/runtime/sigqueue.go +++ b/libgo/go/runtime/sigqueue.go @@ -237,7 +237,18 @@ func signal_ignore(s uint32) { atomic.Store(&sig.ignored[s/32], i) } +// sigInitIgnored marks the signal as already ignored. This is called at +// program start by initsig. In a shared library initsig is called by +// libpreinit, so the runtime may not be initialized yet. +//go:nosplit +func sigInitIgnored(s uint32) { + i := sig.ignored[s/32] + i |= 1 << (s & 31) + atomic.Store(&sig.ignored[s/32], i) +} + // Checked by signal handlers. +//go:linkname signal_ignored os_signal.signal_ignored func signal_ignored(s uint32) bool { i := atomic.Load(&sig.ignored[s/32]) return i&(1<<(s&31)) != 0 diff --git a/libgo/go/runtime/sizeof_test.go b/libgo/go/runtime/sizeof_test.go new file mode 100644 index 0000000..ecda82a --- /dev/null +++ b/libgo/go/runtime/sizeof_test.go @@ -0,0 +1,43 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !nacl + +package runtime_test + +import ( + "reflect" + "runtime" + "testing" + "unsafe" +) + +// Assert that the size of important structures do not change unexpectedly. + +func TestSizeof(t *testing.T) { + if runtime.Compiler != "gc" { + t.Skip("skipping size test; specific to gc compiler") + } + + const _64bit = unsafe.Sizeof(uintptr(0)) == 8 + + var tests = []struct { + val interface{} // type as a value + _32bit uintptr // size on 32bit platforms + _64bit uintptr // size on 64bit platforms + }{ + {runtime.G{}, 216, 376}, // g, but exported for testing + } + + for _, tt := range tests { + want := tt._32bit + if _64bit { + want = tt._64bit + } + got := reflect.TypeOf(tt.val).Size() + if want != got { + t.Errorf("unsafe.Sizeof(%T) = %d, want %d", tt.val, got, want) + } + } +} diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index ec5aa64..2e874cc 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -34,14 +35,14 @@ type notInHeapSlice struct { // The index is the size of the slice element. 
var maxElems = [...]uintptr{ ^uintptr(0), - _MaxMem / 1, _MaxMem / 2, _MaxMem / 3, _MaxMem / 4, - _MaxMem / 5, _MaxMem / 6, _MaxMem / 7, _MaxMem / 8, - _MaxMem / 9, _MaxMem / 10, _MaxMem / 11, _MaxMem / 12, - _MaxMem / 13, _MaxMem / 14, _MaxMem / 15, _MaxMem / 16, - _MaxMem / 17, _MaxMem / 18, _MaxMem / 19, _MaxMem / 20, - _MaxMem / 21, _MaxMem / 22, _MaxMem / 23, _MaxMem / 24, - _MaxMem / 25, _MaxMem / 26, _MaxMem / 27, _MaxMem / 28, - _MaxMem / 29, _MaxMem / 30, _MaxMem / 31, _MaxMem / 32, + maxAlloc / 1, maxAlloc / 2, maxAlloc / 3, maxAlloc / 4, + maxAlloc / 5, maxAlloc / 6, maxAlloc / 7, maxAlloc / 8, + maxAlloc / 9, maxAlloc / 10, maxAlloc / 11, maxAlloc / 12, + maxAlloc / 13, maxAlloc / 14, maxAlloc / 15, maxAlloc / 16, + maxAlloc / 17, maxAlloc / 18, maxAlloc / 19, maxAlloc / 20, + maxAlloc / 21, maxAlloc / 22, maxAlloc / 23, maxAlloc / 24, + maxAlloc / 25, maxAlloc / 26, maxAlloc / 27, maxAlloc / 28, + maxAlloc / 29, maxAlloc / 30, maxAlloc / 31, maxAlloc / 32, } // maxSliceCap returns the maximum capacity for a slice. @@ -49,7 +50,15 @@ func maxSliceCap(elemsize uintptr) uintptr { if elemsize < uintptr(len(maxElems)) { return maxElems[elemsize] } - return _MaxMem / elemsize + return maxAlloc / elemsize +} + +func panicmakeslicelen() { + panic(errorString("makeslice: len out of range")) +} + +func panicmakeslicecap() { + panic(errorString("makeslice: cap out of range")) } func makeslice(et *_type, len, cap int) slice { @@ -60,11 +69,11 @@ func makeslice(et *_type, len, cap int) slice { // See issue 4085. maxElements := maxSliceCap(et.size) if len < 0 || uintptr(len) > maxElements { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } if cap < len || uintptr(cap) > maxElements { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } p := mallocgc(et.size*uintptr(cap), et, true) @@ -74,12 +83,12 @@ func makeslice(et *_type, len, cap int) slice { func makeslice64(et *_type, len64, cap64 int64) slice { len := int(len64) if int64(len) != len64 { - panic(errorString("makeslice: len out of range")) + panicmakeslicelen() } cap := int(cap64) if int64(cap) != cap64 { - panic(errorString("makeslice: cap out of range")) + panicmakeslicecap() } return makeslice(et, len, cap) @@ -131,20 +140,36 @@ func growslice(et *_type, old slice, cap int) slice { var overflow bool var lenmem, newlenmem, capmem uintptr - const ptrSize = unsafe.Sizeof((*byte)(nil)) - switch et.size { - case 1: + // Specialize for common values of et.size. + // For 1 we don't need any division/multiplication. + // For sys.PtrSize, compiler will optimize division/multiplication into a shift by a constant. + // For powers of 2, use a variable shift. + switch { + case et.size == 1: lenmem = uintptr(old.len) newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) - overflow = uintptr(newcap) > _MaxMem + overflow = uintptr(newcap) > maxAlloc newcap = int(capmem) - case ptrSize: - lenmem = uintptr(old.len) * ptrSize - newlenmem = uintptr(cap) * ptrSize - capmem = roundupsize(uintptr(newcap) * ptrSize) - overflow = uintptr(newcap) > _MaxMem/ptrSize - newcap = int(capmem / ptrSize) + case et.size == sys.PtrSize: + lenmem = uintptr(old.len) * sys.PtrSize + newlenmem = uintptr(cap) * sys.PtrSize + capmem = roundupsize(uintptr(newcap) * sys.PtrSize) + overflow = uintptr(newcap) > maxAlloc/sys.PtrSize + newcap = int(capmem / sys.PtrSize) + case isPowerOfTwo(et.size): + var shift uintptr + if sys.PtrSize == 8 { + // Mask shift for better code generation. 
+ shift = uintptr(sys.Ctz64(uint64(et.size))) & 63 + } else { + shift = uintptr(sys.Ctz32(uint32(et.size))) & 31 + } + lenmem = uintptr(old.len) << shift + newlenmem = uintptr(cap) << shift + capmem = roundupsize(uintptr(newcap) << shift) + overflow = uintptr(newcap) > (maxAlloc >> shift) + newcap = int(capmem >> shift) default: lenmem = uintptr(old.len) * et.size newlenmem = uintptr(cap) * et.size @@ -167,7 +192,7 @@ func growslice(et *_type, old slice, cap int) slice { // s = append(s, d, d, d, d) // print(len(s), "\n") // } - if cap < old.cap || overflow || capmem > _MaxMem { + if cap < old.cap || overflow || capmem > maxAlloc { panic(errorString("growslice: cap out of range")) } @@ -193,6 +218,10 @@ func growslice(et *_type, old slice, cap int) slice { return slice{p, cap, newcap} } +func isPowerOfTwo(x uintptr) bool { + return x&(x-1) == 0 +} + func slicecopy(to, fm slice, width uintptr) int { if fm.len == 0 || to.len == 0 { return 0 diff --git a/libgo/go/runtime/append_test.go b/libgo/go/runtime/slice_test.go index ef1e812..c2dfb7a 100644 --- a/libgo/go/runtime/append_test.go +++ b/libgo/go/runtime/slice_test.go @@ -31,6 +31,12 @@ func BenchmarkGrowSlice(b *testing.B) { _ = append([]byte(nil), x...) } }) + b.Run("Int16", func(b *testing.B) { + x := make([]int16, 9) + for i := 0; i < b.N; i++ { + _ = append([]int16(nil), x...) + } + }) b.Run("Int", func(b *testing.B) { x := make([]int, 9) for i := 0; i < b.N; i++ { @@ -66,6 +72,36 @@ func BenchmarkGrowSlice(b *testing.B) { }) } +var ( + SinkIntSlice []int + SinkIntPointerSlice []*int +) + +func BenchmarkExtendSlice(b *testing.B) { + var length = 4 // Use a variable to prevent stack allocation of slices. + b.Run("IntSlice", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]int, length)...) + } + SinkIntSlice = s + }) + b.Run("PointerSlice", func(b *testing.B) { + s := make([]*int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length/2], make([]*int, length)...) + } + SinkIntPointerSlice = s + }) + b.Run("NoGrow", func(b *testing.B) { + s := make([]int, 0, length) + for i := 0; i < b.N; i++ { + s = append(s[:0:length], make([]int, length)...) + } + SinkIntSlice = s + }) +} + func BenchmarkAppend(b *testing.B) { b.StopTimer() x := make([]int, 0, N) diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go index e8df9a6..5296ebd 100644 --- a/libgo/go/runtime/string.go +++ b/libgo/go/runtime/string.go @@ -4,7 +4,10 @@ package runtime -import "unsafe" +import ( + "internal/bytealg" + "unsafe" +) // For gccgo, use go:linkname to rename compiler-called functions to // themselves, so that the compiler will export them. @@ -105,6 +108,11 @@ func slicebytetostring(buf *tmpBuf, b []byte) (str string) { if msanenabled { msanread(unsafe.Pointer(&b[0]), uintptr(l)) } + if l == 1 { + stringStructOf(&str).str = unsafe.Pointer(&staticbytes[b[0]]) + stringStructOf(&str).len = 1 + return + } var p unsafe.Pointer if buf != nil && len(b) <= len(buf) { @@ -232,8 +240,13 @@ func stringStructOf(sp *string) *stringStruct { return (*stringStruct)(unsafe.Pointer(sp)) } -func intstring(buf *[4]byte, v int64) string { - var s string +func intstring(buf *[4]byte, v int64) (s string) { + if v >= 0 && v < runeSelf { + stringStructOf(&s).str = unsafe.Pointer(&staticbytes[v]) + stringStructOf(&s).len = 1 + return + } + var b []byte if buf != nil { b = buf[:] @@ -277,7 +290,7 @@ func rawbyteslice(size int) (b []byte) { // rawruneslice allocates a new rune slice. 
The rune slice is not zeroed. func rawruneslice(size int) (b []rune) { - if uintptr(size) > _MaxMem/4 { + if uintptr(size) > maxAlloc/4 { throw("out of memory") } mem := roundupsize(uintptr(size) * 4) @@ -291,13 +304,20 @@ func rawruneslice(size int) (b []rune) { } // used by cmd/cgo -func gobytes(p *byte, n int) []byte { +func gobytes(p *byte, n int) (b []byte) { if n == 0 { return make([]byte, 0) } - x := make([]byte, n) - memmove(unsafe.Pointer(&x[0]), unsafe.Pointer(p), uintptr(n)) - return x + + if n < 0 || uintptr(n) > maxAlloc { + panic(errorString("gobytes: length out of range")) + } + + bp := mallocgc(uintptr(n), nil, false) + memmove(bp, unsafe.Pointer(p), uintptr(n)) + + *(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n} + return } func gostring(p *byte) string { @@ -406,19 +426,50 @@ func findnull(s *byte) int { if s == nil { return 0 } - p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s)) - l := 0 - for p[l] != 0 { - l++ + + // Avoid IndexByteString on Plan 9 because it uses SSE instructions + // on x86 machines, and those are classified as floating point instructions, + // which are illegal in a note handler. + if GOOS == "plan9" { + p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s)) + l := 0 + for p[l] != 0 { + l++ + } + return l + } + + // pageSize is the unit we scan at a time looking for NULL. + // It must be the minimum page size for any architecture Go + // runs on. It's okay (just a minor performance loss) if the + // actual system page size is larger than this value. + const pageSize = 4096 + + offset := 0 + ptr := unsafe.Pointer(s) + // IndexByteString uses wide reads, so we need to be careful + // with page boundaries. Call IndexByteString on + // [ptr, endOfPage) interval. + safeLen := int(pageSize - uintptr(ptr)%pageSize) + + for { + t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen})) + // Check one page at a time. + if i := bytealg.IndexByteString(t, 0); i != -1 { + return offset + i + } + // Move to next page + ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen)) + offset += safeLen + safeLen = pageSize } - return l } func findnullw(s *uint16) int { if s == nil { return 0 } - p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s)) + p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s)) l := 0 for p[l] != 0 { l++ @@ -435,7 +486,7 @@ func gostringnocopy(str *byte) string { func gostringw(strw *uint16) string { var buf [8]byte - str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw)) + str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw)) n1 := 0 for i := 0; str[i] != 0; i++ { n1 += encoderune(buf[:], rune(str[i])) diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go index 555a7fc..03327bb 100644 --- a/libgo/go/runtime/string_test.go +++ b/libgo/go/runtime/string_test.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" "testing" + "unicode/utf8" ) // Strings and slices that don't escape and fit into tmpBuf are stack allocated, @@ -110,6 +111,43 @@ var stringdata = []struct{ name, data string }{ {"MixedLength", "$Ѐࠀက퀀𐀀\U00040000\U0010FFFF"}, } +var sinkInt int + +func BenchmarkRuneCount(b *testing.B) { + // Each sub-benchmark counts the runes in a string in a different way. 
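
The findnull rewrite above only ever hands IndexByteString one page worth of bytes so that its wide reads cannot cross a page boundary. A standalone analogue over an ordinary byte slice, shown just for the loop structure (with a plain slice the page arithmetic is not actually needed):

package main

import (
	"bytes"
	"fmt"
)

// pageSize mirrors the conservative 4096-byte unit findnull scans at a
// time so that a wide IndexByte never reads across a page boundary.
const pageSize = 4096

// findNUL scans p for the first zero byte, at most one "page" per
// IndexByte call. The real code works on unmanaged C memory; startAddr
// here only demonstrates how the first chunk is shortened.
func findNUL(p []byte, startAddr uintptr) int {
	offset := 0
	safeLen := int(pageSize - startAddr%pageSize) // room left on the first page
	for offset < len(p) {
		end := offset + safeLen
		if end > len(p) {
			end = len(p)
		}
		if i := bytes.IndexByte(p[offset:end], 0); i != -1 {
			return offset + i
		}
		offset = end
		safeLen = pageSize
	}
	return -1
}

func main() {
	buf := append(bytes.Repeat([]byte{'x'}, 5000), 0)
	fmt.Println(findNUL(buf, 123)) // 5000
}
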
+ b.Run("lenruneslice", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += len([]rune(sd.data)) + } + }) + } + }) + b.Run("rangeloop", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + n := 0 + for range sd.data { + n++ + } + sinkInt += n + } + }) + } + }) + b.Run("utf8.RuneCountInString", func(b *testing.B) { + for _, sd := range stringdata { + b.Run(sd.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + sinkInt += utf8.RuneCountInString(sd.data) + } + }) + } + }) +} + func BenchmarkRuneIterate(b *testing.B) { b.Run("range", func(b *testing.B) { for _, sd := range stringdata { @@ -125,7 +163,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _ = range sd.data { + for range sd.data { } } }) @@ -135,7 +173,7 @@ func BenchmarkRuneIterate(b *testing.B) { for _, sd := range stringdata { b.Run(sd.name, func(b *testing.B) { for i := 0; i < b.N; i++ { - for _, _ = range sd.data { + for range sd.data { } } }) diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index 1d21445..1aae4f3 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -69,8 +69,13 @@ func systemstack(fn func()) { } } +var badsystemstackMsg = "fatal: systemstack called from unexpected goroutine" + +//go:nosplit +//go:nowritebarrierrec func badsystemstack() { - throw("systemstack called from unexpected goroutine") + sp := stringStructOf(&badsystemstackMsg) + write(2, sp.str, int32(sp.len)) } // memclrNoHeapPointers clears n bytes starting at ptr. @@ -127,7 +132,7 @@ func fastrand() uint32 { //go:nosplit func fastrandn(n uint32) uint32 { // This is similar to fastrand() % n, but faster. - // See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + // See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ return uint32(uint64(fastrand()) * uint64(n) >> 32) } @@ -198,7 +203,6 @@ func publicationBarrier() // getcallerpc returns the program counter (PC) of its caller's caller. // getcallersp returns the stack pointer (SP) of its caller's caller. -// argp must be a pointer to the caller's first function argument. // The implementation may be a compiler intrinsic; there is not // necessarily code implementing this on every platform. // @@ -213,10 +217,7 @@ func publicationBarrier() // the call to f (where f will return). // // The call to getcallerpc and getcallersp must be done in the -// frame being asked about. It would not be correct for f to pass &arg1 -// to another function g and let g call getcallerpc/getcallersp. -// The call inside g might return information about g's caller or -// information about f's caller or complete garbage. +// frame being asked about. // // The result of getcallersp is correct at the time of the return, // but it may be invalidated by any subsequent call to a function @@ -228,7 +229,7 @@ func publicationBarrier() func getcallerpc() uintptr //go:noescape -func getcallersp() uintptr +func getcallersp() uintptr // implemented as an intrinsic on all platforms func asmcgocall(fn, arg unsafe.Pointer) int32 { throw("asmcgocall") @@ -293,12 +294,6 @@ func setIsCgo() { } // For gccgo, to communicate from the C code to the Go code. 
-//go:linkname setCpuidECX runtime.setCpuidECX -func setCpuidECX(v uint32) { - cpuid_ecx = v -} - -// For gccgo, to communicate from the C code to the Go code. //go:linkname setSupportAES runtime.setSupportAES func setSupportAES(v bool) { support_aes = v @@ -336,6 +331,9 @@ func getSiginfo(*_siginfo_t, unsafe.Pointer) (sigaddr uintptr, sigpc uintptr) // Implemented in C for gccgo. func dumpregs(*_siginfo_t, unsafe.Pointer) +// Implemented in C for gccgo. +func setRandomNumber(uint32) + // Temporary for gccgo until we port proc.go. //go:linkname getsched runtime.getsched func getsched() *schedt { @@ -426,6 +424,15 @@ type bitvector struct { bytedata *uint8 } +// ptrbit returns the i'th bit in bv. +// ptrbit is less efficient than iterating directly over bitvector bits, +// and should only be used in non-performance-critical code. +// See adjustpointers for an example of a high-efficiency walk of a bitvector. +func (bv *bitvector) ptrbit(i uintptr) uint8 { + b := *(addb(bv.bytedata, i/8)) + return (b >> (i % 8)) & 1 +} + // bool2int returns 0 if x is false or 1 if x is true. func bool2int(x bool) int { if x { @@ -433,3 +440,10 @@ func bool2int(x bool) int { } return 0 } + +// abort crashes the runtime in situations where even throw might not +// work. In general it should do something a debugger will recognize +// (e.g., an INT3 on x86). A crash in abort is recognized by the +// signal handler, which will attempt to tear down the runtime +// immediately. +func abort() diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index e305b16..1cb910c 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -5,6 +5,8 @@ // +build !plan9 // +build !windows // +build !nacl +// +build !js +// +build !darwin package runtime @@ -16,7 +18,6 @@ func closefd(fd int32) int32 //extern exit func exit(code int32) -func nanotime() int64 func usleep(usec uint32) //go:noescape diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go new file mode 100644 index 0000000..5c0786e --- /dev/null +++ b/libgo/go/runtime/stubs3.go @@ -0,0 +1,14 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9 +// +build !solaris +// +build !windows +// +build !nacl +// +build !freebsd +// +build !darwin + +package runtime + +func nanotime() int64 diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index 12dc672..861921c 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -124,6 +124,7 @@ type funcID uint32 const ( funcID_normal funcID = iota // not a special function + funcID_runtime_main funcID_goexit funcID_jmpdefer funcID_mcall @@ -133,15 +134,13 @@ const ( funcID_asmcgocall funcID_sigpanic funcID_runfinq - funcID_bgsweep - funcID_forcegchelper - funcID_timerproc funcID_gcBgMarkWorker funcID_systemstack_switch funcID_systemstack funcID_cgocallback_gofunc funcID_gogo funcID_externalthreadhandler + funcID_debugCallV1 ) // FuncForPC returns a *Func describing the function that contains the diff --git a/libgo/go/runtime/sys_darwin.go b/libgo/go/runtime/sys_darwin.go new file mode 100644 index 0000000..7efbef7 --- /dev/null +++ b/libgo/go/runtime/sys_darwin.go @@ -0,0 +1,374 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +// Call fn with arg as its argument. 
Return what fn returns. +// fn is the raw pc value of the entry point of the desired function. +// Switches to the system stack, if not already there. +// Preserves the calling point as the location where a profiler traceback will begin. +//go:nosplit +func libcCall(fn, arg unsafe.Pointer) int32 { + // Leave caller's PC/SP/G around for traceback. + gp := getg() + var mp *m + if gp != nil { + mp = gp.m + } + if mp != nil && mp.libcallsp == 0 { + mp.libcallg.set(gp) + mp.libcallpc = getcallerpc() + // sp must be the last, because once async cpu profiler finds + // all three values to be non-zero, it will use them + mp.libcallsp = getcallersp() + } else { + // Make sure we don't reset libcallsp. This makes + // libcCall reentrant; We remember the g/pc/sp for the + // first call on an M, until that libcCall instance + // returns. Reentrance only matters for signals, as + // libc never calls back into Go. The tricky case is + // where we call libcX from an M and record g/pc/sp. + // Before that call returns, a signal arrives on the + // same M and the signal handling code calls another + // libc function. We don't want that second libcCall + // from within the handler to be recorded, and we + // don't want that call's completion to zero + // libcallsp. + // We don't need to set libcall* while we're in a sighandler + // (even if we're not currently in libc) because we block all + // signals while we're handling a signal. That includes the + // profile signal, which is the one that uses the libcall* info. + mp = nil + } + res := asmcgocall(fn, arg) + if mp != nil { + mp.libcallsp = 0 + } + return res +} + +// The *_trampoline functions convert from the Go calling convention to the C calling convention +// and then call the underlying libc function. They are defined in sys_darwin_$ARCH.s. 
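
The wrappers that follow all funnel through libcCall with a single unsafe.Pointer to their first argument; calls with several inputs and outputs, like mmap, pack everything into one struct. The sketch below simulates that convention in pure Go (fakeTrampoline is invented and merely echoes a value; the real stubs are assembly that invokes libc):

package main

import (
	"fmt"
	"unsafe"
)

// mmapArgs mirrors the packing convention used with libcCall: all
// inputs and outputs sit in one struct, and only a single pointer
// crosses into the assembly trampoline.
type mmapArgs struct {
	addr            unsafe.Pointer
	n               uintptr
	prot, flags, fd int32
	off             uint32
	ret1            unsafe.Pointer
	ret2            int
}

// fakeTrampoline stands in for the real assembly stub: it receives one
// pointer and recovers every argument from the struct behind it. A real
// trampoline would move these into the C calling convention and call
// libc mmap.
func fakeTrampoline(p unsafe.Pointer) {
	a := (*mmapArgs)(p)
	a.ret1 = nil
	a.ret2 = int(a.n) // placeholder result, not a real mmap
}

func main() {
	args := mmapArgs{n: 4096, prot: 3, flags: 0x1002, fd: -1}
	fakeTrampoline(unsafe.Pointer(&args))
	fmt.Println(args.ret2) // 4096
}
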
+ +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_init(attr *pthreadattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_init_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setstacksize(attr *pthreadattr, size uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setstacksize_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setstacksize_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_attr_setdetachstate(attr *pthreadattr, state int) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_attr_setdetachstate_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_attr_setdetachstate_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_create(attr *pthreadattr, start uintptr, arg unsafe.Pointer) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_create_trampoline)), unsafe.Pointer(&attr)) +} +func pthread_create_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raise(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raise_trampoline)), unsafe.Pointer(&sig)) +} +func raise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_self() (t pthread) { + libcCall(unsafe.Pointer(funcPC(pthread_self_trampoline)), unsafe.Pointer(&t)) + return +} +func pthread_self_trampoline() + +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (unsafe.Pointer, int) { + args := struct { + addr unsafe.Pointer + n uintptr + prot, flags, fd int32 + off uint32 + ret1 unsafe.Pointer + ret2 int + }{addr, n, prot, flags, fd, off, nil, 0} + libcCall(unsafe.Pointer(funcPC(mmap_trampoline)), unsafe.Pointer(&args)) + return args.ret1, args.ret2 +} +func mmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func munmap(addr unsafe.Pointer, n uintptr) { + libcCall(unsafe.Pointer(funcPC(munmap_trampoline)), unsafe.Pointer(&addr)) +} +func munmap_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func madvise(addr unsafe.Pointer, n uintptr, flags int32) { + libcCall(unsafe.Pointer(funcPC(madvise_trampoline)), unsafe.Pointer(&addr)) +} +func madvise_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func read(fd int32, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(read_trampoline)), unsafe.Pointer(&fd)) +} +func read_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func closefd(fd int32) int32 { + return libcCall(unsafe.Pointer(funcPC(close_trampoline)), unsafe.Pointer(&fd)) +} +func close_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func exit(code int32) { + libcCall(unsafe.Pointer(funcPC(exit_trampoline)), unsafe.Pointer(&code)) +} +func exit_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func usleep(usec uint32) { + libcCall(unsafe.Pointer(funcPC(usleep_trampoline)), unsafe.Pointer(&usec)) +} +func usleep_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func write(fd uintptr, p unsafe.Pointer, n int32) int32 { + return libcCall(unsafe.Pointer(funcPC(write_trampoline)), unsafe.Pointer(&fd)) +} +func write_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func open(name *byte, mode, perm int32) (ret int32) { + return libcCall(unsafe.Pointer(funcPC(open_trampoline)), unsafe.Pointer(&name)) +} +func open_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func nanotime() int64 { + var r struct { + t int64 // raw timer + numer, denom uint32 // conversion factors. nanoseconds = t * numer / denom. 
+ } + libcCall(unsafe.Pointer(funcPC(nanotime_trampoline)), unsafe.Pointer(&r)) + // Note: Apple seems unconcerned about overflow here. See + // https://developer.apple.com/library/content/qa/qa1398/_index.html + // Note also, numer == denom == 1 is common. + t := r.t + if r.numer != 1 { + t *= int64(r.numer) + } + if r.denom != 1 { + t /= int64(r.denom) + } + return t +} +func nanotime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func walltime() (int64, int32) { + var t timeval + libcCall(unsafe.Pointer(funcPC(walltime_trampoline)), unsafe.Pointer(&t)) + return int64(t.tv_sec), 1000 * t.tv_usec +} +func walltime_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaction(sig uint32, new *usigactiont, old *usigactiont) { + libcCall(unsafe.Pointer(funcPC(sigaction_trampoline)), unsafe.Pointer(&sig)) +} +func sigaction_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigprocmask(how uint32, new *sigset, old *sigset) { + libcCall(unsafe.Pointer(funcPC(sigprocmask_trampoline)), unsafe.Pointer(&how)) +} +func sigprocmask_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sigaltstack(new *stackt, old *stackt) { + if new != nil && new.ss_flags&_SS_DISABLE != 0 && new.ss_size == 0 { + // Despite the fact that Darwin's sigaltstack man page says it ignores the size + // when SS_DISABLE is set, it doesn't. sigaltstack returns ENOMEM + // if we don't give it a reasonable size. + // ref: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140421/214296.html + new.ss_size = 32768 + } + libcCall(unsafe.Pointer(funcPC(sigaltstack_trampoline)), unsafe.Pointer(&new)) +} +func sigaltstack_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func raiseproc(sig uint32) { + libcCall(unsafe.Pointer(funcPC(raiseproc_trampoline)), unsafe.Pointer(&sig)) +} +func raiseproc_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func setitimer(mode int32, new, old *itimerval) { + libcCall(unsafe.Pointer(funcPC(setitimer_trampoline)), unsafe.Pointer(&mode)) +} +func setitimer_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32 { + return libcCall(unsafe.Pointer(funcPC(sysctl_trampoline)), unsafe.Pointer(&mib)) +} +func sysctl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func fcntl(fd, cmd, arg int32) int32 { + return libcCall(unsafe.Pointer(funcPC(fcntl_trampoline)), unsafe.Pointer(&fd)) +} +func fcntl_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kqueue() int32 { + v := libcCall(unsafe.Pointer(funcPC(kqueue_trampoline)), nil) + return v +} +func kqueue_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func kevent(kq int32, ch *keventt, nch int32, ev *keventt, nev int32, ts *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(kevent_trampoline)), unsafe.Pointer(&kq)) +} +func kevent_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_init(m *pthreadmutex, attr *pthreadmutexattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_init_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_lock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_lock_trampoline)), unsafe.Pointer(&m)) +} +func pthread_mutex_lock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_mutex_unlock(m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_mutex_unlock_trampoline)), unsafe.Pointer(&m)) +} +func 
pthread_mutex_unlock_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_init(c *pthreadcond, attr *pthreadcondattr) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_init_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_init_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_wait(c *pthreadcond, m *pthreadmutex) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_wait_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_wait_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_timedwait_relative_np(c *pthreadcond, m *pthreadmutex, t *timespec) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_timedwait_relative_np_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_timedwait_relative_np_trampoline() + +//go:nosplit +//go:cgo_unsafe_args +func pthread_cond_signal(c *pthreadcond) int32 { + return libcCall(unsafe.Pointer(funcPC(pthread_cond_signal_trampoline)), unsafe.Pointer(&c)) +} +func pthread_cond_signal_trampoline() + +// Not used on Darwin, but must be defined. +func exitThread(wait *uint32) { +} + +//go:nosplit +func closeonexec(fd int32) { + fcntl(fd, _F_SETFD, _FD_CLOEXEC) +} + +// Tell the linker that the libc_* functions are to be found +// in a system library, with the libc_ prefix missing. + +//go:cgo_import_dynamic libc_pthread_attr_init pthread_attr_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setstacksize pthread_attr_setstacksize "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_create pthread_create "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_exit exit "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_raise raise "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_open open "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_close close "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_read read "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_write write "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mmap mmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_munmap munmap "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_madvise madvise "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_error __error "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_usleep usleep "/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_mach_timebase_info mach_timebase_info "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_mach_absolute_time mach_absolute_time "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_gettimeofday gettimeofday "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaction sigaction "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_sigmask pthread_sigmask "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sigaltstack sigaltstack "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_getpid getpid "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kill kill "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_setitimer setitimer "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_fcntl fcntl "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kqueue kqueue "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_kevent kevent 
"/usr/lib/libSystem.B.dylib" + +//go:cgo_import_dynamic libc_pthread_mutex_init pthread_mutex_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_lock pthread_mutex_lock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_mutex_unlock pthread_mutex_unlock "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_init pthread_cond_init "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_wait pthread_cond_wait "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_timedwait_relative_np pthread_cond_timedwait_relative_np "/usr/lib/libSystem.B.dylib" +//go:cgo_import_dynamic libc_pthread_cond_signal pthread_cond_signal "/usr/lib/libSystem.B.dylib" + +// Magic incantation to get libSystem actually dynamically linked. +// TODO: Why does the code require this? See cmd/compile/internal/ld/go.go:210 +//go:cgo_import_dynamic _ _ "/usr/lib/libSystem.B.dylib" diff --git a/libgo/go/runtime/sys_wasm.go b/libgo/go/runtime/sys_wasm.go new file mode 100644 index 0000000..9bf710b --- /dev/null +++ b/libgo/go/runtime/sys_wasm.go @@ -0,0 +1,42 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +type m0Stack struct { + _ [8192 * sys.StackGuardMultiplier]byte +} + +var wasmStack m0Stack + +func wasmMove() + +func wasmZero() + +func wasmDiv() + +func wasmTruncS() +func wasmTruncU() + +func wasmExit(code int32) + +// adjust Gobuf as it if executed a call to fn with context ctxt +// and then did an immediate gosave. +func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) { + sp := buf.sp + if sys.RegSize > sys.PtrSize { + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = 0 + } + sp -= sys.PtrSize + *(*uintptr)(unsafe.Pointer(sp)) = buf.pc + buf.sp = sp + buf.pc = uintptr(fn) + buf.ctxt = ctxt +} diff --git a/libgo/go/runtime/testdata/testprog/abort.go b/libgo/go/runtime/testdata/testprog/abort.go new file mode 100644 index 0000000..9e79d4d --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/abort.go @@ -0,0 +1,23 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import _ "unsafe" // for go:linkname + +func init() { + register("Abort", Abort) +} + +//go:linkname runtimeAbort runtime.abort +func runtimeAbort() + +func Abort() { + defer func() { + recover() + panic("BAD: recovered from abort") + }() + runtimeAbort() + println("BAD: after abort") +} diff --git a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go index 035c534..42ee154 100644 --- a/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go +++ b/libgo/go/runtime/testdata/testprog/numcpu_freebsd.go @@ -9,12 +9,17 @@ import ( "fmt" "os" "os/exec" + "regexp" "runtime" "strconv" "strings" "syscall" ) +var ( + cpuSetRE = regexp.MustCompile(`(\d,?)+`) +) + func init() { register("FreeBSDNumCPU", FreeBSDNumCPU) register("FreeBSDNumCPUHelper", FreeBSDNumCPUHelper) @@ -105,8 +110,12 @@ func checkNCPU(list []string) error { return fmt.Errorf("could not check against an empty CPU list") } + cListString := cpuSetRE.FindString(listString) + if len(cListString) == 0 { + return fmt.Errorf("invalid cpuset output '%s'", listString) + } // Launch FreeBSDNumCPUHelper() with specified CPUs list. 
- cmd := exec.Command("cpuset", "-l", listString, os.Args[0], "FreeBSDNumCPUHelper") + cmd := exec.Command("cpuset", "-l", cListString, os.Args[0], "FreeBSDNumCPUHelper") cmdline := strings.Join(cmd.Args, " ") output, err := cmd.CombinedOutput() if err != nil { @@ -120,7 +129,7 @@ func checkNCPU(list []string) error { return fmt.Errorf("fail to parse output from child '%s', error: %s, output: %s", cmdline, err, output) } if n != len(list) { - return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, listString) + return fmt.Errorf("runtime.NumCPU() expected to %d, got %d when run with CPU list %s", len(list), n, cListString) } return nil } diff --git a/libgo/go/runtime/testdata/testprog/timeprof.go b/libgo/go/runtime/testdata/testprog/timeprof.go new file mode 100644 index 0000000..0702885 --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/timeprof.go @@ -0,0 +1,46 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "io/ioutil" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("TimeProf", TimeProf) +} + +func TimeProf() { + f, err := ioutil.TempFile("", "timeprof") + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + if err := pprof.StartCPUProfile(f); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + t0 := time.Now() + // We should get a profiling signal 100 times a second, + // so running for 1/10 second should be sufficient. + for time.Since(t0) < time.Second/10 { + } + + pprof.StopCPUProfile() + + name := f.Name() + if err := f.Close(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(2) + } + + fmt.Println(name) +} diff --git a/libgo/go/runtime/testdata/testprog/traceback_ancestors.go b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go new file mode 100644 index 0000000..fe57c1c --- /dev/null +++ b/libgo/go/runtime/testdata/testprog/traceback_ancestors.go @@ -0,0 +1,53 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "runtime" +) + +func init() { + register("TracebackAncestors", TracebackAncestors) +} + +const numGoroutines = 3 +const numFrames = 2 + +func TracebackAncestors() { + w := make(chan struct{}) + recurseThenCallGo(w, numGoroutines, numFrames) + <-w + printStack() + close(w) +} + +func printStack() { + buf := make([]byte, 1024) + for { + n := runtime.Stack(buf, true) + if n < len(buf) { + fmt.Print(string(buf[:n])) + return + } + buf = make([]byte, 2*len(buf)) + } +} + +func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { + if frames == 0 { + // Signal to TracebackAncestors that we are done recursing and starting goroutines. + w <- struct{}{} + <-w + return + } + if goroutines == 0 { + // Start the next goroutine now that there are no more recursions left + // for this current goroutine. + go recurseThenCallGo(w, frames-1, numFrames) + return + } + recurseThenCallGo(w, frames, goroutines-1) +} diff --git a/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go new file mode 100644 index 0000000..f58fcf9 --- /dev/null +++ b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.go @@ -0,0 +1,27 @@ +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +/* +typedef void callback(char*); +extern void goBigStack1(char*); +extern void bigStack(callback*); +*/ +import "C" + +func init() { + register("BigStack", BigStack) +} + +func BigStack() { + // Create a large thread stack and call back into Go to test + // if Go correctly determines the stack bounds. + C.bigStack((*C.callback)(C.goBigStack1)) +} + +//export goBigStack1 +func goBigStack1(x *C.char) { + println("OK") +} diff --git a/libgo/go/runtime/testdata/testprogcgo/raceprof.go b/libgo/go/runtime/testdata/testprogcgo/raceprof.go index 466a367..0750ec1 100644 --- a/libgo/go/runtime/testdata/testprogcgo/raceprof.go +++ b/libgo/go/runtime/testdata/testprogcgo/raceprof.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 // +build !gccgo package main diff --git a/libgo/go/runtime/testdata/testprogcgo/racesig.go b/libgo/go/runtime/testdata/testprogcgo/racesig.go index d0c1c3c..a079b3f 100644 --- a/libgo/go/runtime/testdata/testprogcgo/racesig.go +++ b/libgo/go/runtime/testdata/testprogcgo/racesig.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux,amd64 +// +build linux,amd64 freebsd,amd64 package main diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go index b707590..a95d95b 100644 --- a/libgo/go/runtime/time.go +++ b/libgo/go/runtime/time.go @@ -98,8 +98,11 @@ func timeSleep(ns int64) { t.arg = gp tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) - goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) + if !tb.addtimerLocked(t) { + unlock(&tb.lock) + badTimer() + } + goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -128,14 +131,19 @@ func goroutineReady(arg interface{}, seq uintptr) { func addtimer(t *timer) { tb := t.assignBucket() lock(&tb.lock) - tb.addtimerLocked(t) + ok := tb.addtimerLocked(t) unlock(&tb.lock) + if !ok { + badTimer() + } } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func (tb *timersBucket) addtimerLocked(t *timer) { +// Returns whether all is well: false if the data structure is corrupt +// due to user-level races. +func (tb *timersBucket) addtimerLocked(t *timer) bool { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { @@ -143,7 +151,9 @@ func (tb *timersBucket) addtimerLocked(t *timer) { } t.i = len(tb.t) tb.t = append(tb.t, t) - siftupTimer(tb.t, t.i) + if !siftupTimer(tb.t, t.i) { + return false + } if t.i == 0 { // siftup moved to top: new earliest deadline. if tb.sleeping { @@ -160,6 +170,7 @@ func (tb *timersBucket) addtimerLocked(t *timer) { expectSystemGoroutine() go timerproc(tb) } + return true } // Delete timer t from the heap. 
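
The time.go changes above and below make the heap operations report corruption with a bool so that badTimer can panic only after the bucket lock is released. A hedged standalone sketch of that detect-under-lock, panic-after-unlock shape (the bucket type and its ordering check are invented):

package main

import (
	"errors"
	"fmt"
	"sync"
)

type bucket struct {
	mu    sync.Mutex
	items []int
}

var errRacyUse = errors.New("racy use of timers")

// addLocked must be called with mu held; it returns false if the
// structure looks corrupted (here: entries no longer sorted).
func (b *bucket) addLocked(v int) bool {
	if n := len(b.items); n > 0 && b.items[n-1] > v {
		return false
	}
	b.items = append(b.items, v)
	return true
}

func (b *bucket) add(v int) {
	b.mu.Lock()
	ok := b.addLocked(v)
	b.mu.Unlock()
	if !ok {
		panic(errRacyUse) // raised only after the unlock
	}
}

func main() {
	var b bucket
	b.add(1)
	b.add(2)
	fmt.Println(b.items) // [1 2]
}
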
@@ -192,11 +203,19 @@ func deltimer(t *timer) bool { } tb.t[last] = nil tb.t = tb.t[:last] + ok := true if i != last { - siftupTimer(tb.t, i) - siftdownTimer(tb.t, i) + if !siftupTimer(tb.t, i) { + ok = false + } + if !siftdownTimer(tb.t, i) { + ok = false + } } unlock(&tb.lock) + if !ok { + badTimer() + } return true } @@ -222,10 +241,13 @@ func timerproc(tb *timersBucket) { if delta > 0 { break } + ok := true if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } else { // remove from heap last := len(tb.t) - 1 @@ -236,7 +258,9 @@ func timerproc(tb *timersBucket) { tb.t[last] = nil tb.t = tb.t[:last] if last > 0 { - siftdownTimer(tb.t, 0) + if !siftdownTimer(tb.t, 0) { + ok = false + } } t.i = -1 // mark as removed } @@ -244,6 +268,9 @@ func timerproc(tb *timersBucket) { arg := t.arg seq := t.seq unlock(&tb.lock) + if !ok { + badTimer() + } if raceenabled { raceacquire(unsafe.Pointer(t)) } @@ -253,7 +280,7 @@ func timerproc(tb *timersBucket) { if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. tb.rescheduling = true - goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + goparkunlock(&tb.lock, waitReasonTimerGoroutineIdle, traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. @@ -329,8 +356,20 @@ func timeSleepUntil() int64 { } // Heap maintenance algorithms. - -func siftupTimer(t []*timer, i int) { +// These algorithms check for slice index errors manually. +// Slice index error can happen if the program is using racy +// access to timers. We don't want to panic here, because +// it will cause the program to crash with a mysterious +// "panic holding locks" message. Instead, we panic while not +// holding a lock. +// The races can occur despite the bucket locks because assignBucket +// itself is called without locks, so racy calls can cause a timer to +// change buckets while executing these functions. + +func siftupTimer(t []*timer, i int) bool { + if i >= len(t) { + return false + } when := t[i].when tmp := t[i] for i > 0 { @@ -346,10 +385,14 @@ func siftupTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true } -func siftdownTimer(t []*timer, i int) { +func siftdownTimer(t []*timer, i int) bool { n := len(t) + if i >= n { + return false + } when := t[i].when tmp := t[i] for { @@ -385,6 +428,15 @@ func siftdownTimer(t []*timer, i int) { t[i] = tmp t[i].i = i } + return true +} + +// badTimer is called if the timer data structures have been corrupted, +// presumably due to racy use by the program. We panic here rather than +// panicing due to invalid slice access while holding locks. +// See issue #25686. +func badTimer() { + panic(errorString("racy use of timers")) } // Entry points for net, time to call nanotime. diff --git a/libgo/go/runtime/timeasm.go b/libgo/go/runtime/timeasm.go index d5f5ea3..55b0d07 100644 --- a/libgo/go/runtime/timeasm.go +++ b/libgo/go/runtime/timeasm.go @@ -7,7 +7,7 @@ // so that time.now and nanotime return the same monotonic clock readings. // +build ignore -// +build darwin,amd64 darwin,386 windows +// +build windows package runtime diff --git a/libgo/go/runtime/timestub.go b/libgo/go/runtime/timestub.go index 033734e..9f1d111 100644 --- a/libgo/go/runtime/timestub.go +++ b/libgo/go/runtime/timestub.go @@ -5,15 +5,12 @@ // Declarations for operating systems implementing time.now // indirectly, in terms of walltime and nanotime assembly. 
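Editor's note on the heap maintenance changes above: siftupTimer and siftdownTimer now validate the index and return false so the caller can unlock and report corruption, instead of a slice-bounds panic firing under the bucket lock. A simplified sketch on an int min-heap (not the runtime's *timer code; the 4-ary parent arithmetic matches siftupTimer):

    package main

    import "fmt"

    // siftup is a bounds-checked heap fix-up in the shape of siftupTimer.
    func siftup(t []int, i int) bool {
        if i >= len(t) {
            return false // corrupt index: let the caller unlock and report it
        }
        v := t[i]
        for i > 0 {
            p := (i - 1) / 4 // parent in a 4-ary heap, as in siftupTimer
            if v >= t[p] {
                break
            }
            t[i] = t[p]
            i = p
        }
        t[i] = v
        return true
    }

    func main() {
        h := []int{1, 5, 7, 9, 11, 0}
        fmt.Println(siftup(h, len(h)-1), h) // true [0 1 7 9 11 5]
        fmt.Println(siftup(h, 42))          // false: out-of-range index detected
    }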
-// -build !darwin !amd64,!386 // -build !windows package runtime import _ "unsafe" // for go:linkname -func walltime() (sec int64, nsec int32) - //go:linkname time_now time.now func time_now() (sec int64, nsec int32, mono int64) { sec, nsec = walltime() diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go new file mode 100644 index 0000000..9ddc6fe --- /dev/null +++ b/libgo/go/runtime/timestub2.go @@ -0,0 +1,11 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !darwin +// +build !windows +// +build !freebsd + +package runtime + +func walltime() (sec int64, nsec int32) diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go index e2bbb5d..7aed9a9 100644 --- a/libgo/go/runtime/trace.go +++ b/libgo/go/runtime/trace.go @@ -64,7 +64,14 @@ const ( traceEvGoBlockGC = 42 // goroutine blocks on GC assist [timestamp, stack] traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack] traceEvGCMarkAssistDone = 44 // GC mark assist done [timestamp] - traceEvCount = 45 + traceEvUserTaskCreate = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string] + traceEvUserTaskEnd = 46 // end of a task [timestamp, internal task id, stack] + traceEvUserRegion = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string] + traceEvUserLog = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string] + traceEvCount = 49 + // Byte is used but only 6 bits are available for event type. + // The remaining 2 bits are used to specify the number of arguments. + // That means, the max event type value is 63. ) const ( @@ -121,11 +128,13 @@ var trace struct { // Dictionary for traceEvString. // - // Currently this is used only at trace setup and for - // func/file:line info after tracing session, so we assume - // single-threaded access. - strings map[string]uint64 - stringSeq uint64 + // TODO: central lock to access the map is not ideal. + // option: pre-assign ids to all user annotation region names and tags + // option: per-P cache + // option: sync.Map like data structure + stringsLock mutex + strings map[string]uint64 + stringSeq uint64 // markWorkerLabels maps gcMarkWorkerMode to string ID. markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64 @@ -379,12 +388,12 @@ func ReadTrace() []byte { trace.headerWritten = true trace.lockOwner = nil unlock(&trace.lock) - return []byte("go 1.10 trace\x00\x00\x00") + return []byte("go 1.11 trace\x00\x00\x00") } // Wait for new data. if trace.fullHead == 0 && !trace.shutdown { trace.reader.set(getg()) - goparkunlock(&trace.lock, "trace reader (blocked)", traceEvGoBlock, 2) + goparkunlock(&trace.lock, waitReasonTraceReaderBlocked, traceEvGoBlock, 2) lock(&trace.lock) } // Write a buffer. @@ -507,12 +516,26 @@ func traceEvent(ev byte, skip int, args ...uint64) { // so if we see trace.enabled == true now, we know it's true for the rest of the function. // Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace // during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer. + // + // Note trace_userTaskCreate runs the same check. if !trace.enabled && !mp.startingtrace { traceReleaseBuffer(pid) return } + + if skip > 0 { + if getg() == mp.curg { + skip++ // +1 because stack is captured in traceEventLocked. + } + } + traceEventLocked(0, mp, pid, bufp, ev, skip, args...) 
+ traceReleaseBuffer(pid) +} + +func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, skip int, args ...uint64) { buf := (*bufp).ptr() - const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params + // TODO: test on non-zero extraBytes param. + maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params if buf == nil || len(buf.arr)-buf.pos < maxSize { buf = traceFlush(traceBufPtrOf(buf), pid).ptr() (*bufp).set(buf) @@ -555,7 +578,6 @@ func traceEvent(ev byte, skip int, args ...uint64) { // Fill in actual length. *lenp = byte(evSize - 2) } - traceReleaseBuffer(pid) } func traceStackID(mp *m, buf []location, skip int) uint64 { @@ -636,7 +658,20 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) if s == "" { return 0, bufp } + + lock(&trace.stringsLock) + if raceenabled { + // raceacquire is necessary because the map access + // below is race annotated. + raceacquire(unsafe.Pointer(&trace.stringsLock)) + } + if id, ok := trace.strings[s]; ok { + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + return id, bufp } @@ -644,6 +679,11 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) id := trace.stringSeq trace.strings[s] = id + if raceenabled { + racerelease(unsafe.Pointer(&trace.stringsLock)) + } + unlock(&trace.stringsLock) + // memory allocation in above may trigger tracing and // cause *bufp changes. Following code now works with *bufp, // so there must be no memory allocation or any activities @@ -657,8 +697,16 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) } buf.byte(traceEvString) buf.varint(id) - buf.varint(uint64(len(s))) - buf.pos += copy(buf.arr[buf.pos:], s) + + // double-check the string and the length can fit. + // Otherwise, truncate the string. + slen := len(s) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], s[:slen]) (*bufp).set(buf) return id, bufp @@ -1091,3 +1139,78 @@ func traceNextGC() { traceEvent(traceEvNextGC, -1, memstats.next_gc) } } + +// To access runtime functions from runtime/trace. +// See runtime/trace/annotation.go + +//go:linkname trace_userTaskCreate runtime_trace.userTaskCreate +func trace_userTaskCreate(id, parentID uint64, taskType string) { + if !trace.enabled { + return + } + + // Same as in traceEvent. 
+ mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + typeStringID, bufp := traceString(bufp, pid, taskType) + traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 3, id, parentID, typeStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userTaskEnd runtime_trace.userTaskEnd +func trace_userTaskEnd(id uint64) { + traceEvent(traceEvUserTaskEnd, 2, id) +} + +//go:linkname trace_userRegion runtime_trace.userRegion +func trace_userRegion(id, mode uint64, name string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + nameStringID, bufp := traceString(bufp, pid, name) + traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 3, id, mode, nameStringID) + traceReleaseBuffer(pid) +} + +//go:linkname trace_userLog runtime_trace.userLog +func trace_userLog(id uint64, category, message string) { + if !trace.enabled { + return + } + + mp, pid, bufp := traceAcquireBuffer() + if !trace.enabled && !mp.startingtrace { + traceReleaseBuffer(pid) + return + } + + categoryID, bufp := traceString(bufp, pid, category) + + extraSpace := traceBytesPerNumber + len(message) // extraSpace for the value string + traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 3, id, categoryID) + // traceEventLocked reserved extra space for val and len(val) + // in buf, so buf now has room for the following. + buf := (*bufp).ptr() + + // double-check the message and its length can fit. + // Otherwise, truncate the message. + slen := len(message) + if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber { + slen = room + } + buf.varint(uint64(slen)) + buf.pos += copy(buf.arr[buf.pos:], message[:slen]) + + traceReleaseBuffer(pid) +} diff --git a/libgo/go/runtime/trace/annotation.go b/libgo/go/runtime/trace/annotation.go new file mode 100644 index 0000000..3545ef3 --- /dev/null +++ b/libgo/go/runtime/trace/annotation.go @@ -0,0 +1,196 @@ +package trace + +import ( + "context" + "fmt" + "sync/atomic" + _ "unsafe" +) + +type traceContextKey struct{} + +// NewTask creates a task instance with the type taskType and returns +// it along with a Context that carries the task. +// If the input context contains a task, the new task is its subtask. +// +// The taskType is used to classify task instances. Analysis tools +// like the Go execution tracer may assume there are only a bounded +// number of unique task types in the system. +// +// The returned end function is used to mark the task's end. +// The trace tool measures task latency as the time between task creation +// and when the end function is called, and provides the latency +// distribution per task type. +// If the end function is called multiple times, only the first +// call is used in the latency measurement. +// +// ctx, task := trace.NewTask(ctx, "awesome task") +// trace.WithRegion(ctx, prepWork) +// // preparation of the task +// go func() { // continue processing the task in a separate goroutine. 
+// defer task.End() +// trace.WithRegion(ctx, remainingWork) +// } +func NewTask(pctx context.Context, taskType string) (ctx context.Context, task *Task) { + pid := fromContext(pctx).id + id := newID() + userTaskCreate(id, pid, taskType) + s := &Task{id: id} + return context.WithValue(pctx, traceContextKey{}, s), s + + // We allocate a new task and the end function even when + // the tracing is disabled because the context and the detach + // function can be used across trace enable/disable boundaries, + // which complicates the problem. + // + // For example, consider the following scenario: + // - trace is enabled. + // - trace.WithRegion is called, so a new context ctx + // with a new region is created. + // - trace is disabled. + // - trace is enabled again. + // - trace APIs with the ctx is called. Is the ID in the task + // a valid one to use? + // + // TODO(hyangah): reduce the overhead at least when + // tracing is disabled. Maybe the id can embed a tracing + // round number and ignore ids generated from previous + // tracing round. +} + +func fromContext(ctx context.Context) *Task { + if s, ok := ctx.Value(traceContextKey{}).(*Task); ok { + return s + } + return &bgTask +} + +// Task is a data type for tracing a user-defined, logical operation. +type Task struct { + id uint64 + // TODO(hyangah): record parent id? +} + +// End marks the end of the operation represented by the Task. +func (t *Task) End() { + userTaskEnd(t.id) +} + +var lastTaskID uint64 = 0 // task id issued last time + +func newID() uint64 { + // TODO(hyangah): use per-P cache + return atomic.AddUint64(&lastTaskID, 1) +} + +var bgTask = Task{id: uint64(0)} + +// Log emits a one-off event with the given category and message. +// Category can be empty and the API assumes there are only a handful of +// unique categories in the system. +func Log(ctx context.Context, category, message string) { + id := fromContext(ctx).id + userLog(id, category, message) +} + +// Logf is like Log, but the value is formatted using the specified format spec. +func Logf(ctx context.Context, category, format string, args ...interface{}) { + if IsEnabled() { + // Ideally this should be just Log, but that will + // add one more frame in the stack trace. + id := fromContext(ctx).id + userLog(id, category, fmt.Sprintf(format, args...)) + } +} + +const ( + regionStartCode = uint64(0) + regionEndCode = uint64(1) +) + +// WithRegion starts a region associated with its calling goroutine, runs fn, +// and then ends the region. If the context carries a task, the region is +// associated with the task. Otherwise, the region is attached to the background +// task. +// +// The regionType is used to classify regions, so there should be only a +// handful of unique region types. +func WithRegion(ctx context.Context, regionType string, fn func()) { + // NOTE: + // WithRegion helps avoiding misuse of the API but in practice, + // this is very restrictive: + // - Use of WithRegion makes the stack traces captured from + // region start and end are identical. + // - Refactoring the existing code to use WithRegion is sometimes + // hard and makes the code less readable. + // e.g. code block nested deep in the loop with various + // exit point with return values + // - Refactoring the code to use this API with closure can + // cause different GC behavior such as retaining some parameters + // longer. + // This causes more churns in code than I hoped, and sometimes + // makes the code less readable. 
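Editor's note: a minimal usage sketch of the annotation API added in this file, built only from the entry points visible in the diff (NewTask, WithRegion, Log) plus trace.Start/Stop; the output file name and the task, region, and log values are made up:

    package main

    import (
        "context"
        "os"
        "runtime/trace"
    )

    func main() {
        f, err := os.Create("trace.out") // hypothetical output file
        if err != nil {
            panic(err)
        }
        defer f.Close()

        if err := trace.Start(f); err != nil {
            panic(err)
        }
        defer trace.Stop()

        // NewTask attaches a task to the context; End marks its completion,
        // which the trace tool uses to compute per-task latency.
        ctx, task := trace.NewTask(context.Background(), "makeCappuccino")
        defer task.End()

        // Regions and log messages are associated with the task carried by ctx.
        trace.WithRegion(ctx, "steamMilk", func() {
            trace.Log(ctx, "orderID", "42")
        })
    }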
+ + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + defer userRegion(id, regionEndCode, regionType) + fn() +} + +// StartRegion starts a region and returns a function for marking the +// end of the region. The returned Region's End function must be called +// from the same goroutine where the region was started. +// Within each goroutine, regions must nest. That is, regions started +// after this region must be ended before this region can be ended. +// Recommended usage is +// +// defer trace.StartRegion(ctx, "myTracedRegion").End() +// +func StartRegion(ctx context.Context, regionType string) *Region { + if !IsEnabled() { + return noopRegion + } + id := fromContext(ctx).id + userRegion(id, regionStartCode, regionType) + return &Region{id, regionType} +} + +// Region is a region of code whose execution time interval is traced. +type Region struct { + id uint64 + regionType string +} + +var noopRegion = &Region{} + +// End marks the end of the traced code region. +func (r *Region) End() { + if r == noopRegion { + return + } + userRegion(r.id, regionEndCode, r.regionType) +} + +// IsEnabled returns whether tracing is enabled. +// The information is advisory only. The tracing status +// may have changed by the time this function returns. +func IsEnabled() bool { + enabled := atomic.LoadInt32(&tracing.enabled) + return enabled == 1 +} + +// +// Function bodies are defined in runtime/trace.go +// + +// emits UserTaskCreate event. +func userTaskCreate(id, parentID uint64, taskType string) + +// emits UserTaskEnd event. +func userTaskEnd(id uint64) + +// emits UserRegion event. +func userRegion(id, mode uint64, regionType string) + +// emits UserLog event. +func userLog(id uint64, category, message string) diff --git a/libgo/go/runtime/trace/annotation_test.go b/libgo/go/runtime/trace/annotation_test.go new file mode 100644 index 0000000..71abbfc --- /dev/null +++ b/libgo/go/runtime/trace/annotation_test.go @@ -0,0 +1,152 @@ +package trace_test + +import ( + "bytes" + "context" + "fmt" + "internal/trace" + "reflect" + . 
"runtime/trace" + "strings" + "sync" + "testing" +) + +func BenchmarkStartRegion(b *testing.B) { + b.ReportAllocs() + ctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + StartRegion(ctx, "region").End() + } + }) +} + +func BenchmarkNewTask(b *testing.B) { + b.ReportAllocs() + pctx, task := NewTask(context.Background(), "benchmark") + defer task.End() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _, task := NewTask(pctx, "task") + task.End() + } + }) +} + +func TestUserTaskRegion(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } + bgctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + preExistingRegion := StartRegion(bgctx, "pre-existing region") + + buf := new(bytes.Buffer) + if err := Start(buf); err != nil { + t.Fatalf("failed to start tracing: %v", err) + } + + // Beginning of traced execution + var wg sync.WaitGroup + ctx, task := NewTask(bgctx, "task0") // EvUserTaskCreate("task0") + wg.Add(1) + go func() { + defer wg.Done() + defer task.End() // EvUserTaskEnd("task0") + + WithRegion(ctx, "region0", func() { + // EvUserRegionCreate("region0", start) + WithRegion(ctx, "region1", func() { + Log(ctx, "key0", "0123456789abcdef") // EvUserLog("task0", "key0", "0....f") + }) + // EvUserRegion("region0", end) + }) + }() + + wg.Wait() + + preExistingRegion.End() + postExistingRegion := StartRegion(bgctx, "post-existing region") + + // End of traced execution + Stop() + + postExistingRegion.End() + + saveTrace(t, buf, "TestUserTaskRegion") + res, err := trace.Parse(buf, "") + if err == trace.ErrTimeOrder { + // golang.org/issues/16755 + t.Skipf("skipping trace: %v", err) + } + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + + // Check whether we see all user annotation related records in order + type testData struct { + typ byte + strs []string + args []uint64 + setLink bool + } + + var got []testData + tasks := map[uint64]string{} + for _, e := range res.Events { + t.Logf("%s", e) + switch e.Type { + case trace.EvUserTaskCreate: + taskName := e.SArgs[0] + got = append(got, testData{trace.EvUserTaskCreate, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskEnd { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + tasks[e.Args[0]] = taskName + case trace.EvUserLog: + key, val := e.SArgs[0], e.SArgs[1] + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserLog, []string{taskName, key, val}, nil, e.Link != nil}) + case trace.EvUserTaskEnd: + taskName := tasks[e.Args[0]] + got = append(got, testData{trace.EvUserTaskEnd, []string{taskName}, nil, e.Link != nil}) + if e.Link != nil && e.Link.Type != trace.EvUserTaskCreate { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + case trace.EvUserRegion: + taskName := tasks[e.Args[0]] + regionName := e.SArgs[0] + got = append(got, testData{trace.EvUserRegion, []string{taskName, regionName}, []uint64{e.Args[1]}, e.Link != nil}) + if e.Link != nil && (e.Link.Type != trace.EvUserRegion || e.Link.SArgs[0] != regionName) { + t.Errorf("Unexpected linked event %q->%q", e, e.Link) + } + } + } + want := []testData{ + {trace.EvUserTaskCreate, []string{"task0"}, nil, true}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{0}, true}, + {trace.EvUserRegion, []string{"task0", "region1"}, []uint64{0}, true}, + {trace.EvUserLog, []string{"task0", "key0", "0123456789abcdef"}, nil, false}, + {trace.EvUserRegion, 
[]string{"task0", "region1"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"task0", "region0"}, []uint64{1}, false}, + {trace.EvUserTaskEnd, []string{"task0"}, nil, false}, + // Currently, pre-existing region is not recorded to avoid allocations. + // {trace.EvUserRegion, []string{"", "pre-existing region"}, []uint64{1}, false}, + {trace.EvUserRegion, []string{"", "post-existing region"}, []uint64{0}, false}, + } + if !reflect.DeepEqual(got, want) { + pretty := func(data []testData) string { + var s strings.Builder + for _, d := range data { + s.WriteString(fmt.Sprintf("\t%+v\n", d)) + } + return s.String() + } + t.Errorf("Got user region related events\n%+v\nwant:\n%+v", pretty(got), pretty(want)) + } +} diff --git a/libgo/go/runtime/trace/trace.go b/libgo/go/runtime/trace/trace.go index 439f998..7f9d72a 100644 --- a/libgo/go/runtime/trace/trace.go +++ b/libgo/go/runtime/trace/trace.go @@ -2,8 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package trace contains facilities for programs to generate trace -// for Go execution tracer. +// Package trace contains facilities for programs to generate traces +// for the Go execution tracer. +// +// Tracing runtime activities // // The execution trace captures a wide range of execution events such as // goroutine creation/blocking/unblocking, syscall enter/exit/block, @@ -12,8 +14,6 @@ // captured for most events. The generated trace can be interpreted // using `go tool trace`. // -// Tracing a Go program -// // Support for tracing tests and benchmarks built with the standard // testing package is built into `go test`. For example, the following // command runs the test in the current directory and writes the trace @@ -25,24 +25,102 @@ // support to a standalone program. See the Example that demonstrates // how to use this API to enable tracing. // -// There is also a standard HTTP interface to profiling data. Adding the -// following line will install handlers under the /debug/pprof/trace URL -// to download live profiles: +// There is also a standard HTTP interface to trace data. Adding the +// following line will install a handler under the /debug/pprof/trace URL +// to download a live trace: // // import _ "net/http/pprof" // -// See the net/http/pprof package for more details. +// See the net/http/pprof package for more details about all of the +// debug endpoints installed by this import. +// +// User annotation +// +// Package trace provides user annotation APIs that can be used to +// log interesting events during execution. +// +// There are three types of user annotations: log messages, regions, +// and tasks. +// +// Log emits a timestamped message to the execution trace along with +// additional information such as the category of the message and +// which goroutine called Log. The execution tracer provides UIs to filter +// and group goroutines using the log category and the message supplied +// in Log. +// +// A region is for logging a time interval during a goroutine's execution. +// By definition, a region starts and ends in the same goroutine. +// Regions can be nested to represent subintervals. +// For example, the following code records four regions in the execution +// trace to trace the durations of sequential steps in a cappuccino making +// operation. +// +// trace.WithRegion(ctx, "makeCappuccino", func() { +// +// // orderID allows to identify a specific order +// // among many cappuccino order region records. 
+// trace.Log(ctx, "orderID", orderID) +// +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }) +// +// A task is a higher-level component that aids tracing of logical +// operations such as an RPC request, an HTTP request, or an +// interesting local operation which may require multiple goroutines +// working together. Since tasks can involve multiple goroutines, +// they are tracked via a context.Context object. NewTask creates +// a new task and embeds it in the returned context.Context object. +// Log messages and regions are attached to the task, if any, in the +// Context passed to Log and WithRegion. +// +// For example, assume that we decided to froth milk, extract coffee, +// and mix milk and coffee in separate goroutines. With a task, +// the trace tool can identify the goroutines involved in a specific +// cappuccino order. +// +// ctx, task := trace.NewTask(ctx, "makeCappuccino") +// trace.Log(ctx, "orderID", orderID) +// +// milk := make(chan bool) +// espresso := make(chan bool) +// +// go func() { +// trace.WithRegion(ctx, "steamMilk", steamMilk) +// milk <- true +// }() +// go func() { +// trace.WithRegion(ctx, "extractCoffee", extractCoffee) +// espresso <- true +// }() +// go func() { +// defer task.End() // When assemble is done, the order is complete. +// <-espresso +// <-milk +// trace.WithRegion(ctx, "mixMilkCoffee", mixMilkCoffee) +// }() +// +// +// The trace tool computes the latency of a task by measuring the +// time between the task creation and the task end and provides +// latency distributions for each task type found in the trace. package trace import ( "io" "runtime" + "sync" + "sync/atomic" ) // Start enables tracing for the current program. // While tracing, the trace will be buffered and written to w. // Start returns an error if tracing is already enabled. func Start(w io.Writer) error { + tracing.Lock() + defer tracing.Unlock() + if err := runtime.StartTrace(); err != nil { return err } @@ -55,11 +133,21 @@ func Start(w io.Writer) error { w.Write(data) } }() + atomic.StoreInt32(&tracing.enabled, 1) return nil } // Stop stops the current tracing, if any. // Stop only returns after all the writes for the trace have completed. func Stop() { + tracing.Lock() + defer tracing.Unlock() + atomic.StoreInt32(&tracing.enabled, 0) + runtime.StopTrace() } + +var tracing struct { + sync.Mutex // gate mutators (Start, Stop) + enabled int32 // accessed via atomic +} diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go index 274cdf7..62c06e6 100644 --- a/libgo/go/runtime/trace/trace_stack_test.go +++ b/libgo/go/runtime/trace/trace_stack_test.go @@ -6,14 +6,17 @@ package trace_test import ( "bytes" + "fmt" "internal/testenv" "internal/trace" "net" "os" "runtime" . "runtime/trace" + "strings" "sync" "testing" + "text/tabwriter" "time" ) @@ -21,7 +24,7 @@ import ( // In particular that we strip bottom uninteresting frames like goexit, // top uninteresting frames (runtime guts). func TestTraceSymbolize(t *testing.T) { - testenv.MustHaveGoBuild(t) + skipTraceSymbolizeTestIfNecessary(t) buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -34,28 +37,28 @@ func TestTraceSymbolize(t *testing.T) { // on a channel, in a select or otherwise. So we kick off goroutines // that need to block first in the hope that while we are executing // the rest of the test, they will block. 
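Editor's note on the Start/Stop changes above: state transitions are serialized with a package mutex while the on/off state is published through an atomic int32, so IsEnabled and the annotation fast paths can read it without locking. A stand-alone sketch of that gate pattern (the names here are invented):

    package main

    import (
        "fmt"
        "sync"
        "sync/atomic"
    )

    var gate struct {
        sync.Mutex       // serializes enable/disable, like tracing.Lock in the diff
        enabled    int32 // read atomically on the fast path, like tracing.enabled
    }

    func enable() {
        gate.Lock()
        defer gate.Unlock()
        atomic.StoreInt32(&gate.enabled, 1)
    }

    func disable() {
        gate.Lock()
        defer gate.Unlock()
        atomic.StoreInt32(&gate.enabled, 0)
    }

    // isEnabled is advisory only: the state may change right after it returns,
    // which is the same caveat IsEnabled documents.
    func isEnabled() bool { return atomic.LoadInt32(&gate.enabled) == 1 }

    func main() {
        enable()
        fmt.Println(isEnabled()) // true
        disable()
        fmt.Println(isEnabled()) // false
    }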
- go func() { + go func() { // func1 select {} }() - go func() { + go func() { // func2 var c chan int c <- 0 }() - go func() { + go func() { // func3 var c chan int <-c }() done1 := make(chan bool) - go func() { + go func() { // func4 <-done1 }() done2 := make(chan bool) - go func() { + go func() { // func5 done2 <- true }() c1 := make(chan int) c2 := make(chan int) - go func() { + go func() { // func6 select { case <-c1: case <-c2: @@ -63,17 +66,17 @@ func TestTraceSymbolize(t *testing.T) { }() var mu sync.Mutex mu.Lock() - go func() { + go func() { // func7 mu.Lock() mu.Unlock() }() var wg sync.WaitGroup wg.Add(1) - go func() { + go func() { // func8 wg.Wait() }() cv := sync.NewCond(&sync.Mutex{}) - go func() { + go func() { // func9 cv.L.Lock() cv.Wait() cv.L.Unlock() @@ -82,7 +85,7 @@ func TestTraceSymbolize(t *testing.T) { if err != nil { t.Fatalf("failed to listen: %v", err) } - go func() { + go func() { // func10 c, err := ln.Accept() if err != nil { t.Errorf("failed to accept: %v", err) @@ -97,7 +100,7 @@ func TestTraceSymbolize(t *testing.T) { defer rp.Close() defer wp.Close() pipeReadDone := make(chan bool) - go func() { + go func() { // func11 var data [1]byte rp.Read(data[:]) pipeReadDone <- true @@ -125,14 +128,16 @@ func TestTraceSymbolize(t *testing.T) { wp.Write(data[:]) <-pipeReadDone + oldGoMaxProcs := runtime.GOMAXPROCS(0) + runtime.GOMAXPROCS(oldGoMaxProcs + 1) + Stop() + + runtime.GOMAXPROCS(oldGoMaxProcs) + events, _ := parseTrace(t, buf) // Now check that the stacks are correct. - type frame struct { - Fn string - Line int - } type eventDesc struct { Type byte Stk []frame @@ -140,90 +145,96 @@ func TestTraceSymbolize(t *testing.T) { want := []eventDesc{ {trace.EvGCStart, []frame{ {"runtime.GC", 0}, - {"runtime/trace_test.TestTraceSymbolize", 107}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoStart, []frame{ - {"runtime/trace_test.TestTraceSymbolize.func1", 37}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoSched, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 108}, + {"runtime/trace_test.TestTraceSymbolize", 111}, {"testing.tRunner", 0}, }}, {trace.EvGoCreate, []frame{ - {"runtime/trace_test.TestTraceSymbolize", 37}, + {"runtime/trace_test.TestTraceSymbolize", 40}, {"testing.tRunner", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.block", 0}, - {"runtime/trace_test.TestTraceSymbolize.func1", 38}, + {"runtime/trace_test.TestTraceSymbolize.func1", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func2", 42}, + {"runtime/trace_test.TestTraceSymbolize.func2", 0}, }}, {trace.EvGoStop, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func3", 46}, + {"runtime/trace_test.TestTraceSymbolize.func3", 0}, }}, {trace.EvGoBlockRecv, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func4", 50}, + {"runtime/trace_test.TestTraceSymbolize.func4", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 110}, + {"runtime/trace_test.TestTraceSymbolize", 113}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSend, []frame{ {"runtime.chansend1", 0}, - {"runtime/trace_test.TestTraceSymbolize.func5", 54}, + {"runtime/trace_test.TestTraceSymbolize.func5", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.chanrecv1", 0}, - {"runtime/trace_test.TestTraceSymbolize", 111}, + {"runtime/trace_test.TestTraceSymbolize", 114}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSelect, 
[]frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize.func6", 59}, + {"runtime/trace_test.TestTraceSymbolize.func6", 0}, }}, {trace.EvGoUnblock, []frame{ {"runtime.selectgo", 0}, - {"runtime/trace_test.TestTraceSymbolize", 112}, + {"runtime/trace_test.TestTraceSymbolize", 115}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*Mutex).Lock", 0}, - {"runtime/trace_test.TestTraceSymbolize.func7", 67}, + {"runtime/trace_test.TestTraceSymbolize.func7", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Mutex).Unlock", 0}, - {"runtime/trace_test.TestTraceSymbolize", 116}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockSync, []frame{ {"sync.(*WaitGroup).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func8", 73}, + {"runtime/trace_test.TestTraceSymbolize.func8", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*WaitGroup).Add", 0}, {"sync.(*WaitGroup).Done", 0}, - {"runtime/trace_test.TestTraceSymbolize", 117}, + {"runtime/trace_test.TestTraceSymbolize", 120}, {"testing.tRunner", 0}, }}, {trace.EvGoBlockCond, []frame{ {"sync.(*Cond).Wait", 0}, - {"runtime/trace_test.TestTraceSymbolize.func9", 78}, + {"runtime/trace_test.TestTraceSymbolize.func9", 0}, }}, {trace.EvGoUnblock, []frame{ {"sync.(*Cond).Signal", 0}, - {"runtime/trace_test.TestTraceSymbolize", 118}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, {trace.EvGoSleep, []frame{ {"time.Sleep", 0}, - {"runtime/trace_test.TestTraceSymbolize", 109}, + {"runtime/trace_test.TestTraceSymbolize", 0}, + {"testing.tRunner", 0}, + }}, + {trace.EvGomaxprocs, []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.GOMAXPROCS", 0}, + {"runtime/trace_test.TestTraceSymbolize", 0}, {"testing.tRunner", 0}, }}, } @@ -235,7 +246,7 @@ func TestTraceSymbolize(t *testing.T) { {"net.(*netFD).accept", 0}, {"net.(*TCPListener).accept", 0}, {"net.(*TCPListener).Accept", 0}, - {"runtime/trace_test.TestTraceSymbolize.func10", 86}, + {"runtime/trace_test.TestTraceSymbolize.func10", 0}, }}, {trace.EvGoSysCall, []frame{ {"syscall.read", 0}, @@ -243,7 +254,7 @@ func TestTraceSymbolize(t *testing.T) { {"internal/poll.(*FD).Read", 0}, {"os.(*File).read", 0}, {"os.(*File).Read", 0}, - {"runtime/trace_test.TestTraceSymbolize.func11", 102}, + {"runtime/trace_test.TestTraceSymbolize.func11", 0}, }}, }...) 
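Editor's note: the test above now bumps GOMAXPROCS by one before calling Stop so that a Gomaxprocs event, with a stack through runtime.GOMAXPROCS, lands in the trace and matches the new expected entry at the end of the want list. A stand-alone sketch of that save/bump/restore dance (the buffer and values are made up):

    package main

    import (
        "bytes"
        "runtime"
        "runtime/trace"
    )

    func main() {
        var buf bytes.Buffer
        if err := trace.Start(&buf); err != nil {
            panic(err)
        }

        old := runtime.GOMAXPROCS(0) // read the current value without changing it
        runtime.GOMAXPROCS(old + 1)  // force a Gomaxprocs event into the trace
        trace.Stop()
        runtime.GOMAXPROCS(old) // restore the original setting
    }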
} @@ -264,22 +275,57 @@ func TestTraceSymbolize(t *testing.T) { matched[i] = true } } - for i, m := range matched { - if m { + for i, w := range want { + if matched[i] { continue } - w := want[i] - t.Errorf("did not match event %v at %v:%v", trace.EventDescriptions[w.Type].Name, w.Stk[0].Fn, w.Stk[0].Line) - t.Errorf("seen the following events of this type:") - for _, ev := range events { - if ev.Type != w.Type { - continue - } - for _, f := range ev.Stk { - t.Logf(" %v :: %s:%v", f.Fn, f.File, f.Line) + seen, n := dumpEventStacks(w.Type, events) + t.Errorf("Did not match event %v with stack\n%s\nSeen %d events of the type\n%s", + trace.EventDescriptions[w.Type].Name, dumpFrames(w.Stk), n, seen) + } +} + +func skipTraceSymbolizeTestIfNecessary(t *testing.T) { + testenv.MustHaveGoBuild(t) + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } +} + +func dumpEventStacks(typ byte, events []*trace.Event) ([]byte, int) { + matched := 0 + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + for _, ev := range events { + if ev.Type != typ { + continue + } + matched++ + fmt.Fprintf(tw, "Offset %d\n", ev.Off) + for _, f := range ev.Stk { + fname := f.File + if idx := strings.Index(fname, "/go/src/"); idx > 0 { + fname = fname[idx:] } - t.Logf("---") + fmt.Fprintf(tw, " %v\t%s:%d\n", f.Fn, fname, f.Line) } - t.Logf("======") } + tw.Flush() + return o.Bytes(), matched +} + +type frame struct { + Fn string + Line int +} + +func dumpFrames(frames []frame) []byte { + o := new(bytes.Buffer) + tw := tabwriter.NewWriter(o, 0, 8, 0, '\t', 0) + + for _, f := range frames { + fmt.Fprintf(tw, " %v\t :%d\n", f.Fn, f.Line) + } + tw.Flush() + return o.Bytes() } diff --git a/libgo/go/runtime/trace/trace_test.go b/libgo/go/runtime/trace/trace_test.go index 997d486..fc81abc 100644 --- a/libgo/go/runtime/trace/trace_test.go +++ b/libgo/go/runtime/trace/trace_test.go @@ -31,6 +31,9 @@ func TestEventBatch(t *testing.T) { if race.Enabled { t.Skip("skipping in race mode") } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } if testing.Short() { t.Skip("skipping in short mode") } @@ -81,6 +84,9 @@ func TestEventBatch(t *testing.T) { } func TestTraceStartStop(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -98,6 +104,9 @@ func TestTraceStartStop(t *testing.T) { } func TestTraceDoubleStart(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } Stop() buf := new(bytes.Buffer) if err := Start(buf); err != nil { @@ -111,6 +120,9 @@ func TestTraceDoubleStart(t *testing.T) { } func TestTrace(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) @@ -168,6 +180,12 @@ func testBrokenTimestamps(t *testing.T, data []byte) { } func TestTraceStress(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } var wg sync.WaitGroup done := make(chan bool) @@ -307,6 +325,12 @@ func TestTraceStress(t *testing.T) { // Do a bunch of various stuff (timers, GC, network, etc) in a separate goroutine. // And concurrently with all that start/stop trace 3 times. 
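Editor's note: dumpEventStacks and dumpFrames above use text/tabwriter to keep the "function, file:line" columns aligned in the failure message. A small self-contained sketch of that formatting pattern with made-up frame data:

    package main

    import (
        "fmt"
        "os"
        "text/tabwriter"
    )

    func main() {
        // Same tabwriter settings as in the diff: minwidth 0, tabwidth 8,
        // padding 0, pad with tabs, no flags.
        tw := tabwriter.NewWriter(os.Stdout, 0, 8, 0, '\t', 0)
        frames := []struct {
            fn   string
            file string
            line int
        }{
            {"runtime.GC", "/go/src/runtime/mgc.go", 1},          // hypothetical values
            {"testing.tRunner", "/go/src/testing/testing.go", 2}, // hypothetical values
        }
        for _, f := range frames {
            fmt.Fprintf(tw, "  %v\t%s:%d\n", f.fn, f.file, f.line)
        }
        tw.Flush()
    }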
func TestTraceStressStartStop(t *testing.T) { + if runtime.GOOS == "js" { + t.Skip("no os.Pipe on js") + } + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8)) outerDone := make(chan bool) @@ -454,6 +478,9 @@ func TestTraceStressStartStop(t *testing.T) { } func TestTraceFutileWakeup(t *testing.T) { + if IsEnabled() { + t.Skip("skipping because -test.trace is set") + } buf := new(bytes.Buffer) if err := Start(buf); err != nil { t.Fatalf("failed to start tracing: %v", err) diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go index 9456b1f..e97071e 100644 --- a/libgo/go/runtime/traceback_gccgo.go +++ b/libgo/go/runtime/traceback_gccgo.go @@ -141,8 +141,8 @@ func goroutineheader(gp *g) { } // Override. - if gpstatus == _Gwaiting && gp.waitreason != "" { - status = gp.waitreason + if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero { + status = gp.waitreason.String() } // approx time the G is blocked, in minutes diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go index 3c08f7e..8fd38c3 100644 --- a/libgo/go/runtime/type.go +++ b/libgo/go/runtime/type.go @@ -20,12 +20,30 @@ type _type struct { hashfn func(unsafe.Pointer, uintptr) uintptr equalfn func(unsafe.Pointer, unsafe.Pointer) bool - gcdata *byte - string *string + gcdata *byte + _string *string *uncommontype ptrToThis *_type } +func (t *_type) string() string { + return *t._string +} + +// pkgpath returns the path of the package where t was defined, if +// available. This is not the same as the reflect package's PkgPath +// method, in that it returns the package path for struct and interface +// types, not just named types. +func (t *_type) pkgpath() string { + if u := t.uncommontype; u != nil { + if u.pkgPath == nil { + return "" + } + return *u.pkgPath + } + return "" +} + // Return whether two type descriptors are equal. // This is gccgo-specific, as gccgo, unlike gc, permits multiple // independent descriptors for a single type. @@ -38,7 +56,7 @@ func eqtype(t1, t2 *_type) bool { case t1.kind != t2.kind || t1.hash != t2.hash: return false default: - return *t1.string == *t2.string + return t1.string() == t2.string() } } diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go index 2f5b63a..86e0df0 100644 --- a/libgo/go/runtime/unaligned1.go +++ b/libgo/go/runtime/unaligned1.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be riscv64 +// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x wasm ppc s390 arm64be riscv64 package runtime diff --git a/libgo/go/runtime/utf8.go b/libgo/go/runtime/utf8.go index e845451..0ba0dad 100644 --- a/libgo/go/runtime/utf8.go +++ b/libgo/go/runtime/utf8.go @@ -46,6 +46,15 @@ const ( hicb = 0xBF // 1011 1111 ) +// countrunes returns the number of runes in s. +func countrunes(s string) int { + n := 0 + for range s { + n++ + } + return n +} + // decoderune returns the non-ASCII rune at the start of // s[k:] and the index after the rune in s. // |
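Editor's note: countrunes, added to utf8.go at the end of this diff, counts runes rather than bytes by relying on the fact that a range loop over a string decodes one rune per iteration. A quick stand-alone illustration of the difference from len:

    package main

    import "fmt"

    // countrunes returns the number of runes in s, as in the diff above.
    func countrunes(s string) int {
        n := 0
        for range s {
            n++
        }
        return n
    }

    func main() {
        s := "héllo"                       // 5 runes, 6 bytes (é is 2 bytes in UTF-8)
        fmt.Println(countrunes(s), len(s)) // prints: 5 6
    }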