author     Ian Lance Taylor <iant@golang.org>  2020-01-02 15:05:27 -0800
committer  Ian Lance Taylor <iant@golang.org>  2020-01-21 23:53:22 -0800
commit     5a8ea165926cb0737ab03bc48c18dc5198ab5305 (patch)
tree       962dc3357c57f019f85658f99e2e753e30201c27 /libgo/go/runtime
parent     6ac6529e155c9baa0aaaed7aca06bd38ebda5b43 (diff)
libgo: update to Go1.14beta1
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/214297
Diffstat (limited to 'libgo/go/runtime')
-rw-r--r--  libgo/go/runtime/alg.go | 4
-rw-r--r--  libgo/go/runtime/callers_test.go | 278
-rw-r--r--  libgo/go/runtime/cgocall.go | 22
-rw-r--r--  libgo/go/runtime/cgocheck.go | 2
-rw-r--r--  libgo/go/runtime/chan.go | 90
-rw-r--r--  libgo/go/runtime/chan_test.go | 19
-rw-r--r--  libgo/go/runtime/checkptr.go | 106
-rw-r--r--  libgo/go/runtime/crash_nonunix_test.go | 2
-rw-r--r--  libgo/go/runtime/crash_test.go | 133
-rw-r--r--  libgo/go/runtime/crash_unix_test.go | 100
-rw-r--r--  libgo/go/runtime/debug.go | 4
-rw-r--r--  libgo/go/runtime/debug/heapdump_test.go | 4
-rw-r--r--  libgo/go/runtime/debug/mod.go | 2
-rw-r--r--  libgo/go/runtime/debug_test.go | 12
-rw-r--r--  libgo/go/runtime/debuglog.go | 2
-rw-r--r--  libgo/go/runtime/defer_test.go | 283
-rw-r--r--  libgo/go/runtime/env_posix.go | 2
-rw-r--r--  libgo/go/runtime/error.go | 2
-rw-r--r--  libgo/go/runtime/export_debug_test.go | 11
-rw-r--r--  libgo/go/runtime/export_linux_test.go | 3
-rw-r--r--  libgo/go/runtime/export_mmap_test.go | 2
-rw-r--r--  libgo/go/runtime/export_test.go | 535
-rw-r--r--  libgo/go/runtime/export_unix_test.go | 75
-rw-r--r--  libgo/go/runtime/extern.go | 7
-rw-r--r--  libgo/go/runtime/gc_test.go | 25
-rw-r--r--  libgo/go/runtime/gcinfo_test.go | 8
-rw-r--r--  libgo/go/runtime/hash64.go | 3
-rw-r--r--  libgo/go/runtime/heapdump.go | 10
-rw-r--r--  libgo/go/runtime/internal/atomic/atomic.c | 20
-rw-r--r--  libgo/go/runtime/internal/atomic/atomic_test.go | 127
-rw-r--r--  libgo/go/runtime/internal/atomic/bench_test.go | 40
-rw-r--r--  libgo/go/runtime/internal/atomic/gccgo.go | 6
-rw-r--r--  libgo/go/runtime/internal/sys/intrinsics.go | 19
-rw-r--r--  libgo/go/runtime/internal/sys/intrinsics_common.go | 143
-rw-r--r--  libgo/go/runtime/lfstack_32bit.go | 2
-rw-r--r--  libgo/go/runtime/libfuzzer.go | 75
-rw-r--r--  libgo/go/runtime/lock_futex.go | 2
-rw-r--r--  libgo/go/runtime/lock_js.go | 91
-rw-r--r--  libgo/go/runtime/lock_sema.go | 4
-rw-r--r--  libgo/go/runtime/malloc.go | 76
-rw-r--r--  libgo/go/runtime/malloc_test.go | 21
-rw-r--r--  libgo/go/runtime/map.go | 2
-rw-r--r--  libgo/go/runtime/map_benchmark_test.go | 2
-rw-r--r--  libgo/go/runtime/mbitmap.go | 93
-rw-r--r--  libgo/go/runtime/mcentral.go | 4
-rw-r--r--  libgo/go/runtime/mgc.go | 90
-rw-r--r--  libgo/go/runtime/mgc_gccgo.go | 31
-rw-r--r--  libgo/go/runtime/mgclarge.go | 657
-rw-r--r--  libgo/go/runtime/mgcmark.go | 121
-rw-r--r--  libgo/go/runtime/mgcscavenge.go | 772
-rw-r--r--  libgo/go/runtime/mgcscavenge_test.go | 419
-rw-r--r--  libgo/go/runtime/mgcsweep.go | 14
-rw-r--r--  libgo/go/runtime/mgcsweepbuf.go | 15
-rw-r--r--  libgo/go/runtime/mgcwork.go | 10
-rw-r--r--  libgo/go/runtime/mheap.go | 1091
-rw-r--r--  libgo/go/runtime/mkpreempt.go | 522
-rw-r--r--  libgo/go/runtime/mpagealloc.go | 938
-rw-r--r--  libgo/go/runtime/mpagealloc_32bit.go | 116
-rw-r--r--  libgo/go/runtime/mpagealloc_64bit.go | 180
-rw-r--r--  libgo/go/runtime/mpagealloc_test.go | 921
-rw-r--r--  libgo/go/runtime/mpagecache.go | 156
-rw-r--r--  libgo/go/runtime/mpagecache_test.go | 364
-rw-r--r--  libgo/go/runtime/mpallocbits.go | 394
-rw-r--r--  libgo/go/runtime/mpallocbits_test.go | 510
-rw-r--r--  libgo/go/runtime/mprof.go | 23
-rw-r--r--  libgo/go/runtime/mranges.go | 147
-rw-r--r--  libgo/go/runtime/msize.go | 2
-rw-r--r--  libgo/go/runtime/mstats.go | 15
-rw-r--r--  libgo/go/runtime/nbpipe_pipe.go | 19
-rw-r--r--  libgo/go/runtime/nbpipe_pipe2.go | 22
-rw-r--r--  libgo/go/runtime/nbpipe_test.go | 102
-rw-r--r--  libgo/go/runtime/netpoll.go | 70
-rw-r--r--  libgo/go/runtime/netpoll_aix.go | 91
-rw-r--r--  libgo/go/runtime/netpoll_epoll.go | 111
-rw-r--r--  libgo/go/runtime/netpoll_fake.go | 15
-rw-r--r--  libgo/go/runtime/netpoll_kqueue.go | 89
-rw-r--r--  libgo/go/runtime/netpoll_solaris.go | 95
-rw-r--r--  libgo/go/runtime/netpoll_stub.go | 30
-rw-r--r--  libgo/go/runtime/netpoll_windows.go | 70
-rw-r--r--  libgo/go/runtime/os3_solaris.go | 7
-rw-r--r--  libgo/go/runtime/os_darwin.go | 18
-rw-r--r--  libgo/go/runtime/os_freebsd_arm64.go | 155
-rw-r--r--  libgo/go/runtime/os_gccgo.go | 42
-rw-r--r--  libgo/go/runtime/os_illumos.go | 102
-rw-r--r--  libgo/go/runtime/os_js.go | 9
-rw-r--r--  libgo/go/runtime/os_linux_arm.go | 8
-rw-r--r--  libgo/go/runtime/os_linux_arm64.go | 9
-rw-r--r--  libgo/go/runtime/os_linux_mips64x.go | 10
-rw-r--r--  libgo/go/runtime/os_linux_mipsx.go | 10
-rw-r--r--  libgo/go/runtime/os_netbsd_arm64.go | 1
-rw-r--r--  libgo/go/runtime/os_only_solaris.go | 18
-rw-r--r--  libgo/go/runtime/os_openbsd_arm64.go | 1
-rw-r--r--  libgo/go/runtime/panic.go | 31
-rw-r--r--  libgo/go/runtime/pprof/label.go | 6
-rw-r--r--  libgo/go/runtime/pprof/label_test.go | 2
-rw-r--r--  libgo/go/runtime/pprof/mprof_test.go | 118
-rw-r--r--  libgo/go/runtime/pprof/pprof.go | 30
-rw-r--r--  libgo/go/runtime/pprof/pprof_test.go | 244
-rw-r--r--  libgo/go/runtime/pprof/proto.go | 297
-rw-r--r--  libgo/go/runtime/pprof/proto_test.go | 17
-rw-r--r--  libgo/go/runtime/pprof/protomem.go | 23
-rw-r--r--  libgo/go/runtime/pprof/testdata/README | 9
-rw-r--r--  libgo/go/runtime/pprof/testdata/mappingtest/main.go | 13
-rwxr-xr-x  libgo/go/runtime/pprof/testdata/test32 | bin 0 -> 528 bytes
-rwxr-xr-x  libgo/go/runtime/pprof/testdata/test32be | bin 0 -> 520 bytes
-rwxr-xr-x  libgo/go/runtime/pprof/testdata/test64 | bin 0 -> 760 bytes
-rwxr-xr-x  libgo/go/runtime/pprof/testdata/test64be | bin 0 -> 856 bytes
-rw-r--r--  libgo/go/runtime/preempt.go | 370
-rw-r--r--  libgo/go/runtime/proc.go | 647
-rw-r--r--  libgo/go/runtime/proc_test.go | 50
-rw-r--r--  libgo/go/runtime/race0.go | 2
-rw-r--r--  libgo/go/runtime/runtime1.go | 2
-rw-r--r--  libgo/go/runtime/runtime2.go | 124
-rw-r--r--  libgo/go/runtime/runtime_mmap_test.go | 2
-rw-r--r--  libgo/go/runtime/runtime_test.go | 48
-rw-r--r--  libgo/go/runtime/select.go | 8
-rw-r--r--  libgo/go/runtime/sema.go | 45
-rw-r--r--  libgo/go/runtime/sema_test.go | 97
-rw-r--r--  libgo/go/runtime/semasleep_test.go | 2
-rw-r--r--  libgo/go/runtime/signal_sighandler.go | 176
-rw-r--r--  libgo/go/runtime/signal_unix.go | 305
-rw-r--r--  libgo/go/runtime/signal_windows_test.go | 61
-rw-r--r--  libgo/go/runtime/sizeof_test.go | 2
-rw-r--r--  libgo/go/runtime/slice.go | 2
-rw-r--r--  libgo/go/runtime/stack_test.go | 10
-rw-r--r--  libgo/go/runtime/string.go | 34
-rw-r--r--  libgo/go/runtime/string_test.go | 31
-rw-r--r--  libgo/go/runtime/stubs.go | 9
-rw-r--r--  libgo/go/runtime/stubs2.go | 8
-rw-r--r--  libgo/go/runtime/stubs3.go | 2
-rw-r--r--  libgo/go/runtime/symtab.go | 8
-rw-r--r--  libgo/go/runtime/testdata/testfaketime/faketime.go | 28
-rw-r--r--  libgo/go/runtime/testdata/testprog/deadlock.go | 93
-rw-r--r--  libgo/go/runtime/testdata/testprog/gc.go | 36
-rw-r--r--  libgo/go/runtime/testdata/testprog/preempt.go | 71
-rw-r--r--  libgo/go/runtime/testdata/testprog/signal.go | 2
-rw-r--r--  libgo/go/runtime/testdata/testprog/vdso.go | 55
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/bigstack_windows.c | 46
-rw-r--r--  libgo/go/runtime/testdata/testprogcgo/numgoroutine.go | 7
-rw-r--r--  libgo/go/runtime/testdata/testprognet/signal.go | 2
-rw-r--r--  libgo/go/runtime/testdata/testwinlib/main.c | 57
-rw-r--r--  libgo/go/runtime/testdata/testwinlib/main.go | 28
-rw-r--r--  libgo/go/runtime/time.go | 1037
-rw-r--r--  libgo/go/runtime/time_fake.go | 100
-rw-r--r--  libgo/go/runtime/time_nofake.go | 31
-rw-r--r--  libgo/go/runtime/time_test.go | 96
-rw-r--r--  libgo/go/runtime/timestub2.go | 3
-rw-r--r--  libgo/go/runtime/trace.go | 30
-rw-r--r--  libgo/go/runtime/trace/trace_stack_test.go | 1
-rw-r--r--  libgo/go/runtime/traceback_gccgo.go | 6
-rw-r--r--  libgo/go/runtime/treap_test.go | 270
151 files changed, 12420 insertions, 3499 deletions
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go
index e802fdd..101402c 100644
--- a/libgo/go/runtime/alg.go
+++ b/libgo/go/runtime/alg.go
@@ -281,6 +281,7 @@ func efaceeq(x, y eface) bool {
}
return eq(x.data, y.data)
}
+
func ifaceeq(x, y iface) bool {
xtab := x.tab
if xtab == nil && y.tab == nil {
@@ -463,7 +464,6 @@ var hashkey [4]uintptr
func alginit() {
// Install AES hash algorithms if the instructions needed are present.
if (GOARCH == "386" || GOARCH == "amd64") &&
- GOOS != "nacl" &&
support_aes &&
cpu.X86.HasAES && // AESENC
cpu.X86.HasSSSE3 && // PSHUFB
@@ -488,7 +488,7 @@ func initAlgAES() {
getRandomData(aeskeysched[:])
}
-// Note: These routines perform the read with an native endianness.
+// Note: These routines perform the read with a native endianness.
func readUnaligned32(p unsafe.Pointer) uint32 {
q := (*[4]byte)(p)
if sys.BigEndian {
diff --git a/libgo/go/runtime/callers_test.go b/libgo/go/runtime/callers_test.go
index ad83f99..26a6f3a 100644
--- a/libgo/go/runtime/callers_test.go
+++ b/libgo/go/runtime/callers_test.go
@@ -5,25 +5,26 @@
package runtime_test
import (
+ "reflect"
"runtime"
"strings"
"testing"
)
func f1(pan bool) []uintptr {
- return f2(pan) // line 14
+ return f2(pan) // line 15
}
func f2(pan bool) []uintptr {
- return f3(pan) // line 18
+ return f3(pan) // line 19
}
func f3(pan bool) []uintptr {
if pan {
- panic("f3") // line 23
+ panic("f3") // line 24
}
ret := make([]uintptr, 20)
- return ret[:runtime.Callers(0, ret)] // line 26
+ return ret[:runtime.Callers(0, ret)] // line 27
}
func testCallers(t *testing.T, pcs []uintptr, pan bool) {
@@ -47,16 +48,16 @@ func testCallers(t *testing.T, pcs []uintptr, pan bool) {
var f3Line int
if pan {
- f3Line = 23
+ f3Line = 24
} else {
- f3Line = 26
+ f3Line = 27
}
want := []struct {
name string
line int
}{
- {"f1", 14},
- {"f2", 18},
+ {"f1", 15},
+ {"f2", 19},
{"f3", f3Line},
}
for _, w := range want {
@@ -66,11 +67,38 @@ func testCallers(t *testing.T, pcs []uintptr, pan bool) {
}
}
+func testCallersEqual(t *testing.T, pcs []uintptr, want []string) {
+ got := make([]string, 0, len(want))
+
+ frames := runtime.CallersFrames(pcs)
+ for {
+ frame, more := frames.Next()
+ if !more || len(got) >= len(want) {
+ break
+ }
+ got = append(got, frame.Function)
+ }
+ if !reflect.DeepEqual(want, got) {
+ t.Fatalf("wanted %v, got %v", want, got)
+ }
+}
+
func TestCallers(t *testing.T) {
testCallers(t, f1(false), false)
}
func TestCallersPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersPanic.func1",
+ "runtime.gopanic", "runtime_test.f3", "runtime_test.f2", "runtime_test.f1",
+ "runtime_test.TestCallersPanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersPanic..func1",
+ "runtime.gopanic", "runtime_test.f3", "runtime_test.f2", "runtime_test.f1",
+ "runtime_test.TestCallersPanic"}
+ }
+
defer func() {
if r := recover(); r == nil {
t.Fatal("did not panic")
@@ -78,6 +106,240 @@ func TestCallersPanic(t *testing.T) {
pcs := make([]uintptr, 20)
pcs = pcs[:runtime.Callers(0, pcs)]
testCallers(t, pcs, true)
+ testCallersEqual(t, pcs, want)
}()
f1(true)
}
+
+func TestCallersDoublePanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDoublePanic.func1.1",
+ "runtime.gopanic", "runtime_test.TestCallersDoublePanic.func1", "runtime.gopanic", "runtime_test.TestCallersDoublePanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersDoublePanic..func2",
+ "runtime.gopanic", "runtime_test.TestCallersDoublePanic..func1", "runtime.gopanic", "runtime_test.TestCallersDoublePanic"}
+ }
+
+ defer func() {
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ if recover() == nil {
+ t.Fatal("did not panic")
+ }
+ testCallersEqual(t, pcs, want)
+ }()
+ if recover() == nil {
+ t.Fatal("did not panic")
+ }
+ panic(2)
+ }()
+ panic(1)
+}
+
+// Test that a defer after a successful recovery looks like it is called directly
+// from the function with the defers.
+func TestCallersAfterRecovery(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAfterRecovery.func1", "runtime_test.TestCallersAfterRecovery"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersAfterRecovery..func1", "runtime_test.TestCallersAfterRecovery"}
+ }
+
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ defer func() {
+ if recover() == nil {
+ t.Fatal("did not recover from panic")
+ }
+ }()
+ panic(1)
+}
+
+func TestCallersAbortedPanic(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic.func2", "runtime_test.TestCallersAbortedPanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic..func2", "runtime_test.TestCallersAbortedPanic"}
+ }
+
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatalf("should be no panic remaining to recover")
+ }
+ }()
+
+ defer func() {
+ // panic2 was aborted/replaced by panic1, so when panic2 was
+ // recovered, there is no remaining panic on the stack.
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatalf("got %v, wanted %v", r, "panic2")
+ }
+ }()
+ defer func() {
+ // panic2 aborts/replaces panic1, because it is a recursive panic
+ // that is not recovered within the defer function called by
+ // panic1 panicking sequence
+ panic("panic2")
+ }()
+ panic("panic1")
+}
+
+func TestCallersAbortedPanic2(t *testing.T) {
+ want := []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic2.func2", "runtime_test.TestCallersAbortedPanic2"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersAbortedPanic2..func2", "runtime_test.TestCallersAbortedPanic2"}
+ }
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatalf("should be no panic remaining to recover")
+ }
+ }()
+ defer func() {
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ func() {
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatalf("got %v, wanted %v", r, "panic2")
+ }
+ }()
+ func() {
+ defer func() {
+ // Again, panic2 aborts/replaces panic1
+ panic("panic2")
+ }()
+ panic("panic1")
+ }()
+ }()
+}
+
+func TestCallersNilPointerPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersNilPointerPanic.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic",
+ "runtime_test.TestCallersNilPointerPanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersNilPointerPanic..func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic",
+ "runtime_test.TestCallersNilPointerPanic"}
+ }
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ var p *int
+ if *p == 3 {
+ t.Fatal("did not see nil pointer panic")
+ }
+}
+
+func TestCallersDivZeroPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack (due to
+ // open-coded defer processing)
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDivZeroPanic.func1",
+ "runtime.gopanic", "runtime.panicdivide",
+ "runtime_test.TestCallersDivZeroPanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersDivZeroPanic..func1",
+ "runtime.gopanic", "runtime.panicdivide",
+ "runtime_test.TestCallersDivZeroPanic"}
+ }
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ }()
+ var n int
+ if 5/n == 1 {
+ t.Fatal("did not see divide-by-sizer panic")
+ }
+}
+
+func TestCallersDeferNilFuncPanic(t *testing.T) {
+ // Make sure we don't have any extra frames on the stack. We cut off the check
+ // at runtime.sigpanic, because non-open-coded defers (which may be used in
+ // non-opt or race checker mode) include an extra 'deferreturn' frame (which is
+ // where the nil pointer deref happens).
+ state := 1
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanic.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanic..func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic"}
+ }
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ if state == 1 {
+ t.Fatal("nil defer func panicked at defer time rather than function exit time")
+ }
+
+ }()
+ var f func()
+ defer f()
+ // Use the value of 'state' to make sure nil defer func f causes panic at
+ // function exit, rather than at the defer statement.
+ state = 2
+}
+
+// Same test, but forcing non-open-coded defer by putting the defer in a loop. See
+// issue #36050
+func TestCallersDeferNilFuncPanicWithLoop(t *testing.T) {
+ state := 1
+ want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanicWithLoop.func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic", "runtime.deferreturn", "runtime_test.TestCallersDeferNilFuncPanicWithLoop"}
+ if runtime.Compiler == "gccgo" {
+ want = []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanicWithLoop..func1",
+ "runtime.gopanic", "runtime.panicmem", "runtime.sigpanic", "runtime_test.TestCallersDeferNilFuncPanicWithLoop"}
+ }
+
+ defer func() {
+ if r := recover(); r == nil {
+ t.Fatal("did not panic")
+ }
+ pcs := make([]uintptr, 20)
+ pcs = pcs[:runtime.Callers(0, pcs)]
+ testCallersEqual(t, pcs, want)
+ if state == 1 {
+ t.Fatal("nil defer func panicked at defer time rather than function exit time")
+ }
+
+ }()
+
+ for i := 0; i < 1; i++ {
+ var f func()
+ defer f()
+ }
+ // Use the value of 'state' to make sure nil defer func f causes panic at
+ // function exit, rather than at the defer statement.
+ state = 2
+}
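
The testCallersEqual helper added above drives runtime.CallersFrames over the PCs captured by runtime.Callers. A minimal stand-alone sketch of that same pattern outside the test harness follows; the captureFrames helper is hypothetical and not part of this patch.

package main

import (
	"fmt"
	"runtime"
)

// captureFrames records up to 20 return PCs starting at its caller and
// resolves them to function names. skip=2 skips runtime.Callers itself
// and captureFrames, so the first frame reported is the caller.
func captureFrames() []string {
	pcs := make([]uintptr, 20)
	n := runtime.Callers(2, pcs)
	frames := runtime.CallersFrames(pcs[:n])
	var names []string
	for {
		frame, more := frames.Next()
		names = append(names, frame.Function)
		if !more {
			break
		}
	}
	return names
}

func main() {
	for _, name := range captureFrames() {
		fmt.Println(name)
	}
}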
diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go
index 587001c..efd0e24 100644
--- a/libgo/go/runtime/cgocall.go
+++ b/libgo/go/runtime/cgocall.go
@@ -47,24 +47,24 @@ import (
// cgoCheckPointer checks if the argument contains a Go pointer that
// points to a Go pointer, and panics if it does.
-func cgoCheckPointer(ptr interface{}, args ...interface{}) {
+func cgoCheckPointer(ptr interface{}, arg interface{}) {
if debug.cgocheck == 0 {
return
}
- ep := (*eface)(unsafe.Pointer(&ptr))
+ ep := efaceOf(&ptr)
t := ep._type
top := true
- if len(args) > 0 && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
+ if arg != nil && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) {
p := ep.data
if t.kind&kindDirectIface == 0 {
p = *(*unsafe.Pointer)(p)
}
- if !cgoIsGoPointer(p) {
+ if p == nil || !cgoIsGoPointer(p) {
return
}
- aep := (*eface)(unsafe.Pointer(&args[0]))
+ aep := efaceOf(&arg)
switch aep._type.kind & kindMask {
case kindBool:
if t.kind&kindMask == kindUnsafePointer {
@@ -101,7 +101,7 @@ const cgoResultFail = "cgo result has Go pointer"
// depending on indir. The top parameter is whether we are at the top
// level, where Go pointers are allowed.
func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
- if t.ptrdata == 0 {
+ if t.ptrdata == 0 || p == nil {
// If the type has no pointers there is nothing to do.
return
}
@@ -158,7 +158,7 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
st := (*slicetype)(unsafe.Pointer(t))
s := (*slice)(p)
p = s.array
- if !cgoIsGoPointer(p) {
+ if p == nil || !cgoIsGoPointer(p) {
return
}
if !top {
@@ -189,11 +189,17 @@ func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) {
return
}
for _, f := range st.fields {
+ if f.typ.ptrdata == 0 {
+ continue
+ }
cgoCheckArg(f.typ, add(p, f.offset()), true, top, msg)
}
case kindPtr, kindUnsafePointer:
if indir {
p = *(*unsafe.Pointer)(p)
+ if p == nil {
+ return
+ }
}
if !cgoIsGoPointer(p) {
@@ -298,7 +304,7 @@ func cgoCheckResult(val interface{}) {
return
}
- ep := (*eface)(unsafe.Pointer(&val))
+ ep := efaceOf(&val)
t := ep._type
cgoCheckArg(t, ep.data, t.kind&kindDirectIface == 0, false, cgoResultFail)
}
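
cgoCheckPointer, whose signature changes above from a variadic to a single argument, is the run-time enforcement of the cgo pointer-passing rules. A hedged sketch of what it accepts and rejects is below; the C function take is hypothetical, and the second call panics at run time when cgocheck is enabled (the default).

package main

// void take(void *p) {}
import "C"

import "unsafe"

func main() {
	x := new(int)
	// A Go pointer to pointer-free memory may be passed to C.
	C.take(unsafe.Pointer(x))

	type holder struct{ p *int }
	h := &holder{p: x}
	// A Go pointer to memory that itself holds a Go pointer violates the
	// rules; cgoCheckPointer panics with
	// "cgo argument has Go pointer to Go pointer".
	C.take(unsafe.Pointer(h))
}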
diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go
index 130db29..c03bafe 100644
--- a/libgo/go/runtime/cgocheck.go
+++ b/libgo/go/runtime/cgocheck.go
@@ -134,7 +134,7 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) {
}
s := spanOfUnchecked(uintptr(src))
- if s.state == mSpanManual {
+ if s.state.get() == mSpanManual {
// There are no heap bits for value stored on the stack.
// For a channel receive src might be on the stack of some
// other goroutine, so we can't unwind the stack even if
diff --git a/libgo/go/runtime/chan.go b/libgo/go/runtime/chan.go
index 291fe00..549e566 100644
--- a/libgo/go/runtime/chan.go
+++ b/libgo/go/runtime/chan.go
@@ -133,6 +133,21 @@ func chanbuf(c *hchan, i uint) unsafe.Pointer {
return add(c.buf, uintptr(i)*uintptr(c.elemsize))
}
+// full reports whether a send on c would block (that is, the channel is full).
+// It uses a single word-sized read of mutable state, so although
+// the answer is instantaneously true, the correct answer may have changed
+// by the time the calling function receives the return value.
+func full(c *hchan) bool {
+ // c.dataqsiz is immutable (never written after the channel is created)
+ // so it is safe to read at any time during channel operation.
+ if c.dataqsiz == 0 {
+ // Assumes that a pointer read is relaxed-atomic.
+ return c.recvq.first == nil
+ }
+ // Assumes that a uint read is relaxed-atomic.
+ return c.qcount == c.dataqsiz
+}
+
// entry point for c <- x from compiled code
//go:nosplit
func chansend1(c *hchan, elem unsafe.Pointer) {
@@ -177,7 +192,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
//
// After observing that the channel is not closed, we observe that the channel is
// not ready for sending. Each of these observations is a single word-sized read
- // (first c.closed and second c.recvq.first or c.qcount depending on kind of channel).
+ // (first c.closed and second full()).
// Because a closed channel cannot transition from 'ready for sending' to
// 'not ready for sending', even if the channel is closed between the two observations,
// they imply a moment between the two when the channel was both not yet closed
@@ -186,9 +201,10 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
//
// It is okay if the reads are reordered here: if we observe that the channel is not
// ready for sending and then observe that it is not closed, that implies that the
- // channel wasn't closed during the first observation.
- if !block && c.closed == 0 && ((c.dataqsiz == 0 && c.recvq.first == nil) ||
- (c.dataqsiz > 0 && c.qcount == c.dataqsiz)) {
+ // channel wasn't closed during the first observation. However, nothing here
+ // guarantees forward progress. We rely on the side effects of lock release in
+ // chanrecv() and closechan() to update this thread's view of c.closed and full().
+ if !block && c.closed == 0 && full(c) {
return false
}
@@ -250,7 +266,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
gp.waiting = mysg
gp.param = nil
c.sendq.enqueue(mysg)
- goparkunlock(&c.lock, waitReasonChanSend, traceEvGoBlockSend, 3)
+ gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
// stack object, but sudogs aren't considered as roots of the
@@ -262,6 +278,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
throw("G waiting list is corrupted")
}
gp.waiting = nil
+ gp.activeStackChans = false
if gp.param == nil {
if c.closed == 0 {
throw("chansend: spurious wakeup")
@@ -417,6 +434,16 @@ func closechan(c *hchan) {
}
}
+// empty reports whether a read from c would block (that is, the channel is
+// empty). It uses a single atomic read of mutable state.
+func empty(c *hchan) bool {
+ // c.dataqsiz is immutable.
+ if c.dataqsiz == 0 {
+ return atomic.Loadp(unsafe.Pointer(&c.sendq.first)) == nil
+ }
+ return atomic.Loaduint(&c.qcount) == 0
+}
+
// entry points for <- c from compiled code
//go:nosplit
func chanrecv1(c *hchan, elem unsafe.Pointer) {
@@ -457,21 +484,33 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
}
// Fast path: check for failed non-blocking operation without acquiring the lock.
- //
- // After observing that the channel is not ready for receiving, we observe that the
- // channel is not closed. Each of these observations is a single word-sized read
- // (first c.sendq.first or c.qcount, and second c.closed).
- // Because a channel cannot be reopened, the later observation of the channel
- // being not closed implies that it was also not closed at the moment of the
- // first observation. We behave as if we observed the channel at that moment
- // and report that the receive cannot proceed.
- //
- // The order of operations is important here: reversing the operations can lead to
- // incorrect behavior when racing with a close.
- if !block && (c.dataqsiz == 0 && c.sendq.first == nil ||
- c.dataqsiz > 0 && atomic.Loaduint(&c.qcount) == 0) &&
- atomic.Load(&c.closed) == 0 {
- return
+ if !block && empty(c) {
+ // After observing that the channel is not ready for receiving, we observe whether the
+ // channel is closed.
+ //
+ // Reordering of these checks could lead to incorrect behavior when racing with a close.
+ // For example, if the channel was open and not empty, was closed, and then drained,
+ // reordered reads could incorrectly indicate "open and empty". To prevent reordering,
+ // we use atomic loads for both checks, and rely on emptying and closing to happen in
+ // separate critical sections under the same lock. This assumption fails when closing
+ // an unbuffered channel with a blocked send, but that is an error condition anyway.
+ if atomic.Load(&c.closed) == 0 {
+ // Because a channel cannot be reopened, the later observation of the channel
+ // being not closed implies that it was also not closed at the moment of the
+ // first observation. We behave as if we observed the channel at that moment
+ // and report that the receive cannot proceed.
+ return
+ }
+ // The channel is irreversibly closed. Re-check whether the channel has any pending data
+ // to receive, which could have arrived between the empty and closed checks above.
+ // Sequential consistency is also required here, when racing with such a send.
+ if empty(c) {
+ // The channel is irreversibly closed and empty.
+ if ep != nil {
+ typedmemclr(c.elemtype, ep)
+ }
+ return true, false
+ }
}
var t0 int64
@@ -543,13 +582,14 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
- goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3)
+ gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
if mysg != gp.waiting {
throw("G waiting list is corrupted")
}
gp.waiting = nil
+ gp.activeStackChans = false
if mysg.releasetime > 0 {
blockevent(mysg.releasetime-t0, 2)
}
@@ -616,6 +656,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
goready(gp, skip+1)
}
+func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
+ // There are unlocked sudogs that point into gp's stack. Stack
+ // copying must lock the channels of those sudogs.
+ gp.activeStackChans = true
+ unlock((*mutex)(chanLock))
+ return true
+}
+
// compiler implements
//
// select {
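
The full and empty helpers added above exist so that a failed non-blocking channel operation can return without taking the channel lock. A minimal sketch of the user-level operations those fast paths serve is below; it uses only the public channel API, and the trySend/tryRecv helper names are made up for illustration.

package main

import "fmt"

func trySend(c chan int, v int) {
	// A send select with a default case is the non-blocking form served by
	// the chansend fast path: it checks c.closed == 0 && full(c) and bails
	// out without taking the channel lock when the send would block.
	select {
	case c <- v:
		fmt.Println("sent", v)
	default:
		fmt.Println("send of", v, "would block")
	}
}

func tryRecv(c chan int) {
	// The non-blocking receive uses empty(c) plus an atomic load of
	// c.closed, re-checking empty(c) after observing "closed" so that a
	// racing close-then-drain is not misreported as "open and empty".
	select {
	case v := <-c:
		fmt.Println("received", v)
	default:
		fmt.Println("receive would block")
	}
}

func main() {
	c := make(chan int, 1)
	trySend(c, 1) // sent 1
	trySend(c, 2) // send of 2 would block (buffer full)
	tryRecv(c)    // received 1
	tryRecv(c)    // receive would block (buffer empty)
}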
diff --git a/libgo/go/runtime/chan_test.go b/libgo/go/runtime/chan_test.go
index 29fb321..ac81d40 100644
--- a/libgo/go/runtime/chan_test.go
+++ b/libgo/go/runtime/chan_test.go
@@ -485,11 +485,11 @@ func TestSelectFairness(t *testing.T) {
// If the select in the goroutine is fair,
// cnt1 and cnt2 should be about the same value.
// With 10,000 trials, the expected margin of error at
- // a confidence level of five nines is 4.4172 / (2 * Sqrt(10000)).
+ // a confidence level of six nines is 4.891676 / (2 * Sqrt(10000)).
r := float64(cnt1) / trials
e := math.Abs(r - 0.5)
t.Log(cnt1, cnt2, r, e)
- if e > 4.4172/(2*math.Sqrt(trials)) {
+ if e > 4.891676/(2*math.Sqrt(trials)) {
t.Errorf("unfair select: in %d trials, results were %d, %d", trials, cnt1, cnt2)
}
close(done)
@@ -724,6 +724,7 @@ func TestSelectStackAdjust(t *testing.T) {
if after.NumGC-before.NumGC >= 2 {
goto done
}
+ runtime.Gosched()
}
t.Fatal("failed to trigger concurrent GC")
done:
@@ -1131,6 +1132,20 @@ func BenchmarkChanPopular(b *testing.B) {
wg.Wait()
}
+func BenchmarkChanClosed(b *testing.B) {
+ c := make(chan struct{})
+ close(c)
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ select {
+ case <-c:
+ default:
+ b.Error("Unreachable")
+ }
+ }
+ })
+}
+
var (
alwaysFalse = false
workSink = 0
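
For reference (not part of the patch): the new constant in TestSelectFairness above follows from the normal approximation of a fair coin over n trials. The standard error of the observed ratio cnt1/trials is 1/(2*sqrt(n)), and 4.891676 is approximately the two-sided normal quantile for a confidence level of six nines (1 - 1e-6), so for the 10000 trials used here:

    e_max = 4.891676 / (2 * sqrt(10000)) = 4.891676 / 200 ≈ 0.0245

i.e. the test only fails if cnt1 falls outside roughly 5000 ± 245 of the 10000 trials, which a fair select does with probability on the order of 1e-6.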
diff --git a/libgo/go/runtime/checkptr.go b/libgo/go/runtime/checkptr.go
new file mode 100644
index 0000000..f478ddd
--- /dev/null
+++ b/libgo/go/runtime/checkptr.go
@@ -0,0 +1,106 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package runtime
+
+import "unsafe"
+
+type ptrAlignError struct {
+ ptr unsafe.Pointer
+ elem *_type
+ n uintptr
+}
+
+func (e ptrAlignError) RuntimeError() {}
+
+func (e ptrAlignError) Error() string {
+ return "runtime error: unsafe pointer conversion"
+}
+
+func checkptrAlignment(p unsafe.Pointer, elem *_type, n uintptr) {
+ // Check that (*[n]elem)(p) is appropriately aligned.
+ // TODO(mdempsky): What about fieldAlign?
+ if uintptr(p)&(uintptr(elem.align)-1) != 0 {
+ panic(ptrAlignError{p, elem, n})
+ }
+
+ // Check that (*[n]elem)(p) doesn't straddle multiple heap objects.
+ if size := n * elem.size; size > 1 && checkptrBase(p) != checkptrBase(add(p, size-1)) {
+ panic(ptrAlignError{p, elem, n})
+ }
+}
+
+type ptrArithError struct {
+ ptr unsafe.Pointer
+ originals []unsafe.Pointer
+}
+
+func (e ptrArithError) RuntimeError() {}
+
+func (e ptrArithError) Error() string {
+ return "runtime error: unsafe pointer arithmetic"
+}
+
+func checkptrArithmetic(p unsafe.Pointer, originals []unsafe.Pointer) {
+ if 0 < uintptr(p) && uintptr(p) < minLegalPointer {
+ panic(ptrArithError{p, originals})
+ }
+
+ // Check that if the computed pointer p points into a heap
+ // object, then one of the original pointers must have pointed
+ // into the same object.
+ base := checkptrBase(p)
+ if base == 0 {
+ return
+ }
+
+ for _, original := range originals {
+ if base == checkptrBase(original) {
+ return
+ }
+ }
+
+ panic(ptrArithError{p, originals})
+}
+
+// checkptrBase returns the base address for the allocation containing
+// the address p.
+//
+// Importantly, if p1 and p2 point into the same variable, then
+// checkptrBase(p1) == checkptrBase(p2). However, the converse/inverse
+// is not necessarily true as allocations can have trailing padding,
+// and multiple variables may be packed into a single allocation.
+func checkptrBase(p unsafe.Pointer) uintptr {
+ // stack
+ if gp := getg(); gp.stack.lo <= uintptr(p) && uintptr(p) < gp.stack.hi {
+ // TODO(mdempsky): Walk the stack to identify the
+ // specific stack frame or even stack object that p
+ // points into.
+ //
+ // In the mean time, use "1" as a pseudo-address to
+ // represent the stack. This is an invalid address on
+ // all platforms, so it's guaranteed to be distinct
+ // from any of the addresses we might return below.
+ return 1
+ }
+
+ // heap (must check after stack because of #35068)
+ if base, _, _ := findObject(uintptr(p), 0, 0); base != 0 {
+ return base
+ }
+
+ // data or bss
+ for _, datap := range activeModules() {
+ if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
+ return datap.data
+ }
+ if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
+ return datap.bss
+ }
+ }
+
+ return 0
+}
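
checkptr.go is carried here under a +build ignore tag, since gccgo does not instrument unsafe operations, but for context this is the kind of conversion checkptrAlignment rejects when the gc compiler's checkptr instrumentation is enabled. The example below is a hypothetical illustration, not part of the patch.

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	buf := make([]byte, 16)

	// &buf[1] is almost certainly not 8-byte aligned, so viewing it as an
	// *int64 violates int64's alignment. With checkptr instrumentation
	// enabled (go build -gcflags=all=-d=checkptr) checkptrAlignment panics
	// with "runtime error: unsafe pointer conversion"; without it this
	// merely reads whatever bytes happen to be there.
	p := (*int64)(unsafe.Pointer(&buf[1]))
	fmt.Println(*p)
}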
diff --git a/libgo/go/runtime/crash_nonunix_test.go b/libgo/go/runtime/crash_nonunix_test.go
index bf349a5..06c197e 100644
--- a/libgo/go/runtime/crash_nonunix_test.go
+++ b/libgo/go/runtime/crash_nonunix_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build windows plan9 nacl js,wasm
+// +build windows plan9 js,wasm
package runtime_test
diff --git a/libgo/go/runtime/crash_test.go b/libgo/go/runtime/crash_test.go
index 6343e8e..6268f2e 100644
--- a/libgo/go/runtime/crash_test.go
+++ b/libgo/go/runtime/crash_test.go
@@ -104,8 +104,6 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error)
t.Skip("-quick")
}
- checkStaleRuntime(t)
-
testprog.Lock()
defer testprog.Unlock()
if testprog.dir == "" {
@@ -143,34 +141,12 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error)
return exe, nil
}
-var (
- staleRuntimeOnce sync.Once // guards init of staleRuntimeErr
- staleRuntimeErr error
-)
-
-func checkStaleRuntime(t *testing.T) {
- staleRuntimeOnce.Do(func() {
- if runtime.Compiler == "gccgo" {
- return
- }
- // 'go run' uses the installed copy of runtime.a, which may be out of date.
- out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.Stale}}", "runtime")).CombinedOutput()
- if err != nil {
- staleRuntimeErr = fmt.Errorf("failed to execute 'go list': %v\n%v", err, string(out))
- return
- }
- if string(out) != "false\n" {
- t.Logf("go list -f {{.Stale}} runtime:\n%s", out)
- out, err := testenv.CleanCmdEnv(exec.Command(testenv.GoToolPath(t), "list", "-gcflags=all="+os.Getenv("GO_GCFLAGS"), "-f", "{{.StaleReason}}", "runtime")).CombinedOutput()
- if err != nil {
- t.Logf("go list -f {{.StaleReason}} failed: %v", err)
- }
- t.Logf("go list -f {{.StaleReason}} runtime:\n%s", out)
- staleRuntimeErr = fmt.Errorf("Stale runtime.a. Run 'go install runtime'.")
- }
- })
- if staleRuntimeErr != nil {
- t.Fatal(staleRuntimeErr)
+func TestVDSO(t *testing.T) {
+ t.Parallel()
+ output := runTestProg(t, "testprog", "SignalInVDSO")
+ want := "success\n"
+ if output != want {
+ t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
}
}
@@ -231,9 +207,23 @@ func TestStackOverflow(t *testing.T) {
t.Skip("gccgo does not do stack overflow checking")
}
output := runTestProg(t, "testprog", "StackOverflow")
- want := "runtime: goroutine stack exceeds 1474560-byte limit\nfatal error: stack overflow"
- if !strings.HasPrefix(output, want) {
- t.Fatalf("output does not start with %q:\n%s", want, output)
+ want := []string{
+ "runtime: goroutine stack exceeds 1474560-byte limit\n",
+ "fatal error: stack overflow",
+ // information about the current SP and stack bounds
+ "runtime: sp=",
+ "stack=[",
+ }
+ if !strings.HasPrefix(output, want[0]) {
+ t.Errorf("output does not start with %q", want[0])
+ }
+ for _, s := range want[1:] {
+ if !strings.Contains(output, s) {
+ t.Errorf("output does not contain %q", s)
+ }
+ }
+ if t.Failed() {
+ t.Logf("output:\n%s", output)
}
}
@@ -257,6 +247,41 @@ panic: again
}
+func TestRecursivePanic2(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic2")
+ want := `first panic
+second panic
+panic: third panic
+
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
+func TestRecursivePanic3(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic3")
+ want := `panic: first panic
+
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
+func TestRecursivePanic4(t *testing.T) {
+ output := runTestProg(t, "testprog", "RecursivePanic4")
+ want := `panic: first panic [recovered]
+ panic: second panic
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+
+}
+
func TestGoexitCrash(t *testing.T) {
output := runTestProg(t, "testprog", "GoexitExit")
want := "no goroutines (main called runtime.Goexit) - deadlock!"
@@ -389,26 +414,32 @@ func TestRecoveredPanicAfterGoexit(t *testing.T) {
}
func TestRecoverBeforePanicAfterGoexit(t *testing.T) {
- // 1. defer a function that recovers
- // 2. defer a function that panics
- // 3. call goexit
- // Goexit should run the #2 defer. Its panic
- // should be caught by the #1 defer, and execution
- // should resume in the caller. Like the Goexit
- // never happened!
- defer func() {
- r := recover()
- if r == nil {
- panic("bad recover")
- }
- }()
- defer func() {
- panic("hello")
- }()
- runtime.Goexit()
+ t.Parallel()
+ output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit")
+ want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
+func TestRecoverBeforePanicAfterGoexit2(t *testing.T) {
+ t.Parallel()
+ output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit2")
+ want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!"
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
}
func TestNetpollDeadlock(t *testing.T) {
+ if os.Getenv("GO_BUILDER_NAME") == "darwin-amd64-10_12" {
+ // A suspected kernel bug in macOS 10.12 occasionally results in
+ // an apparent deadlock when dialing localhost. The errors have not
+ // been observed on newer versions of the OS, so we don't plan to work
+ // around them. See https://golang.org/issue/22019.
+ testenv.SkipFlaky(t, 22019)
+ }
+
t.Parallel()
output := runTestProg(t, "testprognet", "NetpollDeadlock")
want := "done\n"
@@ -420,7 +451,7 @@ func TestNetpollDeadlock(t *testing.T) {
func TestPanicTraceback(t *testing.T) {
t.Parallel()
output := runTestProg(t, "testprog", "PanicTraceback")
- want := "panic: hello"
+ want := "panic: hello\n\tpanic: panic pt2\n\tpanic: panic pt1\n"
if !strings.HasPrefix(output, want) {
t.Fatalf("output does not start with %q:\n%s", want, output)
}
diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go
index b4b015e..7ce5bb2 100644
--- a/libgo/go/runtime/crash_unix_test.go
+++ b/libgo/go/runtime/crash_unix_test.go
@@ -15,9 +15,11 @@ import (
"os/exec"
"path/filepath"
"runtime"
- "strings"
+ "sync"
"syscall"
"testing"
+ "time"
+ "unsafe"
)
// sigquit is the signal to send to kill a hanging testdata program.
@@ -33,6 +35,29 @@ func init() {
}
}
+func TestBadOpen(t *testing.T) {
+ // make sure we get the correct error code if open fails. Same for
+ // read/write/close on the resulting -1 fd. See issue 10052.
+ nonfile := []byte("/notreallyafile")
+ fd := runtime.Open(&nonfile[0], 0, 0)
+ if fd != -1 {
+ t.Errorf("open(%q)=%d, want -1", nonfile, fd)
+ }
+ var buf [32]byte
+ r := runtime.Read(-1, unsafe.Pointer(&buf[0]), int32(len(buf)))
+ if got, want := r, -int32(syscall.EBADF); got != want {
+ t.Errorf("read()=%d, want %d", got, want)
+ }
+ w := runtime.Write(^uintptr(0), unsafe.Pointer(&buf[0]), int32(len(buf)))
+ if got, want := w, -int32(syscall.EBADF); got != want {
+ t.Errorf("write()=%d, want %d", got, want)
+ }
+ c := runtime.Close(-1)
+ if c != -1 {
+ t.Errorf("close()=%d, want -1", c)
+ }
+}
+
func TestCrashDumpsAllThreads(t *testing.T) {
if *flagQuick {
t.Skip("-quick")
@@ -53,8 +78,6 @@ func TestCrashDumpsAllThreads(t *testing.T) {
testenv.MustHaveGoBuild(t)
- checkStaleRuntime(t)
-
t.Parallel()
dir, err := ioutil.TempDir("", "go-build")
@@ -76,18 +99,17 @@ func TestCrashDumpsAllThreads(t *testing.T) {
cmd = exec.Command(filepath.Join(dir, "a.exe"))
cmd = testenv.CleanCmdEnv(cmd)
- cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
-
- // Set GOGC=off. Because of golang.org/issue/10958, the tight
- // loops in the test program are not preemptible. If GC kicks
- // in, it may lock up and prevent main from saying it's ready.
- newEnv := []string{}
- for _, s := range cmd.Env {
- if !strings.HasPrefix(s, "GOGC=") {
- newEnv = append(newEnv, s)
- }
- }
- cmd.Env = append(newEnv, "GOGC=off")
+ cmd.Env = append(cmd.Env,
+ "GOTRACEBACK=crash",
+ // Set GOGC=off. Because of golang.org/issue/10958, the tight
+ // loops in the test program are not preemptible. If GC kicks
+ // in, it may lock up and prevent main from saying it's ready.
+ "GOGC=off",
+ // Set GODEBUG=asyncpreemptoff=1. If a thread is preempted
+ // when it receives SIGQUIT, it won't show the expected
+ // stack trace. See issue 35356.
+ "GODEBUG=asyncpreemptoff=1",
+ )
var outbuf bytes.Buffer
cmd.Stdout = &outbuf
@@ -288,3 +310,51 @@ func TestSignalDuringExec(t *testing.T) {
t.Fatalf("want %s, got %s\n", want, output)
}
}
+
+func TestSignalM(t *testing.T) {
+ if runtime.Compiler == "gccgo" {
+ t.Skip("no signalM for gccgo")
+ }
+
+ r, w, errno := runtime.Pipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+ runtime.Closeonexec(r)
+ runtime.Closeonexec(w)
+
+ var want, got int64
+ var wg sync.WaitGroup
+ ready := make(chan *runtime.M)
+ wg.Add(1)
+ go func() {
+ runtime.LockOSThread()
+ want, got = runtime.WaitForSigusr1(r, w, func(mp *runtime.M) {
+ ready <- mp
+ })
+ runtime.UnlockOSThread()
+ wg.Done()
+ }()
+ waitingM := <-ready
+ runtime.SendSigusr1(waitingM)
+
+ timer := time.AfterFunc(time.Second, func() {
+ // Write 1 to tell WaitForSigusr1 that we timed out.
+ bw := byte(1)
+ if n := runtime.Write(uintptr(w), unsafe.Pointer(&bw), 1); n != 1 {
+ t.Errorf("pipe write failed: %d", n)
+ }
+ })
+ defer timer.Stop()
+
+ wg.Wait()
+ if got == -1 {
+ t.Fatal("signalM signal not received")
+ } else if want != got {
+ t.Fatalf("signal sent to M %d, but received on M %d", want, got)
+ }
+}
diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go
index e480466..1202e36 100644
--- a/libgo/go/runtime/debug.go
+++ b/libgo/go/runtime/debug.go
@@ -26,12 +26,12 @@ func GOMAXPROCS(n int) int {
return ret
}
- stopTheWorld("GOMAXPROCS")
+ stopTheWorldGC("GOMAXPROCS")
// newprocs will be processed by startTheWorld
newprocs = int32(n)
- startTheWorld()
+ startTheWorldGC()
return ret
}
diff --git a/libgo/go/runtime/debug/heapdump_test.go b/libgo/go/runtime/debug/heapdump_test.go
index c986efc..de1ec27 100644
--- a/libgo/go/runtime/debug/heapdump_test.go
+++ b/libgo/go/runtime/debug/heapdump_test.go
@@ -13,7 +13,7 @@ import (
)
func TestWriteHeapDumpNonempty(t *testing.T) {
- if runtime.GOOS == "nacl" || runtime.GOOS == "js" {
+ if runtime.GOOS == "js" {
t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS)
}
f, err := ioutil.TempFile("", "heapdumptest")
@@ -42,7 +42,7 @@ func objfin(x *Obj) {
}
func TestWriteHeapDumpFinalizers(t *testing.T) {
- if runtime.GOOS == "nacl" || runtime.GOOS == "js" {
+ if runtime.GOOS == "js" {
t.Skipf("WriteHeapDump is not available on %s.", runtime.GOOS)
}
f, err := ioutil.TempFile("", "heapdumptest")
diff --git a/libgo/go/runtime/debug/mod.go b/libgo/go/runtime/debug/mod.go
index 58c6ae0..c283928 100644
--- a/libgo/go/runtime/debug/mod.go
+++ b/libgo/go/runtime/debug/mod.go
@@ -23,7 +23,7 @@ func ReadBuildInfo() (info *BuildInfo, ok bool) {
// the running binary.
type BuildInfo struct {
Path string // The main package path
- Main Module // The main module information
+ Main Module // The module containing the main package
Deps []*Module // Module dependencies
}
diff --git a/libgo/go/runtime/debug_test.go b/libgo/go/runtime/debug_test.go
index 12d93de..967477d 100644
--- a/libgo/go/runtime/debug_test.go
+++ b/libgo/go/runtime/debug_test.go
@@ -127,7 +127,7 @@ func TestDebugCall(t *testing.T) {
return x + 1
}
args.x = 42
- if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill, false); err != nil {
t.Fatal(err)
}
if args.yRet != 43 {
@@ -156,7 +156,7 @@ func TestDebugCallLarge(t *testing.T) {
args.in[i] = i
want[i] = i + 1
}
- if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, fn, &args, debugCallTKill, false); err != nil {
t.Fatal(err)
}
if want != args.out {
@@ -169,7 +169,7 @@ func TestDebugCallGC(t *testing.T) {
defer after()
// Inject a call that performs a GC.
- if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, runtime.GC, nil, debugCallTKill, false); err != nil {
t.Fatal(err)
}
}
@@ -180,7 +180,7 @@ func TestDebugCallGrowStack(t *testing.T) {
// Inject a call that grows the stack. debugCallWorker checks
// for stack pointer breakage.
- if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill); err != nil {
+ if _, err := runtime.InjectDebugCall(g, func() { growStack(nil) }, nil, debugCallTKill, false); err != nil {
t.Fatal(err)
}
}
@@ -216,7 +216,7 @@ func TestDebugCallUnsafePoint(t *testing.T) {
runtime.Gosched()
}
- _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill)
+ _, err := runtime.InjectDebugCall(g, func() {}, nil, debugCallTKill, true)
if msg := "call not at safe point"; err == nil || err.Error() != msg {
t.Fatalf("want %q, got %s", msg, err)
}
@@ -240,7 +240,7 @@ func TestDebugCallPanic(t *testing.T) {
}()
g := <-ready
- p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill)
+ p, err := runtime.InjectDebugCall(g, func() { panic("test") }, nil, debugCallTKill, false)
if err != nil {
t.Fatal(err)
}
diff --git a/libgo/go/runtime/debuglog.go b/libgo/go/runtime/debuglog.go
index 4f4109f..404d057 100644
--- a/libgo/go/runtime/debuglog.go
+++ b/libgo/go/runtime/debuglog.go
@@ -803,7 +803,7 @@ func printDebugLog() {
func printDebugLogPC(pc uintptr) {
print(hex(pc))
- name, file, line, _ := funcfileline(pc, -1)
+ name, file, line, _ := funcfileline(pc, -1, false)
if name == "" {
print(" [unknown PC]")
} else {
diff --git a/libgo/go/runtime/defer_test.go b/libgo/go/runtime/defer_test.go
new file mode 100644
index 0000000..3d8f812
--- /dev/null
+++ b/libgo/go/runtime/defer_test.go
@@ -0,0 +1,283 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "reflect"
+ "runtime"
+ "testing"
+)
+
+// Make sure open-coded defer exit code is not lost, even when there is an
+// unconditional panic (hence no return from the function)
+func TestUnconditionalPanic(t *testing.T) {
+ defer func() {
+ if recover() != "testUnconditional" {
+ t.Fatal("expected unconditional panic")
+ }
+ }()
+ panic("testUnconditional")
+}
+
+var glob int = 3
+
+// Test an open-coded defer and non-open-coded defer - make sure both defers run
+// and call recover()
+func TestOpenAndNonOpenDefers(t *testing.T) {
+ for {
+ // Non-open defer because in a loop
+ defer func(n int) {
+ if recover() != "testNonOpenDefer" {
+ t.Fatal("expected testNonOpen panic")
+ }
+ }(3)
+ if glob > 2 {
+ break
+ }
+ }
+ testOpen(t, 47)
+ panic("testNonOpenDefer")
+}
+
+//go:noinline
+func testOpen(t *testing.T, arg int) {
+ defer func(n int) {
+ if recover() != "testOpenDefer" {
+ t.Fatal("expected testOpen panic")
+ }
+ }(4)
+ if arg > 2 {
+ panic("testOpenDefer")
+ }
+}
+
+// Test a non-open-coded defer and an open-coded defer - make sure both defers run
+// and call recover()
+func TestNonOpenAndOpenDefers(t *testing.T) {
+ testOpen(t, 47)
+ for {
+ // Non-open defer because in a loop
+ defer func(n int) {
+ if recover() != "testNonOpenDefer" {
+ t.Fatal("expected testNonOpen panic")
+ }
+ }(3)
+ if glob > 2 {
+ break
+ }
+ }
+ panic("testNonOpenDefer")
+}
+
+var list []int
+
+// Make sure that conditional open-coded defers are activated correctly and run in
+// the correct order.
+func TestConditionalDefers(t *testing.T) {
+ list = make([]int, 0, 10)
+
+ defer func() {
+ if recover() != "testConditional" {
+ t.Fatal("expected panic")
+ }
+ want := []int{4, 2, 1}
+ if !reflect.DeepEqual(want, list) {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", want, list))
+ }
+
+ }()
+ testConditionalDefers(8)
+}
+
+func testConditionalDefers(n int) {
+ doappend := func(i int) {
+ list = append(list, i)
+ }
+
+ defer doappend(1)
+ if n > 5 {
+ defer doappend(2)
+ if n > 8 {
+ defer doappend(3)
+ } else {
+ defer doappend(4)
+ }
+ }
+ panic("testConditional")
+}
+
+// Test that there is no compile-time or run-time error if an open-coded defer
+// call is removed by constant propagation and dead-code elimination.
+func TestDisappearingDefer(t *testing.T) {
+ switch runtime.GOOS {
+ case "invalidOS":
+ defer func() {
+ t.Fatal("Defer shouldn't run")
+ }()
+ }
+}
+
+// This tests an extra recursive panic behavior that is only specified in the
+// code. Suppose a first panic P1 happens and starts processing defer calls. If a
+// second panic P2 happens while processing defer call D in frame F, then defer
+// call processing is restarted (with some potentially new defer calls created by
+// D or its callees). If the defer processing reaches the started defer call D
+// again in the defer stack, then the original panic P1 is aborted and cannot
+// continue panic processing or be recovered. If the panic P2 does a recover at
+// some point, it will naturally remove the original panic P1 from the stack
+// (since the original panic had to be in frame F or a descendant of F).
+func TestAbortedPanic(t *testing.T) {
+ defer func() {
+ r := recover()
+ if r != nil {
+ t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
+ }
+ }()
+ defer func() {
+ r := recover()
+ if r != "panic2" {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", "panic2", r))
+ }
+ }()
+ defer func() {
+ panic("panic2")
+ }()
+ panic("panic1")
+}
+
+// This tests that recover() does not succeed unless it is called directly from a
+// defer function that is directly called by the panic. Here, we first call it
+// from a defer function that is created by the defer function called directly by
+// the panic. In
+func TestRecoverMatching(t *testing.T) {
+ defer func() {
+ r := recover()
+ if r != "panic1" {
+ t.Fatal(fmt.Sprintf("wanted %v, got %v", "panic1", r))
+ }
+ }()
+ defer func() {
+ defer func() {
+ // Shouldn't succeed, even though it is called directly
+ // from a defer function, since this defer function was
+ // not directly called by the panic.
+ r := recover()
+ if r != nil {
+ t.Fatal(fmt.Sprintf("wanted nil recover, got %v", r))
+ }
+ }()
+ }()
+ panic("panic1")
+}
+
+type nonSSAable [128]byte
+
+type bigStruct struct {
+ x, y, z, w, p, q int64
+}
+
+type containsBigStruct struct {
+ element bigStruct
+}
+
+func mknonSSAable() nonSSAable {
+ globint1++
+ return nonSSAable{0, 0, 0, 0, 5}
+}
+
+var globint1, globint2, globint3 int
+
+//go:noinline
+func sideeffect(n int64) int64 {
+ globint2++
+ return n
+}
+
+func sideeffect2(in containsBigStruct) containsBigStruct {
+ globint3++
+ return in
+}
+
+// Test that nonSSAable arguments to defer are handled correctly and only evaluated once.
+func TestNonSSAableArgs(t *testing.T) {
+ globint1 = 0
+ globint2 = 0
+ globint3 = 0
+ var save1 byte
+ var save2 int64
+ var save3 int64
+ var save4 int64
+
+ defer func() {
+ if globint1 != 1 {
+ t.Fatal(fmt.Sprintf("globint1: wanted: 1, got %v", globint1))
+ }
+ if save1 != 5 {
+ t.Fatal(fmt.Sprintf("save1: wanted: 5, got %v", save1))
+ }
+ if globint2 != 1 {
+ t.Fatal(fmt.Sprintf("globint2: wanted: 1, got %v", globint2))
+ }
+ if save2 != 2 {
+ t.Fatal(fmt.Sprintf("save2: wanted: 2, got %v", save2))
+ }
+ if save3 != 4 {
+ t.Fatal(fmt.Sprintf("save3: wanted: 4, got %v", save3))
+ }
+ if globint3 != 1 {
+ t.Fatal(fmt.Sprintf("globint3: wanted: 1, got %v", globint3))
+ }
+ if save4 != 4 {
+ t.Fatal(fmt.Sprintf("save1: wanted: 4, got %v", save4))
+ }
+ }()
+
+ // Test function returning a non-SSAable arg
+ defer func(n nonSSAable) {
+ save1 = n[4]
+ }(mknonSSAable())
+ // Test composite literal that is not SSAable
+ defer func(b bigStruct) {
+ save2 = b.y
+ }(bigStruct{1, 2, 3, 4, 5, sideeffect(6)})
+
+ // Test struct field reference that is non-SSAable
+ foo := containsBigStruct{}
+ foo.element.z = 4
+ defer func(element bigStruct) {
+ save3 = element.z
+ }(foo.element)
+ defer func(element bigStruct) {
+ save4 = element.z
+ }(sideeffect2(foo).element)
+}
+
+//go:noinline
+func doPanic() {
+ panic("Test panic")
+}
+
+func TestDeferForFuncWithNoExit(t *testing.T) {
+ cond := 1
+ defer func() {
+ if cond != 2 {
+ t.Fatal(fmt.Sprintf("cond: wanted 2, got %v", cond))
+ }
+ if recover() != "Test panic" {
+ t.Fatal("Didn't find expected panic")
+ }
+ }()
+ x := 0
+ // Force a stack copy, to make sure that the &cond pointer passed to defer
+ // function is properly updated.
+ growStackIter(&x, 1000)
+ cond = 2
+ doPanic()
+
+ // This function has no exit/return, since it ends with an infinite loop
+ for {
+ }
+}
diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go
index c725bd93..bf8996c 100644
--- a/libgo/go/runtime/env_posix.go
+++ b/libgo/go/runtime/env_posix.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd hurd js,wasm linux nacl netbsd openbsd solaris windows
+// +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris windows
package runtime
diff --git a/libgo/go/runtime/error.go b/libgo/go/runtime/error.go
index 0c7f631..6cc46bf 100644
--- a/libgo/go/runtime/error.go
+++ b/libgo/go/runtime/error.go
@@ -129,7 +129,7 @@ func (e plainError) Error() string {
return string(e)
}
-// An boundsError represents a an indexing or slicing operation gone wrong.
+// A boundsError represents an indexing or slicing operation gone wrong.
type boundsError struct {
x int64
y int
diff --git a/libgo/go/runtime/export_debug_test.go b/libgo/go/runtime/export_debug_test.go
index 608d756..769ad55 100644
--- a/libgo/go/runtime/export_debug_test.go
+++ b/libgo/go/runtime/export_debug_test.go
@@ -21,7 +21,7 @@ import (
//
// On success, InjectDebugCall returns the panic value of fn or nil.
// If fn did not panic, its results will be available in args.
-func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (interface{}, error) {
+func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error, returnOnUnsafePoint bool) (interface{}, error) {
if gp.lockedm == 0 {
return nil, plainError("goroutine not locked to thread")
}
@@ -65,9 +65,16 @@ func InjectDebugCall(gp *g, fn, args interface{}, tkill func(tid int) error) (in
notetsleepg(&h.done, -1)
if h.err != "" {
switch h.err {
- case "retry _Grunnable", "executing on Go runtime stack":
+ case "call not at safe point":
+ if returnOnUnsafePoint {
+ // This is for TestDebugCallUnsafePoint.
+ return nil, h.err
+ }
+ fallthrough
+ case "retry _Grunnable", "executing on Go runtime stack", "call from within the Go runtime":
// These are transient states. Try to get out of them.
if i < 100 {
+ usleep(100)
Gosched()
continue
}
diff --git a/libgo/go/runtime/export_linux_test.go b/libgo/go/runtime/export_linux_test.go
index 96ff1c7..1f8e633 100644
--- a/libgo/go/runtime/export_linux_test.go
+++ b/libgo/go/runtime/export_linux_test.go
@@ -10,6 +10,9 @@ import "unsafe"
// var NewOSProc0 = newosproc0
// var Mincore = mincore
+var Add = add
+
+type EpollEvent epollevent
func Epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32 {
return epollctl(epfd, op, fd, (*epollevent)(ev))
diff --git a/libgo/go/runtime/export_mmap_test.go b/libgo/go/runtime/export_mmap_test.go
index 5f3e99a..000948b 100644
--- a/libgo/go/runtime/export_mmap_test.go
+++ b/libgo/go/runtime/export_mmap_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd hurd linux nacl netbsd openbsd solaris
+// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
// Export guts for testing.
diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go
index 10890d3..9a977d8 100644
--- a/libgo/go/runtime/export_test.go
+++ b/libgo/go/runtime/export_test.go
@@ -35,9 +35,18 @@ var Atoi = atoi
var Atoi32 = atoi32
var Nanotime = nanotime
+var NetpollBreak = netpollBreak
+var Usleep = usleep
+var PhysPageSize = physPageSize
var PhysHugePageSize = physHugePageSize
+var NetpollGenericInit = netpollGenericInit
+
+var ParseRelease = parseRelease
+
+const PreemptMSupported = preemptMSupported
+
type LFNode struct {
Next uint64
Pushcnt uintptr
@@ -51,6 +60,12 @@ func LFStackPop(head *uint64) *LFNode {
return (*LFNode)(unsafe.Pointer((*lfstack)(head).pop()))
}
+func Netpoll(delta int64) {
+ systemstack(func() {
+ netpoll(delta)
+ })
+}
+
func GCMask(x interface{}) (ret []byte) {
return nil
}
@@ -241,7 +256,7 @@ func CountPagesInUse() (pagesInUse, counted uintptr) {
pagesInUse = uintptr(mheap_.pagesInUse)
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
counted += s.npages
}
}
@@ -303,7 +318,7 @@ func ReadMemStatsSlow() (base, slow MemStats) {
// Add up current allocations in spans.
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
if sizeclass := s.spanclass.sizeclass(); sizeclass == 0 {
@@ -336,10 +351,18 @@ func ReadMemStatsSlow() (base, slow MemStats) {
slow.BySize[i].Frees = bySize[i].Frees
}
- for i := mheap_.free.start(0, 0); i.valid(); i = i.next() {
- slow.HeapReleased += uint64(i.span().released())
+ for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+ pg := mheap_.pages.chunkOf(i).scavenged.popcntRange(0, pallocChunkPages)
+ slow.HeapReleased += uint64(pg) * pageSize
+ }
+ for _, p := range allp {
+ pg := sys.OnesCount64(p.pcache.scav)
+ slow.HeapReleased += uint64(pg) * pageSize
}
+ // Unused space in the current arena also counts as released space.
+ slow.HeapReleased += uint64(mheap_.curArena.end - mheap_.curArena.base)
+
getg().m.mallocing--
})
@@ -512,200 +535,410 @@ func MapTombstoneCheck(m map[int]int) {
}
}
-// UnscavHugePagesSlow returns the value of mheap_.freeHugePages
-// and the number of unscavenged huge pages calculated by
-// scanning the heap.
-func UnscavHugePagesSlow() (uintptr, uintptr) {
- var base, slow uintptr
- // Run on the system stack to avoid deadlock from stack growth
- // trying to acquire the heap lock.
- systemstack(func() {
- lock(&mheap_.lock)
- base = mheap_.free.unscavHugePages
- for _, s := range mheap_.allspans {
- if s.state == mSpanFree && !s.scavenged {
- slow += s.hugePages()
- }
- }
- unlock(&mheap_.lock)
- })
- return base, slow
-}
+func RunGetgThreadSwitchTest() {
+ // Test that getg works correctly with thread switch.
+ // With gccgo, if we generate getg inlined, the backend
+ // may cache the address of the TLS variable, which
+ // will become invalid after a thread switch. This test
+ // checks that the bad caching doesn't happen.
-// Span is a safe wrapper around an mspan, whose memory
-// is managed manually.
-type Span struct {
- *mspan
+ ch := make(chan int)
+ go func(ch chan int) {
+ ch <- 5
+ LockOSThread()
+ }(ch)
+
+ g1 := getg()
+
+ // Block on a receive. This is likely to get us a thread
+ // switch. If we yield to the sender goroutine, it will
+ // lock the thread, forcing us to resume on a different
+ // thread.
+ <-ch
+
+ g2 := getg()
+ if g1 != g2 {
+ panic("g1 != g2")
+ }
+
+ // Also test getg after some control flow, as the
+ // backend is sensitive to control flow.
+ g3 := getg()
+ if g1 != g3 {
+ panic("g1 != g3")
+ }
}
-func AllocSpan(base, npages uintptr, scavenged bool) Span {
- var s *mspan
- systemstack(func() {
- lock(&mheap_.lock)
- s = (*mspan)(mheap_.spanalloc.alloc())
- unlock(&mheap_.lock)
- })
- s.init(base, npages)
- s.scavenged = scavenged
- return Span{s}
+const (
+ PageSize = pageSize
+ PallocChunkPages = pallocChunkPages
+ PageAlloc64Bit = pageAlloc64Bit
+)
+
+// Expose pallocSum for testing.
+type PallocSum pallocSum
+
+func PackPallocSum(start, max, end uint) PallocSum { return PallocSum(packPallocSum(start, max, end)) }
+func (m PallocSum) Start() uint { return pallocSum(m).start() }
+func (m PallocSum) Max() uint { return pallocSum(m).max() }
+func (m PallocSum) End() uint { return pallocSum(m).end() }
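As a quick illustration of the exported summary type, here is a minimal sketch of how a test might pack a summary and read it back through the accessors. The test name is hypothetical; the frame assumes a file in package runtime_test, which is where these export_test.go helpers are visible.

package runtime_test

import (
	"runtime"
	"testing"
)

// TestPallocSumRoundTrip packs (start, max, end) into a PallocSum and checks
// that the accessors recover the same values.
func TestPallocSumRoundTrip(t *testing.T) {
	s := runtime.PackPallocSum(4, 32, 8)
	if s.Start() != 4 || s.Max() != 32 || s.End() != 8 {
		t.Fatalf("got (%d, %d, %d), want (4, 32, 8)", s.Start(), s.Max(), s.End())
	}
}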
+
+// Expose pallocBits for testing.
+type PallocBits pallocBits
+
+func (b *PallocBits) Find(npages uintptr, searchIdx uint) (uint, uint) {
+ return (*pallocBits)(b).find(npages, searchIdx)
}
+func (b *PallocBits) AllocRange(i, n uint) { (*pallocBits)(b).allocRange(i, n) }
+func (b *PallocBits) Free(i, n uint) { (*pallocBits)(b).free(i, n) }
+func (b *PallocBits) Summarize() PallocSum { return PallocSum((*pallocBits)(b).summarize()) }
+func (b *PallocBits) PopcntRange(i, n uint) uint { return (*pageBits)(b).popcntRange(i, n) }
-func (s *Span) Free() {
- systemstack(func() {
- lock(&mheap_.lock)
- mheap_.spanalloc.free(unsafe.Pointer(s.mspan))
- unlock(&mheap_.lock)
- })
- s.mspan = nil
+// SummarizeSlow is a slow but more obviously correct implementation
+// of (*pallocBits).summarize. Used for testing.
+func SummarizeSlow(b *PallocBits) PallocSum {
+ var start, max, end uint
+
+ const N = uint(len(b)) * 64
+ for start < N && (*pageBits)(b).get(start) == 0 {
+ start++
+ }
+ for end < N && (*pageBits)(b).get(N-end-1) == 0 {
+ end++
+ }
+ run := uint(0)
+ for i := uint(0); i < N; i++ {
+ if (*pageBits)(b).get(i) == 0 {
+ run++
+ } else {
+ run = 0
+ }
+ if run > max {
+ max = run
+ }
+ }
+ return PackPallocSum(start, max, end)
}
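SummarizeSlow is the reference implementation; a hedged sketch of the cross-check it enables, using the same hypothetical runtime_test frame as the sketch above:

// TestSummarizeMatchesSlow compares the optimized summary against the
// obviously-correct reference implementation.
func TestSummarizeMatchesSlow(t *testing.T) {
	var b runtime.PallocBits
	b.AllocRange(0, 16) // mark the first 16 pages allocated
	b.Free(4, 4)        // punch a 4-page hole back out
	if got, want := b.Summarize(), runtime.SummarizeSlow(&b); got != want {
		t.Fatalf("Summarize() = %+v, SummarizeSlow() = %+v", got, want)
	}
}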
-func (s Span) Base() uintptr {
- return s.mspan.base()
+// Expose non-trivial helpers for testing.
+func FindBitRange64(c uint64, n uint) uint { return findBitRange64(c, n) }
+
+// Given two PallocBits, returns a set of bit ranges where
+// they differ.
+func DiffPallocBits(a, b *PallocBits) []BitRange {
+ ba := (*pageBits)(a)
+ bb := (*pageBits)(b)
+
+ var d []BitRange
+ base, size := uint(0), uint(0)
+ for i := uint(0); i < uint(len(ba))*64; i++ {
+ if ba.get(i) != bb.get(i) {
+ if size == 0 {
+ base = i
+ }
+ size++
+ } else {
+ if size != 0 {
+ d = append(d, BitRange{base, size})
+ }
+ size = 0
+ }
+ }
+ if size != 0 {
+ d = append(d, BitRange{base, size})
+ }
+ return d
+}
+
+// StringifyPallocBits gets the bits in the bit range r from b,
+// and returns a string containing the bits as ASCII 0 and 1
+// characters.
+func StringifyPallocBits(b *PallocBits, r BitRange) string {
+ str := ""
+ for j := r.I; j < r.I+r.N; j++ {
+ if (*pageBits)(b).get(j) != 0 {
+ str += "1"
+ } else {
+ str += "0"
+ }
+ }
+ return str
}
-func (s Span) Pages() uintptr {
- return s.mspan.npages
+// Expose pallocData for testing.
+type PallocData pallocData
+
+func (d *PallocData) FindScavengeCandidate(searchIdx uint, min, max uintptr) (uint, uint) {
+ return (*pallocData)(d).findScavengeCandidate(searchIdx, min, max)
+}
+func (d *PallocData) AllocRange(i, n uint) { (*pallocData)(d).allocRange(i, n) }
+func (d *PallocData) ScavengedSetRange(i, n uint) {
+ (*pallocData)(d).scavenged.setRange(i, n)
+}
+func (d *PallocData) PallocBits() *PallocBits {
+ return (*PallocBits)(&(*pallocData)(d).pallocBits)
+}
+func (d *PallocData) Scavenged() *PallocBits {
+ return (*PallocBits)(&(*pallocData)(d).scavenged)
}
-type TreapIterType treapIterType
+// Expose fillAligned for testing.
+func FillAligned(x uint64, m uint) uint64 { return fillAligned(x, m) }
-const (
- TreapIterScav TreapIterType = TreapIterType(treapIterScav)
- TreapIterHuge = TreapIterType(treapIterHuge)
- TreapIterBits = treapIterBits
-)
+// Expose pageCache for testing.
+type PageCache pageCache
-type TreapIterFilter treapIterFilter
+const PageCachePages = pageCachePages
-func TreapFilter(mask, match TreapIterType) TreapIterFilter {
- return TreapIterFilter(treapFilter(treapIterType(mask), treapIterType(match)))
+func NewPageCache(base uintptr, cache, scav uint64) PageCache {
+ return PageCache(pageCache{base: base, cache: cache, scav: scav})
}
-
-func (s Span) MatchesIter(mask, match TreapIterType) bool {
- return treapFilter(treapIterType(mask), treapIterType(match)).matches(s.treapFilter())
+func (c *PageCache) Empty() bool { return (*pageCache)(c).empty() }
+func (c *PageCache) Base() uintptr { return (*pageCache)(c).base }
+func (c *PageCache) Cache() uint64 { return (*pageCache)(c).cache }
+func (c *PageCache) Scav() uint64 { return (*pageCache)(c).scav }
+func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) {
+ return (*pageCache)(c).alloc(npages)
}
-
-type TreapIter struct {
- treapIter
+func (c *PageCache) Flush(s *PageAlloc) {
+ (*pageCache)(c).flush((*pageAlloc)(s))
}
-func (t TreapIter) Span() Span {
- return Span{t.span()}
+// Expose chunk index type.
+type ChunkIdx chunkIdx
+
+// Expose pageAlloc for testing. Note that because pageAlloc is
+// not in the heap, neither is PageAlloc.
+type PageAlloc pageAlloc
+
+func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) {
+ return (*pageAlloc)(p).alloc(npages)
+}
+func (p *PageAlloc) AllocToCache() PageCache {
+ return PageCache((*pageAlloc)(p).allocToCache())
+}
+func (p *PageAlloc) Free(base, npages uintptr) {
+ (*pageAlloc)(p).free(base, npages)
+}
+func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
+ return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
+}
+func (p *PageAlloc) Scavenge(nbytes uintptr, locked bool) (r uintptr) {
+ systemstack(func() {
+ r = (*pageAlloc)(p).scavenge(nbytes, locked)
+ })
+ return
+}
+func (p *PageAlloc) InUse() []AddrRange {
+ ranges := make([]AddrRange, 0, len(p.inUse.ranges))
+ for _, r := range p.inUse.ranges {
+ ranges = append(ranges, AddrRange{
+ Base: r.base,
+ Limit: r.limit,
+ })
+ }
+ return ranges
}
-func (t TreapIter) Valid() bool {
- return t.valid()
+// Returns nil if the PallocData's L2 is missing.
+func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData {
+ ci := chunkIdx(i)
+ l2 := (*pageAlloc)(p).chunks[ci.l1()]
+ if l2 == nil {
+ return nil
+ }
+ return (*PallocData)(&l2[ci.l2()])
}
-func (t TreapIter) Next() TreapIter {
- return TreapIter{t.next()}
+// AddrRange represents a range over addresses.
+// Specifically, it represents the range [Base, Limit).
+type AddrRange struct {
+ Base, Limit uintptr
}
-func (t TreapIter) Prev() TreapIter {
- return TreapIter{t.prev()}
+// BitRange represents a range over a bitmap.
+type BitRange struct {
+ I, N uint // bit index and length in bits
}
-// Treap is a safe wrapper around mTreap for testing.
+// NewPageAlloc creates a new page allocator for testing and
+// initializes it with the scav and chunks maps. Each key in these maps
+// represents a chunk index and each value is a series of bit ranges to
+// set within each bitmap's chunk.
//
-// It must never be heap-allocated because mTreap is
-// notinheap.
+// The initialization of the pageAlloc preserves the invariant that if a
+// scavenged bit is set the alloc bit is necessarily unset, so some
+// of the bits described by scav may be cleared in the final bitmap if
+// ranges in chunks overlap with them.
//
-//go:notinheap
-type Treap struct {
- mTreap
-}
+// scav is optional, and if nil, the scavenged bitmap will be cleared
+// (as opposed to all 1s, which it usually is). Furthermore, every
+// chunk index in scav must appear in chunks; ones that do not are
+// ignored.
+func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
+ p := new(pageAlloc)
+
+ // We've got an entry, so initialize the pageAlloc.
+ p.init(new(mutex), nil)
+ p.test = true
+
+ for i, init := range chunks {
+ addr := chunkBase(chunkIdx(i))
+
+ // Mark the chunk's existence in the pageAlloc.
+ p.grow(addr, pallocChunkBytes)
+
+ // Initialize the bitmap and update pageAlloc metadata.
+ chunk := p.chunkOf(chunkIndex(addr))
+
+ // Clear all the scavenged bits that p.grow set.
+ chunk.scavenged.clearRange(0, pallocChunkPages)
+
+ // Apply scavenge state if applicable.
+ if scav != nil {
+ if scvg, ok := scav[i]; ok {
+ for _, s := range scvg {
+ // Ignore the case of s.N == 0. setRange doesn't handle
+ // it and it's a no-op anyway.
+ if s.N != 0 {
+ chunk.scavenged.setRange(s.I, s.N)
+ }
+ }
+ }
+ }
+ p.resetScavengeAddr()
+
+ // Apply alloc state.
+ for _, s := range init {
+ // Ignore the case of s.N == 0. allocRange doesn't handle
+ // it and it's a no-op anyway.
+ if s.N != 0 {
+ chunk.allocRange(s.I, s.N)
+ }
+ }
-func (t *Treap) Start(mask, match TreapIterType) TreapIter {
- return TreapIter{t.start(treapIterType(mask), treapIterType(match))}
+ // Update heap metadata for the allocRange calls above.
+ p.update(addr, pallocChunkPages, false, false)
+ }
+ return (*PageAlloc)(p)
}
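To make the chunks/scav map convention concrete, here is a hedged sketch (same hypothetical runtime_test frame as the earlier sketches) that builds a one-chunk allocator with pages 0-9 already allocated and pages 20-29 scavenged, then allocates a page:

func TestNewPageAllocSketch(t *testing.T) {
	chunks := map[runtime.ChunkIdx][]runtime.BitRange{
		runtime.BaseChunkIdx: {{I: 0, N: 10}}, // pages 0-9 already allocated
	}
	scav := map[runtime.ChunkIdx][]runtime.BitRange{
		runtime.BaseChunkIdx: {{I: 20, N: 10}}, // pages 20-29 marked scavenged
	}
	b := runtime.NewPageAlloc(chunks, scav)
	defer runtime.FreePageAlloc(b)

	// The first free page is page 10, so a one-page allocation should
	// return its base address.
	addr, _ := b.Alloc(1)
	if want := runtime.PageBase(runtime.BaseChunkIdx, 10); addr != want {
		t.Fatalf("Alloc(1) = %x, want %x", addr, want)
	}
}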
-func (t *Treap) End(mask, match TreapIterType) TreapIter {
- return TreapIter{t.end(treapIterType(mask), treapIterType(match))}
-}
+// FreePageAlloc releases hard OS resources owned by the pageAlloc. Once this
+// is called the pageAlloc may no longer be used. The object itself will be
+// collected by the garbage collector once it is no longer live.
+func FreePageAlloc(pp *PageAlloc) {
+ p := (*pageAlloc)(pp)
-func (t *Treap) Insert(s Span) {
- // mTreap uses a fixalloc in mheap_ for treapNode
- // allocation which requires the mheap_ lock to manipulate.
- // Locking here is safe because the treap itself never allocs
- // or otherwise ends up grabbing this lock.
- systemstack(func() {
- lock(&mheap_.lock)
- t.insert(s.mspan)
- unlock(&mheap_.lock)
- })
- t.CheckInvariants()
+ // Free all the mapped space for the summary levels.
+ if pageAlloc64Bit != 0 {
+ for l := 0; l < summaryLevels; l++ {
+ sysFree(unsafe.Pointer(&p.summary[l][0]), uintptr(cap(p.summary[l]))*pallocSumBytes, nil)
+ }
+ } else {
+ resSize := uintptr(0)
+ for _, s := range p.summary {
+ resSize += uintptr(cap(s)) * pallocSumBytes
+ }
+ sysFree(unsafe.Pointer(&p.summary[0][0]), alignUp(resSize, physPageSize), nil)
+ }
+
+ // Free the mapped space for chunks.
+ for i := range p.chunks {
+ if x := p.chunks[i]; x != nil {
+ p.chunks[i] = nil
+ // This memory comes from sysAlloc and will always be page-aligned.
+ sysFree(unsafe.Pointer(x), unsafe.Sizeof(*p.chunks[0]), nil)
+ }
+ }
}
-func (t *Treap) Find(npages uintptr) TreapIter {
- return TreapIter{t.find(npages)}
+// BaseChunkIdx is a convenient chunkIdx value which works on both
+// 64 bit and 32 bit platforms, allowing the tests to share code
+// between the two.
+//
+// On AIX, the arenaBaseOffset is 0x0a00000000000000. However, this
+// constant can't be used here because it is negative and will cause
+// a constant overflow.
+//
+// This should not be higher than 0x100*pallocChunkBytes to support
+// mips and mipsle, which only have 31-bit address spaces.
+var BaseChunkIdx = ChunkIdx(chunkIndex(((0xc000*pageAlloc64Bit + 0x100*pageAlloc32Bit) * pallocChunkBytes) + 0x0a00000000000000*sys.GoosAix))
+
+// PageBase returns an address given a chunk index and a page index
+// relative to that chunk.
+func PageBase(c ChunkIdx, pageIdx uint) uintptr {
+ return chunkBase(chunkIdx(c)) + uintptr(pageIdx)*pageSize
}
-func (t *Treap) Erase(i TreapIter) {
- // mTreap uses a fixalloc in mheap_ for treapNode
- // freeing which requires the mheap_ lock to manipulate.
- // Locking here is safe because the treap itself never allocs
- // or otherwise ends up grabbing this lock.
- systemstack(func() {
- lock(&mheap_.lock)
- t.erase(i.treapIter)
- unlock(&mheap_.lock)
- })
- t.CheckInvariants()
+type BitsMismatch struct {
+ Base uintptr
+ Got, Want uint64
}
-func (t *Treap) RemoveSpan(s Span) {
- // See Erase about locking.
+func CheckScavengedBitsCleared(mismatches []BitsMismatch) (n int, ok bool) {
+ ok = true
+
+ // Run on the system stack to avoid stack growth allocation.
systemstack(func() {
+ getg().m.mallocing++
+
+ // Lock so that we can safely access the bitmap.
lock(&mheap_.lock)
- t.removeSpan(s.mspan)
+ chunkLoop:
+ for i := mheap_.pages.start; i < mheap_.pages.end; i++ {
+ chunk := mheap_.pages.chunkOf(i)
+ for j := 0; j < pallocChunkPages/64; j++ {
+ // Run over each 64-bit bitmap section and ensure
+ // scavenged is being cleared properly on allocation.
+ // If a used bit and scavenged bit are both set, that's
+ // an error, and could indicate a larger problem, or
+ // an accounting problem.
+ want := chunk.scavenged[j] &^ chunk.pallocBits[j]
+ got := chunk.scavenged[j]
+ if want != got {
+ ok = false
+ if n >= len(mismatches) {
+ break chunkLoop
+ }
+ mismatches[n] = BitsMismatch{
+ Base: chunkBase(i) + uintptr(j)*64*pageSize,
+ Got: got,
+ Want: want,
+ }
+ n++
+ }
+ }
+ }
unlock(&mheap_.lock)
- })
- t.CheckInvariants()
-}
-func (t *Treap) Size() int {
- i := 0
- t.mTreap.treap.walkTreap(func(t *treapNode) {
- i++
+ getg().m.mallocing--
})
- return i
+ return
}
-func (t *Treap) CheckInvariants() {
- t.mTreap.treap.walkTreap(checkTreapNode)
- t.mTreap.treap.validateInvariants()
-}
+func PageCachePagesLeaked() (leaked uintptr) {
+ stopTheWorld("PageCachePagesLeaked")
-func RunGetgThreadSwitchTest() {
- // Test that getg works correctly with thread switch.
- // With gccgo, if we generate getg inlined, the backend
- // may cache the address of the TLS variable, which
- // will become invalid after a thread switch. This test
- // checks that the bad caching doesn't happen.
-
- ch := make(chan int)
- go func(ch chan int) {
- ch <- 5
- LockOSThread()
- }(ch)
-
- g1 := getg()
+ // Walk over destroyed Ps and look for unflushed caches.
+ deadp := allp[len(allp):cap(allp)]
+ for _, p := range deadp {
+ // Since we're going past len(allp) we may see nil Ps.
+ // Just ignore them.
+ if p != nil {
+ leaked += uintptr(sys.OnesCount64(p.pcache.cache))
+ }
+ }
- // Block on a receive. This is likely to get us a thread
- // switch. If we yield to the sender goroutine, it will
- // lock the thread, forcing us to resume on a different
- // thread.
- <-ch
+ startTheWorld()
+ return
+}
- g2 := getg()
- if g1 != g2 {
- panic("g1 != g2")
- }
+var Semacquire = semacquire
+var Semrelease1 = semrelease1
- // Also test getg after some control flow, as the
- // backend is sensitive to control flow.
- g3 := getg()
- if g1 != g3 {
- panic("g1 != g3")
- }
+func SemNwait(addr *uint32) uint32 {
+ root := semroot(addr)
+ return atomic.Load(&root.nwait)
}
+
+var Pusestackmaps = &usestackmaps
diff --git a/libgo/go/runtime/export_unix_test.go b/libgo/go/runtime/export_unix_test.go
index 064d2b2..9d0f0d8 100644
--- a/libgo/go/runtime/export_unix_test.go
+++ b/libgo/go/runtime/export_unix_test.go
@@ -6,6 +6,13 @@
package runtime
+import "unsafe"
+
+var NonblockingPipe = nonblockingPipe
+var Pipe = pipe
+var SetNonblock = setNonblock
+var Closeonexec = closeonexec
+
func sigismember(mask *sigset, i int) bool {
clear := *mask
sigdelset(&clear, i)
@@ -17,3 +24,71 @@ func Sigisblocked(i int) bool {
sigprocmask(_SIG_SETMASK, nil, &sigmask)
return sigismember(&sigmask, i)
}
+
+type M = m
+
+var waitForSigusr1 struct {
+ rdpipe int32
+ wrpipe int32
+ mID int64
+}
+
+// WaitForSigusr1 blocks until a SIGUSR1 is received. It calls ready
+// when it is set up to receive SIGUSR1. The ready function should
+// cause a SIGUSR1 to be sent. The r and w arguments are a pipe that
+// the signal handler can use to report when the signal is received.
+//
+// Once SIGUSR1 is received, it returns the ID of the current M and
+// the ID of the M the SIGUSR1 was received on. If the caller writes
+// a non-zero byte to w, WaitForSigusr1 returns immediately with -1, -1.
+func WaitForSigusr1(r, w int32, ready func(mp *M)) (int64, int64) {
+ lockOSThread()
+ // Make sure we can receive SIGUSR1.
+ unblocksig(_SIGUSR1)
+
+ waitForSigusr1.rdpipe = r
+ waitForSigusr1.wrpipe = w
+
+ mp := getg().m
+ testSigusr1 = waitForSigusr1Callback
+ ready(mp)
+
+ // Wait for the signal. We use a pipe rather than a note
+ // because write is always async-signal-safe.
+ entersyscallblock()
+ var b byte
+ read(waitForSigusr1.rdpipe, noescape(unsafe.Pointer(&b)), 1)
+ exitsyscall()
+
+ gotM := waitForSigusr1.mID
+ testSigusr1 = nil
+
+ unlockOSThread()
+
+ if b != 0 {
+ // timeout signal from caller
+ return -1, -1
+ }
+ return mp.id, gotM
+}
+
+// waitForSigusr1Callback is called from the signal handler during
+// WaitForSigusr1. It must not have write barriers because there may
+// not be a P.
+//
+//go:nowritebarrierrec
+func waitForSigusr1Callback(gp *g) bool {
+ if gp == nil || gp.m == nil {
+ waitForSigusr1.mID = -1
+ } else {
+ waitForSigusr1.mID = gp.m.id
+ }
+ b := byte(0)
+ write(uintptr(waitForSigusr1.wrpipe), noescape(unsafe.Pointer(&b)), 1)
+ return true
+}
+
+// SendSigusr1 sends SIGUSR1 to mp.
+func SendSigusr1(mp *M) {
+ panic("SendSigusr1")
+}
diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go
index 9dbf057..e2e601f 100644
--- a/libgo/go/runtime/extern.go
+++ b/libgo/go/runtime/extern.go
@@ -125,6 +125,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
IDs will refer to the ID of the goroutine at the time of creation; it's possible for this
ID to be reused for another goroutine. Setting N to 0 will report no ancestry information.
+ asyncpreemptoff: asyncpreemptoff=1 disables signal-based
+ asynchronous goroutine preemption. This makes some loops
+ non-preemptible for long periods, which may delay GC and
+ goroutine scheduling. This is useful for debugging GC issues
+ because it also disables the conservative stack scanning used
+ for asynchronously preempted goroutines.
+
The net, net/http, and crypto/tls packages also refer to debugging variables in GODEBUG.
See the documentation for those packages for details.
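Since GODEBUG is read from the environment at program start, the new asyncpreemptoff knob is typically exercised from outside the process. A hedged sketch; the ./worker binary is hypothetical:

package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	cmd := exec.Command("./worker") // hypothetical test binary
	cmd.Env = append(os.Environ(), "GODEBUG=asyncpreemptoff=1")
	out, err := cmd.CombinedOutput()
	fmt.Printf("err=%v\n%s", err, out)
}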
diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go
index 3eb01bf..8f14bf9 100644
--- a/libgo/go/runtime/gc_test.go
+++ b/libgo/go/runtime/gc_test.go
@@ -22,12 +22,6 @@ func TestGcSys(t *testing.T) {
if os.Getenv("GOGC") == "off" {
t.Skip("skipping test; GOGC=off in environment")
}
- if runtime.GOOS == "windows" {
- t.Skip("skipping test; GOOS=windows http://golang.org/issue/27156")
- }
- if runtime.GOOS == "linux" && runtime.GOARCH == "arm64" {
- t.Skip("skipping test; GOOS=linux GOARCH=arm64 https://github.com/golang/go/issues/27636")
- }
got := runTestProg(t, "testprog", "GCSys")
want := "OK\n"
if got != want {
@@ -473,25 +467,6 @@ func TestReadMemStats(t *testing.T) {
}
}
-func TestUnscavHugePages(t *testing.T) {
- // Allocate 20 MiB and immediately free it a few times to increase
- // the chance that unscavHugePages isn't zero and that some kind of
- // accounting had to happen in the runtime.
- for j := 0; j < 3; j++ {
- var large [][]byte
- for i := 0; i < 5; i++ {
- large = append(large, make([]byte, runtime.PhysHugePageSize))
- }
- runtime.KeepAlive(large)
- runtime.GC()
- }
- base, slow := runtime.UnscavHugePagesSlow()
- if base != slow {
- logDiff(t, "unscavHugePages", reflect.ValueOf(base), reflect.ValueOf(slow))
- t.Fatal("unscavHugePages mismatch")
- }
-}
-
func logDiff(t *testing.T, prefix string, got, want reflect.Value) {
typ := got.Type()
switch typ.Kind() {
diff --git a/libgo/go/runtime/gcinfo_test.go b/libgo/go/runtime/gcinfo_test.go
index 89144d5..fc24f04 100644
--- a/libgo/go/runtime/gcinfo_test.go
+++ b/libgo/go/runtime/gcinfo_test.go
@@ -173,14 +173,6 @@ func infoBigStruct() []byte {
typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
typePointer, typeScalar, // i string
}
- case "amd64p32":
- return []byte{
- typePointer, // q *int
- typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
- typePointer, typeScalar, typeScalar, // r []byte
- typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
- typePointer, typeScalar, // i string
- }
default:
panic("unknown arch")
}
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
index 3f94256..cff663a 100644
--- a/libgo/go/runtime/hash64.go
+++ b/libgo/go/runtime/hash64.go
@@ -25,8 +25,7 @@ const (
)
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
- if (GOARCH == "amd64" || GOARCH == "arm64") &&
- GOOS != "nacl" && useAeshash {
+ if (GOARCH == "amd64" || GOARCH == "arm64") && useAeshash {
return aeshash(p, seed, s)
}
h := uint64(seed + s*hashkey[0])
diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go
index c968ab3..e8f16e9 100644
--- a/libgo/go/runtime/heapdump.go
+++ b/libgo/go/runtime/heapdump.go
@@ -313,7 +313,7 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, ft *functype, ot *ptrtype) {
func dumproots() {
// MSpan.types
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
// Finalizers
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
@@ -336,7 +336,7 @@ var freemark [_PageSize / 8]bool
func dumpobjs() {
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
p := s.base()
@@ -445,7 +445,7 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs,
dumpint(uint64(nstk))
for i := uintptr(0); i < nstk; i++ {
pc := stk[i]
- fn, file, line, _ := funcfileline(pc, -1)
+ fn, file, line, _ := funcfileline(pc, -1, false)
if fn == "" {
var buf [64]byte
n := len(buf)
@@ -483,7 +483,7 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs,
func dumpmemprof() {
iterate_memprof(dumpmemprof_callback)
for _, s := range mheap_.allspans {
- if s.state != mSpanInUse {
+ if s.state.get() != mSpanInUse {
continue
}
for sp := s.specials; sp != nil; sp = sp.next {
@@ -504,7 +504,7 @@ var dumphdr = []byte("go1.7 heap dump\n")
func mdump() {
// make sure we're done sweeping
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
s.ensureSwept()
}
}
diff --git a/libgo/go/runtime/internal/atomic/atomic.c b/libgo/go/runtime/internal/atomic/atomic.c
index 17c83a2..8ae4d7b 100644
--- a/libgo/go/runtime/internal/atomic/atomic.c
+++ b/libgo/go/runtime/internal/atomic/atomic.c
@@ -26,6 +26,16 @@ Loadp (void *ptr)
return __atomic_load_n ((void **) ptr, __ATOMIC_SEQ_CST);
}
+uint8_t Load8 (uint8_t *ptr)
+ __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Load8")
+ __attribute__ ((no_split_stack));
+
+uint8_t
+Load8 (uint8_t *ptr)
+{
+ return __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+}
+
uint64_t Load64 (uint64_t *ptr)
__asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Load64")
__attribute__ ((no_split_stack));
@@ -238,6 +248,16 @@ Store (uint32_t *ptr, uint32_t val)
__atomic_store_n (ptr, val, __ATOMIC_SEQ_CST);
}
+void Store8 (uint8_t *ptr, uint8_t val)
+ __asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Store8")
+ __attribute__ ((no_split_stack));
+
+void
+Store8 (uint8_t *ptr, uint8_t val)
+{
+ __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST);
+}
+
void Store64 (uint64_t *ptr, uint64_t val)
__asm__ (GOSYM_PREFIX "runtime..z2finternal..z2fatomic.Store64")
__attribute__ ((no_split_stack));
diff --git a/libgo/go/runtime/internal/atomic/atomic_test.go b/libgo/go/runtime/internal/atomic/atomic_test.go
index 0ba7544..0c1125c 100644
--- a/libgo/go/runtime/internal/atomic/atomic_test.go
+++ b/libgo/go/runtime/internal/atomic/atomic_test.go
@@ -86,14 +86,8 @@ func TestUnaligned64(t *testing.T) {
// a continual source of pain. Test that on 32-bit systems they crash
// instead of failing silently.
- switch runtime.GOARCH {
- default:
- if unsafe.Sizeof(int(0)) != 4 {
- t.Skip("test only runs on 32-bit systems")
- }
- case "amd64p32":
- // amd64p32 can handle unaligned atomics.
- t.Skipf("test not needed on %v", runtime.GOARCH)
+ if unsafe.Sizeof(int(0)) != 4 {
+ t.Skip("test only runs on 32-bit systems")
}
x := make([]uint32, 4)
@@ -109,3 +103,120 @@ func TestUnaligned64(t *testing.T) {
shouldPanic(t, "Xchg64", func() { atomic.Xchg64(up64, 1) })
shouldPanic(t, "Cas64", func() { atomic.Cas64(up64, 1, 2) })
}
+
+func TestAnd8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0xff)
+ for i := uint8(0); i < 8; i++ {
+ atomic.And8(&x, ^(1 << i))
+ if r := uint8(0xff) << (i + 1); x != r {
+ t.Fatalf("clearing bit %#x: want %#x, got %#x", uint8(1<<i), r, x)
+ }
+ }
+
+ // Set every bit in array to 1.
+ a := make([]uint8, 1<<12)
+ for i := range a {
+ a[i] = 0xff
+ }
+
+ // Clear array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := ^uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.And8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
+
+func TestOr8(t *testing.T) {
+ // Basic sanity check.
+ x := uint8(0)
+ for i := uint8(0); i < 8; i++ {
+ atomic.Or8(&x, 1<<i)
+ if r := (uint8(1) << (i + 1)) - 1; x != r {
+ t.Fatalf("setting bit %#x: want %#x, got %#x", uint8(1)<<i, r, x)
+ }
+ }
+
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 1<<12)
+
+ // Set every bit in array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally set.
+ for i, v := range a {
+ if v != 0xff {
+ t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint8(0xff), v)
+ }
+ }
+}
+
+func TestBitwiseContended(t *testing.T) {
+ // Start with every bit in array set to 0.
+ a := make([]uint8, 16)
+
+ // Iterations to try.
+ N := 1 << 16
+ if testing.Short() {
+ N = 1 << 10
+ }
+
+ // Set and then clear every bit in the array bit-by-bit in different goroutines.
+ done := make(chan bool)
+ for i := 0; i < 8; i++ {
+ m := uint8(1 << i)
+ go func() {
+ for n := 0; n < N; n++ {
+ for i := range a {
+ atomic.Or8(&a[i], m)
+ if atomic.Load8(&a[i])&m != m {
+ t.Errorf("a[%v] bit %#x not set", i, m)
+ }
+ atomic.And8(&a[i], ^m)
+ if atomic.Load8(&a[i])&m != 0 {
+ t.Errorf("a[%v] bit %#x not clear", i, m)
+ }
+ }
+ }
+ done <- true
+ }()
+ }
+ for i := 0; i < 8; i++ {
+ <-done
+ }
+
+ // Check that the array has been totally cleared.
+ for i, v := range a {
+ if v != 0 {
+ t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint8(0), v)
+ }
+ }
+}
diff --git a/libgo/go/runtime/internal/atomic/bench_test.go b/libgo/go/runtime/internal/atomic/bench_test.go
index 083a75c..de71b0f 100644
--- a/libgo/go/runtime/internal/atomic/bench_test.go
+++ b/libgo/go/runtime/internal/atomic/bench_test.go
@@ -43,6 +43,46 @@ func BenchmarkAtomicStore(b *testing.B) {
}
}
+func BenchmarkAnd8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.And8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkAnd8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.And8(&x[255], i)
+ i++
+ }
+ })
+}
+
+func BenchmarkOr8(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ for i := 0; i < b.N; i++ {
+ atomic.Or8(&x[255], uint8(i))
+ }
+}
+
+func BenchmarkOr8Parallel(b *testing.B) {
+ var x [512]uint8 // give byte its own cache line
+ sink = &x
+ b.RunParallel(func(pb *testing.PB) {
+ i := uint8(0)
+ for pb.Next() {
+ atomic.Or8(&x[255], i)
+ i++
+ }
+ })
+}
+
func BenchmarkXadd(b *testing.B) {
var x uint32
ptr := &x
diff --git a/libgo/go/runtime/internal/atomic/gccgo.go b/libgo/go/runtime/internal/atomic/gccgo.go
index e5edbfb..4df8346 100644
--- a/libgo/go/runtime/internal/atomic/gccgo.go
+++ b/libgo/go/runtime/internal/atomic/gccgo.go
@@ -15,6 +15,9 @@ func Load(ptr *uint32) uint32
func Loadp(ptr unsafe.Pointer) unsafe.Pointer
//go:noescape
+func Load8(ptr *uint8) uint8
+
+//go:noescape
func Load64(ptr *uint64) uint64
//go:noescape
@@ -56,6 +59,9 @@ func CasRel(ptr *uint32, old, new uint32) bool
func Store(ptr *uint32, val uint32)
//go:noescape
+func Store8(ptr *uint8, val uint8)
+
+//go:noescape
func Store64(ptr *uint64, val uint64)
//go:noescape
diff --git a/libgo/go/runtime/internal/sys/intrinsics.go b/libgo/go/runtime/internal/sys/intrinsics.go
index 6906938..d25f350 100644
--- a/libgo/go/runtime/internal/sys/intrinsics.go
+++ b/libgo/go/runtime/internal/sys/intrinsics.go
@@ -37,25 +37,6 @@ func Ctz8(x uint8) int {
return int(ntz8tab[x])
}
-var ntz8tab = [256]uint8{
- 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
- 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
-}
-
//extern __builtin_bswap64
func bswap64(uint64) uint64
diff --git a/libgo/go/runtime/internal/sys/intrinsics_common.go b/libgo/go/runtime/internal/sys/intrinsics_common.go
new file mode 100644
index 0000000..818d75e
--- /dev/null
+++ b/libgo/go/runtime/internal/sys/intrinsics_common.go
@@ -0,0 +1,143 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// Copied from math/bits to avoid dependence.
+
+var len8tab = [256]uint8{
+ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+}
+
+var ntz8tab = [256]uint8{
+ 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+ 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
+}
+
+// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len64(x uint64) (n int) {
+ if x >= 1<<32 {
+ x >>= 32
+ n = 32
+ }
+ if x >= 1<<16 {
+ x >>= 16
+ n += 16
+ }
+ if x >= 1<<8 {
+ x >>= 8
+ n += 8
+ }
+ return n + int(len8tab[x])
+}
+
+// --- OnesCount ---
+
+const m0 = 0x5555555555555555 // 01010101 ...
+const m1 = 0x3333333333333333 // 00110011 ...
+const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
+
+// OnesCount64 returns the number of one bits ("population count") in x.
+func OnesCount64(x uint64) int {
+ // Implementation: Parallel summing of adjacent bits.
+ // See "Hacker's Delight", Chap. 5: Counting Bits.
+ // The following pattern shows the general approach:
+ //
+ // x = x>>1&(m0&m) + x&(m0&m)
+ // x = x>>2&(m1&m) + x&(m1&m)
+ // x = x>>4&(m2&m) + x&(m2&m)
+ // x = x>>8&(m3&m) + x&(m3&m)
+ // x = x>>16&(m4&m) + x&(m4&m)
+ // x = x>>32&(m5&m) + x&(m5&m)
+ // return int(x)
+ //
+ // Masking (& operations) can be left away when there's no
+ // danger that a field's sum will carry over into the next
+ // field: Since the result cannot be > 64, 8 bits is enough
+ // and we can ignore the masks for the shifts by 8 and up.
+ // Per "Hacker's Delight", the first line can be simplified
+ // more, but it saves at best one instruction, so we leave
+ // it alone for clarity.
+ const m = 1<<64 - 1
+ x = x>>1&(m0&m) + x&(m0&m)
+ x = x>>2&(m1&m) + x&(m1&m)
+ x = (x>>4 + x) & (m2 & m)
+ x += x >> 8
+ x += x >> 16
+ x += x >> 32
+ return int(x) & (1<<7 - 1)
+}
+
+var deBruijn64tab = [64]byte{
+ 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+ 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+ 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+}
+
+const deBruijn64 = 0x03f79d71b4ca8b09
+
+// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
+func TrailingZeros64(x uint64) int {
+ if x == 0 {
+ return 64
+ }
+ // If popcount is fast, replace code below with return popcount(^x & (x - 1)).
+ //
+ // x & -x leaves only the right-most bit set in the word. Let k be the
+ // index of that bit. Since only a single bit is set, the value is two
+ // to the power of k. Multiplying by a power of two is equivalent to
+ // left shifting, in this case by k bits. The de Bruijn (64 bit) constant
+ // is such that all six bit, consecutive substrings are distinct.
+ // Therefore, if we have a left shifted version of this constant we can
+ // find by how many bits it was shifted by looking at which six bit
+ // substring ended up at the top of the word.
+ // (Knuth, volume 4, section 7.3.1)
+ return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
+}
+
+// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
+func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
+
+// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
+func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
+
+// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
+func TrailingZeros8(x uint8) int {
+ return int(ntz8tab[x])
+}
+
+// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
+func Len8(x uint8) int {
+ return int(len8tab[x])
+}
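Because these helpers are copied from math/bits, a quick sanity check against the originals is easy to write outside the runtime. A hedged sketch, not part of the patch:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	for _, x := range []uint64{0, 1, 0x80, 0xdeadbeef, 1 << 63} {
		fmt.Println(x,
			bits.OnesCount64(x),     // what sys.OnesCount64 should return
			bits.TrailingZeros64(x), // what sys.TrailingZeros64 should return
			bits.Len64(x))           // what sys.Len64 should return
	}
}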
diff --git a/libgo/go/runtime/lfstack_32bit.go b/libgo/go/runtime/lfstack_32bit.go
index f50c508..6da037e 100644
--- a/libgo/go/runtime/lfstack_32bit.go
+++ b/libgo/go/runtime/lfstack_32bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build 386 amd64p32 arm nacl armbe m68k mips mipsle mips64p32 mips64p32le nios2 ppc s390 sh shbe sparc
+// +build 386 amd64p32 arm armbe m68k mips mipsle mips64p32 mips64p32le nios2 ppc s390 sh shbe sparc
package runtime
diff --git a/libgo/go/runtime/libfuzzer.go b/libgo/go/runtime/libfuzzer.go
new file mode 100644
index 0000000..0161955
--- /dev/null
+++ b/libgo/go/runtime/libfuzzer.go
@@ -0,0 +1,75 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build libfuzzer
+
+package runtime
+
+import _ "unsafe" // for go:linkname
+
+func libfuzzerCall(fn *byte, arg0, arg1 uintptr)
+
+func libfuzzerTraceCmp1(arg0, arg1 uint8) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp1, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp2(arg0, arg1 uint16) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp2, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp4(arg0, arg1 uint32) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp4, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceCmp8(arg0, arg1 uint64) {
+ libfuzzerCall(&__sanitizer_cov_trace_cmp8, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp1(arg0, arg1 uint8) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp1, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp2(arg0, arg1 uint16) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp2, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp4(arg0, arg1 uint32) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp4, uintptr(arg0), uintptr(arg1))
+}
+
+func libfuzzerTraceConstCmp8(arg0, arg1 uint64) {
+ libfuzzerCall(&__sanitizer_cov_trace_const_cmp8, uintptr(arg0), uintptr(arg1))
+}
+
+//go:linkname __sanitizer_cov_trace_cmp1 __sanitizer_cov_trace_cmp1
+//go:cgo_import_static __sanitizer_cov_trace_cmp1
+var __sanitizer_cov_trace_cmp1 byte
+
+//go:linkname __sanitizer_cov_trace_cmp2 __sanitizer_cov_trace_cmp2
+//go:cgo_import_static __sanitizer_cov_trace_cmp2
+var __sanitizer_cov_trace_cmp2 byte
+
+//go:linkname __sanitizer_cov_trace_cmp4 __sanitizer_cov_trace_cmp4
+//go:cgo_import_static __sanitizer_cov_trace_cmp4
+var __sanitizer_cov_trace_cmp4 byte
+
+//go:linkname __sanitizer_cov_trace_cmp8 __sanitizer_cov_trace_cmp8
+//go:cgo_import_static __sanitizer_cov_trace_cmp8
+var __sanitizer_cov_trace_cmp8 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp1 __sanitizer_cov_trace_const_cmp1
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp1
+var __sanitizer_cov_trace_const_cmp1 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp2 __sanitizer_cov_trace_const_cmp2
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp2
+var __sanitizer_cov_trace_const_cmp2 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp4 __sanitizer_cov_trace_const_cmp4
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp4
+var __sanitizer_cov_trace_const_cmp4 byte
+
+//go:linkname __sanitizer_cov_trace_const_cmp8 __sanitizer_cov_trace_const_cmp8
+//go:cgo_import_static __sanitizer_cov_trace_const_cmp8
+var __sanitizer_cov_trace_const_cmp8 byte
diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go
index 6f86e91..f672efd 100644
--- a/libgo/go/runtime/lock_futex.go
+++ b/libgo/go/runtime/lock_futex.go
@@ -241,7 +241,7 @@ func notetsleepg(n *note, ns int64) bool {
return ok
}
-func beforeIdle() bool {
+func beforeIdle(int64) bool {
return false
}
diff --git a/libgo/go/runtime/lock_js.go b/libgo/go/runtime/lock_js.go
index c038499..3168c86 100644
--- a/libgo/go/runtime/lock_js.go
+++ b/libgo/go/runtime/lock_js.go
@@ -111,6 +111,8 @@ func notetsleepg(n *note, ns int64) bool {
gopark(nil, nil, waitReasonSleep, traceEvNone, 1)
clearTimeoutEvent(id) // note might have woken early, clear timeout
+ clearIdleID()
+
mp = acquirem()
delete(notes, n)
delete(notesWithTimeout, n)
@@ -144,33 +146,65 @@ func checkTimeouts() {
}
}
-var returnedEventHandler *g
+// events is a stack of calls from JavaScript into Go.
+var events []*event
+
+type event struct {
+ // g was the active goroutine when the call from JavaScript occurred.
+ // It needs to be active when returning to JavaScript.
+ gp *g
+ // returned reports whether the event handler has returned.
+ // When all goroutines are idle and the event handler has returned,
+ // then g gets resumed and returns the execution to JavaScript.
+ returned bool
+}
-func init() {
- // At the toplevel we need an extra goroutine that handles asynchronous events.
- initg := getg()
- go func() {
- returnedEventHandler = getg()
- goready(initg, 1)
+// The timeout event started by beforeIdle.
+var idleID int32
- gopark(nil, nil, waitReasonZero, traceEvNone, 1)
- returnedEventHandler = nil
+// beforeIdle gets called by the scheduler if no goroutine is awake.
+// If we are not already handling an event, then we pause for an async event.
+// If an event handler returned, we resume it and it will pause the execution.
+func beforeIdle(delay int64) bool {
+ if delay > 0 {
+ clearIdleID()
+ if delay < 1e6 {
+ delay = 1
+ } else if delay < 1e15 {
+ delay = delay / 1e6
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ delay = 1e9
+ }
+ idleID = scheduleTimeoutEvent(delay)
+ }
- pause(getcallersp() - 16)
- }()
- gopark(nil, nil, waitReasonZero, traceEvNone, 1)
-}
+ if len(events) == 0 {
+ go handleAsyncEvent()
+ return true
+ }
-// beforeIdle gets called by the scheduler if no goroutine is awake.
-// We resume the event handler (if available) which will pause the execution.
-func beforeIdle() bool {
- if returnedEventHandler != nil {
- goready(returnedEventHandler, 1)
+ e := events[len(events)-1]
+ if e.returned {
+ goready(e.gp, 1)
return true
}
return false
}
+func handleAsyncEvent() {
+ pause(getcallersp() - 16)
+}
+
+// clearIdleID clears our record of the timeout started by beforeIdle.
+func clearIdleID() {
+ if idleID != 0 {
+ clearTimeoutEvent(idleID)
+ idleID = 0
+ }
+}
+
// pause sets SP to newsp and pauses the execution of Go's WebAssembly code until an event is triggered.
func pause(newsp uintptr)
@@ -181,18 +215,29 @@ func scheduleTimeoutEvent(ms int64) int32
// clearTimeoutEvent clears a timeout event scheduled by scheduleTimeoutEvent.
func clearTimeoutEvent(id int32)
+// handleEvent gets invoked on a call from JavaScript into Go. It calls the event handler of the syscall/js package
+// and then parks the handler goroutine to allow other goroutines to run before giving execution back to JavaScript.
+// When no other goroutine is awake any more, beforeIdle resumes the handler goroutine. Now that the same goroutine
+// is running as was running when the call came in from JavaScript, execution can be safely passed back to JavaScript.
func handleEvent() {
- prevReturnedEventHandler := returnedEventHandler
- returnedEventHandler = nil
+ e := &event{
+ gp: getg(),
+ returned: false,
+ }
+ events = append(events, e)
- checkTimeouts()
eventHandler()
- returnedEventHandler = getg()
+ clearIdleID()
+
+ // wait until all goroutines are idle
+ e.returned = true
gopark(nil, nil, waitReasonZero, traceEvNone, 1)
- returnedEventHandler = prevReturnedEventHandler
+ events[len(events)-1] = nil
+ events = events[:len(events)-1]
+ // return execution to JavaScript
pause(getcallersp() - 16)
}
diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go
index 5cf2406..63a6014 100644
--- a/libgo/go/runtime/lock_sema.go
+++ b/libgo/go/runtime/lock_sema.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin hurd nacl netbsd openbsd plan9 solaris windows
+// +build aix darwin hurd netbsd openbsd plan9 solaris windows
package runtime
@@ -300,7 +300,7 @@ func notetsleepg(n *note, ns int64) bool {
return ok
}
-func beforeIdle() bool {
+func beforeIdle(int64) bool {
return false
}
diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go
index 0eee55e..fda2273 100644
--- a/libgo/go/runtime/malloc.go
+++ b/libgo/go/runtime/malloc.go
@@ -19,7 +19,7 @@
// fixalloc: a free-list allocator for fixed-size off-heap objects,
// used to manage storage used by the allocator.
// mheap: the malloc heap, managed at page (8192-byte) granularity.
-// mspan: a run of pages managed by the mheap.
+// mspan: a run of in-use pages managed by the mheap.
// mcentral: collects all spans of a given size class.
// mcache: a per-P cache of mspans with free space.
// mstats: allocation statistics.
@@ -56,13 +56,8 @@
// it is placed on the mcentral free list for the mspan's size
// class.
//
-// 3. Otherwise, if all objects in the mspan are free, the mspan
-// is now "idle", so it is returned to the mheap and no longer
-// has a size class.
-// This may coalesce it with adjacent idle mspans.
-//
-// 4. If an mspan remains idle for long enough, return its pages
-// to the operating system.
+// 3. Otherwise, if all objects in the mspan are free, the mspan's
+// pages are returned to the mheap and the mspan is now dead.
//
// Allocating and freeing a large object uses the mheap
// directly, bypassing the mcache and mcentral.
@@ -207,17 +202,21 @@ const (
// exceed Go's 48 bit limit, it's extremely unlikely in
// practice.
//
- // On aix/ppc64, the limits is increased to 1<<60 to accept addresses
- // returned by mmap syscall. These are in range:
- // 0x0a00000000000000 - 0x0afffffffffffff
- //
// On 32-bit platforms, we accept the full 32-bit address
// space because doing so is cheap.
// mips32 only has access to the low 2GB of virtual memory, so
// we further limit it to 31 bits.
//
+ // On darwin/arm64, although 64-bit pointers are presumably
+ // available, pointers are truncated to 33 bits. Furthermore,
+ // only the top 4 GiB of the address space are actually available
+ // to the application, but we allow the whole 33 bits anyway for
+ // simplicity.
+ // TODO(mknyszek): Consider limiting it to 32 bits and using
+ // arenaBaseOffset to offset into the top 4 GiB.
+ //
// WebAssembly currently has a limit of 4GB linear memory.
- heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosAix))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 60*(sys.GoosAix*_64bit)
+ heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosDarwin*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosDarwin*sys.GoarchArm64
// maxAlloc is the maximum size of an allocation. On 64-bit,
// it's theoretically possible to allocate 1<<heapAddrBits bytes. On
@@ -236,7 +235,6 @@ const (
// Platform Addr bits Arena size L1 entries L2 entries
// -------------- --------- ---------- ---------- -----------
// */64-bit 48 64MB 1 4M (32MB)
- // aix/64-bit 60 256MB 4096 4M (32MB)
// windows/64-bit 48 4MB 64 1M (8MB)
// */32-bit 32 4MB 1 1024 (4KB)
// */mips(le) 31 4MB 1 512 (2KB)
@@ -258,7 +256,7 @@ const (
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
// prefer using heapArenaBytes where possible (we need the
// constant to compute some other constants).
- logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoosAix)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (8+20)*(sys.GoosAix*_64bit)
+ logHeapArenaBytes = (6+20)*(_64bit*(1-sys.GoosWindows)*(1-sys.GoarchWasm)) + (2+20)*(_64bit*sys.GoosWindows) + (2+20)*(1-_64bit) + (2+20)*sys.GoarchWasm
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
@@ -278,10 +276,7 @@ const (
// We use the L1 map on 64-bit Windows because the arena size
// is small, but the address space is still 48 bits, and
// there's a high cost to having a large L2.
- //
- // We use the L1 map on aix/ppc64 to keep the same L2 value
- // as on Linux.
- arenaL1Bits = 6*(_64bit*sys.GoosWindows) + 12*(sys.GoosAix*_64bit)
+ arenaL1Bits = 6 * (_64bit * sys.GoosWindows)
// arenaL2Bits is the number of bits of the arena number
// covered by the second level arena index.
@@ -308,9 +303,15 @@ const (
// bits. This offset lets us handle "negative" addresses (or
// high addresses if viewed as unsigned).
//
+ // On aix/ppc64, this offset allows us to keep heapAddrBits at
+ // 48. Otherwise, it would have to be 60 to handle mmap addresses
+ // (in the range 0x0a00000000000000 - 0x0afffffffffffff), but in
+ // that case the memory reserved in (s *pageAlloc).init for chunks
+ // causes significant slowdowns.
+ //
// On other platforms, the user address space is contiguous
// and starts at 0, so no offset is necessary.
- arenaBaseOffset uintptr = sys.GoarchAmd64 * (1 << 47)
+ arenaBaseOffset = sys.GoarchAmd64*(1<<47) + (^0x0a00000000000000+1)&uintptrMask*sys.GoosAix
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
@@ -444,6 +445,10 @@ func mallocinit() {
// The OS init code failed to fetch the physical page size.
throw("failed to get system page size")
}
+ if physPageSize > maxPhysPageSize {
+ print("system page size (", physPageSize, ") is larger than maximum page size (", maxPhysPageSize, ")\n")
+ throw("bad system page size")
+ }
if physPageSize < minPhysPageSize {
print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n")
throw("bad system page size")
@@ -456,6 +461,13 @@ func mallocinit() {
print("system huge page size (", physHugePageSize, ") must be a power of 2\n")
throw("bad system huge page size")
}
+ if physHugePageSize > maxPhysHugePageSize {
+ // physHugePageSize is greater than the maximum supported huge page size.
+ // Unlike the other cases, don't throw here, since a system configured
+ // this way isn't wrong; we just don't have the code to support it.
+ // Instead, silently set the huge page size to zero.
+ physHugePageSize = 0
+ }
if physHugePageSize != 0 {
// Since physHugePageSize is a power of 2, it suffices to increase
// physHugePageShift until 1<<physHugePageShift == physHugePageSize.
@@ -579,7 +591,7 @@ func mallocinit() {
if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
p = mheap_.heapArenaAlloc.end
}
- p = round(p+(256<<10), heapArenaBytes)
+ p = alignUp(p+(256<<10), heapArenaBytes)
// Because we're worried about fragmentation on
// 32-bit, we try to make a large initial reservation.
arenaSizes := [...]uintptr{
@@ -612,7 +624,7 @@ func mallocinit() {
//
// h must be locked.
func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
- n = round(n, heapArenaBytes)
+ n = alignUp(n, heapArenaBytes)
// First, try the arena pre-reservation.
v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
@@ -795,7 +807,7 @@ retry:
// re-reserve the aligned sub-region. This may race,
// so we may have to try again.
sysFree(unsafe.Pointer(p), size+align, nil)
- p = round(p, align)
+ p = alignUp(p, align)
p2 := sysReserve(unsafe.Pointer(p), size)
if p != uintptr(p2) {
// Must have raced. Try again.
@@ -809,7 +821,7 @@ retry:
return p2, size
default:
// Trim off the unaligned parts.
- pAligned := round(p, align)
+ pAligned := alignUp(p, align)
sysFree(unsafe.Pointer(p), pAligned-p, nil)
end := pAligned + size
endLen := (p + size + align) - end
@@ -998,11 +1010,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
- off = round(off, 8)
+ off = alignUp(off, 8)
} else if size&3 == 0 {
- off = round(off, 4)
+ off = alignUp(off, 4)
} else if size&1 == 0 {
- off = round(off, 2)
+ off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
@@ -1160,7 +1172,7 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan {
// pays the debt down to npage pages.
deductSweepCredit(npages*_PageSize, npages)
- s := mheap_.alloc(npages, makeSpanClass(0, noscan), true, needzero)
+ s := mheap_.alloc(npages, makeSpanClass(0, noscan), needzero)
if s == nil {
throw("out of memory")
}
@@ -1338,7 +1350,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
lock(&globalAlloc.mutex)
persistent = &globalAlloc.persistentAlloc
}
- persistent.off = round(persistent.off, align)
+ persistent.off = alignUp(persistent.off, align)
if persistent.off+size > persistentChunkSize || persistent.base == nil {
persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys))
if persistent.base == nil {
@@ -1356,7 +1368,7 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
break
}
}
- persistent.off = round(sys.PtrSize, align)
+ persistent.off = alignUp(sys.PtrSize, align)
}
p := persistent.base.add(persistent.off)
persistent.off += size
@@ -1402,12 +1414,12 @@ func (l *linearAlloc) init(base, size uintptr) {
}
func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
- p := round(l.next, align)
+ p := alignUp(l.next, align)
if p+size > l.end {
return nil
}
l.next = p + size
- if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped {
+ if pEnd := alignUp(l.next-1, physPageSize); pEnd > l.mapped {
// Transition from Reserved to Prepared to Ready.
sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat)
sysUsed(unsafe.Pointer(l.mapped), pEnd-l.mapped)
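Note on the round -> alignUp renames above: the helpers simply round an address up or down to a power-of-two boundary, and they are used throughout the new page allocator. A minimal standalone sketch of their semantics, assuming the alignment argument is a power of two (the package wrapper and example values are illustrative, not part of the patch):

package main

import "fmt"

// alignUp rounds n up to the next multiple of a. a must be a power of two.
func alignUp(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}

// alignDown rounds n down to the previous multiple of a. a must be a power of two.
func alignDown(n, a uintptr) uintptr {
	return n &^ (a - 1)
}

func main() {
	fmt.Println(alignUp(100, 8))     // 104
	fmt.Println(alignDown(100, 8))   // 96
	fmt.Println(alignUp(4096, 4096)) // 4096: already-aligned values are unchanged
}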
diff --git a/libgo/go/runtime/malloc_test.go b/libgo/go/runtime/malloc_test.go
index c9282ba..bd30bc1 100644
--- a/libgo/go/runtime/malloc_test.go
+++ b/libgo/go/runtime/malloc_test.go
@@ -170,6 +170,14 @@ func TestTinyAlloc(t *testing.T) {
}
}
+func TestPageCacheLeak(t *testing.T) {
+ defer GOMAXPROCS(GOMAXPROCS(1))
+ leaked := PageCachePagesLeaked()
+ if leaked != 0 {
+ t.Fatalf("found %d leaked pages in page caches", leaked)
+ }
+}
+
func TestPhysicalMemoryUtilization(t *testing.T) {
got := runTestProg(t, "testprog", "GCPhys")
want := "OK\n"
@@ -178,6 +186,19 @@ func TestPhysicalMemoryUtilization(t *testing.T) {
}
}
+func TestScavengedBitsCleared(t *testing.T) {
+ var mismatches [128]BitsMismatch
+ if n, ok := CheckScavengedBitsCleared(mismatches[:]); !ok {
+ t.Errorf("uncleared scavenged bits")
+ for _, m := range mismatches[:n] {
+ t.Logf("\t@ address 0x%x", m.Base)
+ t.Logf("\t| got: %064b", m.Got)
+ t.Logf("\t| want: %064b", m.Want)
+ }
+ t.FailNow()
+ }
+}
+
type acLink struct {
x [1 << 20]byte
}
diff --git a/libgo/go/runtime/map.go b/libgo/go/runtime/map.go
index 3672908..6667fe7 100644
--- a/libgo/go/runtime/map.go
+++ b/libgo/go/runtime/map.go
@@ -1429,5 +1429,5 @@ func reflectlite_maplen(h *hmap) int {
return h.count
}
-const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go
+const maxZero = 1024 // must match value in cmd/compile/internal/gc/walk.go:zeroValSize
var zeroVal [maxZero]byte
diff --git a/libgo/go/runtime/map_benchmark_test.go b/libgo/go/runtime/map_benchmark_test.go
index cf04ead..bae1aa0 100644
--- a/libgo/go/runtime/map_benchmark_test.go
+++ b/libgo/go/runtime/map_benchmark_test.go
@@ -251,7 +251,7 @@ func BenchmarkMapLast(b *testing.B) {
}
func BenchmarkMapCycle(b *testing.B) {
- // Arrange map entries to be a permuation, so that
+ // Arrange map entries to be a permutation, so that
// we hit all entries, and one lookup is data dependent
// on the previous lookup.
const N = 3127
diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go
index b84fe0f..457da13 100644
--- a/libgo/go/runtime/mbitmap.go
+++ b/libgo/go/runtime/mbitmap.go
@@ -243,6 +243,10 @@ func (s *mspan) nextFreeIndex() uintptr {
}
// isFree reports whether the index'th object in s is unallocated.
+//
+// The caller must ensure s.state is mSpanInUse, and there must have
+// been no preemption points since ensuring this (which could allow a
+// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
if index < s.freeindex {
return false
@@ -349,6 +353,33 @@ func heapBitsForAddr(addr uintptr) (h heapBits) {
return
}
+// badPointer prints diagnostics for a bad pointer found in the Go heap and throws.
+func badPointer(s *mspan, p, refBase, refOff uintptr) {
+ // Typically this indicates an incorrect use
+ // of unsafe or cgo to store a bad pointer in
+ // the Go heap. It may also indicate a runtime
+ // bug.
+ //
+ // TODO(austin): We could be more aggressive
+ // and detect pointers to unallocated objects
+ // in allocated spans.
+ printlock()
+ print("runtime: pointer ", hex(p))
+ state := s.state.get()
+ if state != mSpanInUse {
+ print(" to unallocated span")
+ } else {
+ print(" to unused region of span")
+ }
+ print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state, "\n")
+ if refBase != 0 {
+ print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
+ gcDumpObject("object", refBase, refOff)
+ }
+ getg().m.traceback = 2
+ throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
+}
+
// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
@@ -362,42 +393,30 @@ func heapBitsForAddr(addr uintptr) (h heapBits) {
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
+//
+// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
+// Since p is a uintptr, it would not be adjusted if the stack were to move.
+//go:nosplit
func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) {
s = spanOf(p)
+ // If s is nil, the virtual address has never been part of the heap.
+ // This pointer may be to some mmap'd region, so we allow it.
+ if s == nil {
+ return
+ }
// If p is a bad pointer, it may not be in s's bounds.
- if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
- if s == nil || s.state == mSpanManual || forStack {
- // If s is nil, the virtual address has never been part of the heap.
- // This pointer may be to some mmap'd region, so we allow it.
- // Pointers into stacks are also ok, the runtime manages these explicitly.
+ //
+ // Check s.state to synchronize with span initialization
+ // before checking other fields. See also spanOfHeap.
+ if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
+ // Pointers into stacks are also ok, the runtime manages these explicitly.
+ if state == mSpanManual || forStack {
return
}
-
// The following ensures that we are rigorous about what data
// structures hold valid pointers.
if debug.invalidptr != 0 {
- // Typically this indicates an incorrect use
- // of unsafe or cgo to store a bad pointer in
- // the Go heap. It may also indicate a runtime
- // bug.
- //
- // TODO(austin): We could be more aggressive
- // and detect pointers to unallocated objects
- // in allocated spans.
- printlock()
- print("runtime: pointer ", hex(p))
- if s.state != mSpanInUse {
- print(" to unallocated span")
- } else {
- print(" to unused region of span")
- }
- print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n")
- if refBase != 0 {
- print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
- gcDumpObject("object", refBase, refOff)
- }
- getg().m.traceback = 2
- throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
+ badPointer(s, p, refBase, refOff)
}
return
}
@@ -629,7 +648,7 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
}
}
return
- } else if s.state != mSpanInUse || dst < s.base() || s.limit <= dst {
+ } else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
// dst was heap memory at some point, but isn't now.
// It can't be a global. It must be either our stack,
// or in the case of direct channel sends, it could be
@@ -801,29 +820,19 @@ func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
- size, n, total := s.layout()
-
- // Init the markbit structures
- s.freeindex = 0
- s.allocCache = ^uint64(0) // all 1s indicating all free.
- s.nelems = n
- s.allocBits = nil
- s.gcmarkBits = nil
- s.gcmarkBits = newMarkBits(s.nelems)
- s.allocBits = newAllocBits(s.nelems)
-
// Clear bits corresponding to objects.
- nw := total / sys.PtrSize
+ nw := (s.npages << _PageShift) / sys.PtrSize
if nw%wordsPerBitmapByte != 0 {
throw("initSpan: unaligned length")
}
if h.shift != 0 {
throw("initSpan: unaligned base")
}
+ isPtrs := sys.PtrSize == 8 && s.elemsize == sys.PtrSize
for nw > 0 {
hNext, anw := h.forwardOrBoundary(nw)
nbyte := anw / wordsPerBitmapByte
- if sys.PtrSize == 8 && size == sys.PtrSize {
+ if isPtrs {
bitp := h.bitp
for i := uintptr(0); i < nbyte; i++ {
*bitp = bitPointerAll | bitScanAll
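Throughout this file the span state is now read through s.state.get() rather than a plain field access: the state lives in an atomically accessed box so that code such as findObject can observe the state before trusting the span's other fields. A standalone sketch of that publish-then-check pattern, using sync/atomic on a uint32 instead of the runtime's internal single-byte atomics (the type and constant names below are simplified stand-ins, not the runtime's definitions):

package main

import (
	"fmt"
	"sync/atomic"
)

// spanState mirrors the idea of the runtime's mSpanState values.
type spanState uint32

const (
	spanDead spanState = iota
	spanInUse
	spanManual
)

// spanStateBox wraps a state so that a writer publishes it atomically only
// after the rest of the structure is initialized, and readers load it before
// inspecting any other field. This is the pattern behind s.state.get().
type spanStateBox struct {
	s uint32
}

func (b *spanStateBox) set(s spanState) { atomic.StoreUint32(&b.s, uint32(s)) }
func (b *spanStateBox) get() spanState  { return spanState(atomic.LoadUint32(&b.s)) }

func main() {
	var b spanStateBox
	b.set(spanInUse)
	if b.get() != spanInUse {
		fmt.Println("unexpected state")
		return
	}
	fmt.Println("state:", b.get())
}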
diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go
index cd59010..78a3ae6 100644
--- a/libgo/go/runtime/mcentral.go
+++ b/libgo/go/runtime/mcentral.go
@@ -243,7 +243,7 @@ func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
c.nonempty.remove(s)
unlock(&c.lock)
- mheap_.freeSpan(s, false)
+ mheap_.freeSpan(s)
return true
}
@@ -252,7 +252,7 @@ func (c *mcentral) grow() *mspan {
npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
size := uintptr(class_to_size[c.spanclass.sizeclass()])
- s := mheap_.alloc(npages, c.spanclass, false, true)
+ s := mheap_.alloc(npages, c.spanclass, true)
if s == nil {
return nil
}
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index 46b7334..b0040f9 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -139,6 +139,10 @@ const (
_ConcurrentSweep = true
_FinBlockSize = 4 * 1024
+ // debugScanConservative enables debug logging for stack
+ // frames that are scanned conservatively.
+ debugScanConservative = false
+
// sweepMinHeapDistance is a lower bound on the heap distance
// (in bytes) reserved for concurrent sweeping between GC
// cycles.
@@ -231,6 +235,8 @@ func setGCPercent(in int32) (out int32) {
gcSetTriggerRatio(memstats.triggerRatio)
unlock(&mheap_.lock)
})
+ // Pacing changed, so the scavenger should be awoken.
+ wakeScavenger()
// If we just disabled GC, wait for any concurrent GC mark to
// finish so we always return with no GC running.
@@ -490,25 +496,25 @@ func (c *gcControllerState) revise() {
}
live := atomic.Load64(&memstats.heap_live)
- var heapGoal, scanWorkExpected int64
- if live <= memstats.next_gc {
- // We're under the soft goal. Pace GC to complete at
- // next_gc assuming the heap is in steady-state.
- heapGoal = int64(memstats.next_gc)
+ // Assume we're under the soft goal. Pace GC to complete at
+ // next_gc assuming the heap is in steady-state.
+ heapGoal := int64(memstats.next_gc)
- // Compute the expected scan work remaining.
- //
- // This is estimated based on the expected
- // steady-state scannable heap. For example, with
- // GOGC=100, only half of the scannable heap is
- // expected to be live, so that's what we target.
- //
- // (This is a float calculation to avoid overflowing on
- // 100*heap_scan.)
- scanWorkExpected = int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
- } else {
- // We're past the soft goal. Pace GC so that in the
- // worst case it will complete by the hard goal.
+ // Compute the expected scan work remaining.
+ //
+ // This is estimated based on the expected
+ // steady-state scannable heap. For example, with
+ // GOGC=100, only half of the scannable heap is
+ // expected to be live, so that's what we target.
+ //
+ // (This is a float calculation to avoid overflowing on
+ // 100*heap_scan.)
+ scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+
+ if live > memstats.next_gc || c.scanWork > scanWorkExpected {
+ // We're past the soft goal, or we've already done more scan
+ // work than we expected. Pace GC so that in the worst case it
+ // will complete by the hard goal.
const maxOvershoot = 1.1
heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
@@ -520,7 +526,7 @@ func (c *gcControllerState) revise() {
//
// Note that we currently count allocations during GC as both
// scannable heap (heap_scan) and scan work completed
- // (scanWork), so allocation will change this difference will
+ // (scanWork), so allocation will change this difference
// slowly in the soft regime and not at all in the hard
// regime.
scanWorkRemaining := scanWorkExpected - c.scanWork
@@ -765,11 +771,25 @@ func gcSetTriggerRatio(triggerRatio float64) {
goal = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100
}
+ // If we let triggerRatio go too low, then if the application
+ // is allocating very rapidly we might end up in a situation
+ // where we're allocating black during a nearly always-on GC.
+ // The result of this is a growing heap and ultimately an
+ // increase in RSS. By capping us at a point >0, we're essentially
+ // saying that we're OK using more CPU during the GC to prevent
+ // this growth in RSS.
+ //
+ // The current constant was chosen empirically: given a sufficiently
+ // fast/scalable allocator with 48 Ps that could drive the trigger ratio
+ // to <0.05, this constant causes applications to retain the same peak
+ // RSS compared to not having this allocator.
+ const minTriggerRatio = 0.6
+
// Set the trigger ratio, capped to reasonable bounds.
- if triggerRatio < 0 {
+ if triggerRatio < minTriggerRatio {
// This can happen if the mutator is allocating very
// quickly or the GC is scanning very slowly.
- triggerRatio = 0
+ triggerRatio = minTriggerRatio
} else if gcpercent >= 0 {
// Ensure there's always a little margin so that the
// mutator assist ratio isn't infinity.
@@ -847,7 +867,8 @@ func gcSetTriggerRatio(triggerRatio float64) {
heapDistance = _PageSize
}
pagesSwept := atomic.Load64(&mheap_.pagesSwept)
- sweepDistancePages := int64(mheap_.pagesInUse) - int64(pagesSwept)
+ pagesInUse := atomic.Load64(&mheap_.pagesInUse)
+ sweepDistancePages := int64(pagesInUse) - int64(pagesSwept)
if sweepDistancePages <= 0 {
mheap_.sweepPagesPerByte = 0
} else {
@@ -1250,6 +1271,7 @@ func gcStart(trigger gcTrigger) {
}
// Ok, we're doing it! Stop everybody else
+ semacquire(&gcsema)
semacquire(&worldsema)
if trace.enabled {
@@ -1348,6 +1370,13 @@ func gcStart(trigger gcTrigger) {
work.pauseNS += now - work.pauseStart
work.tMark = now
})
+
+ // Release the world sema before Gosched() in STW mode
+ // because we will need to reacquire it later but before
+ // this goroutine becomes runnable again, and we could
+ // self-deadlock otherwise.
+ semrelease(&worldsema)
+
// In STW mode, we could block the instant systemstack
// returns, so don't do anything important here. Make sure we
// block rather than returning to user code.
@@ -1417,6 +1446,10 @@ top:
return
}
+ // forEachP needs worldsema to execute, and we'll need it to
+ // stop the world later, so acquire worldsema now.
+ semacquire(&worldsema)
+
// Flush all local buffers and collect flushedWork flags.
gcMarkDoneFlushed = 0
systemstack(func() {
@@ -1477,6 +1510,7 @@ top:
// work to do. Keep going. It's possible the
// transition condition became true again during the
// ragged barrier, so re-check it.
+ semrelease(&worldsema)
goto top
}
@@ -1553,6 +1587,7 @@ top:
now := startTheWorldWithSema(true)
work.pauseNS += now - work.pauseStart
})
+ semrelease(&worldsema)
goto top
}
}
@@ -1651,9 +1686,16 @@ func gcMarkTermination(nextTriggerRatio float64) {
throw("gc done but gcphase != _GCoff")
}
+ // Record next_gc and heap_inuse for scavenger.
+ memstats.last_next_gc = memstats.next_gc
+ memstats.last_heap_inuse = memstats.heap_inuse
+
// Update GC trigger and pacing for the next cycle.
gcSetTriggerRatio(nextTriggerRatio)
+ // Pacing changed, so the scavenger should be awoken.
+ wakeScavenger()
+
// Update timing memstats
now := nanotime()
sec, nsec, _ := time_now()
@@ -1760,6 +1802,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
}
semrelease(&worldsema)
+ semrelease(&gcsema)
// Careful: another GC cycle may start now.
releasem(mp)
@@ -2152,8 +2195,7 @@ func gcResetMarkState() {
// allgs doesn't change.
lock(&allglock)
for _, gp := range allgs {
- gp.gcscandone = false // set to true in gcphasework
- gp.gcscanvalid = false // stack has not been scanned
+ gp.gcscandone = false // set to true in gcphasework
gp.gcAssistBytes = 0
}
unlock(&allglock)
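Two of the pacing changes above are easier to see with concrete numbers: revise() now always computes the steady-state scan-work expectation and switches to the hard heap goal either when the live heap passes the soft goal or when more scan work than expected has already been done. A small numeric sketch under assumed inputs (all values below are illustrative, not taken from the patch):

package main

import "fmt"

func main() {
	const gcpercent = 100
	const maxOvershoot = 1.1

	nextGC := int64(100 << 20)      // soft heap goal: 100 MiB (assumed)
	heapScan := int64(60 << 20)     // scannable heap: 60 MiB (assumed)
	live := int64(104 << 20)        // live heap has passed the soft goal (assumed)
	scanWorkDone := int64(10 << 20) // scan work completed so far (assumed)

	// Steady-state expectation: with GOGC=100 only half of the scannable
	// heap is expected to be live, so that's the scan work we plan for.
	scanWorkExpected := int64(float64(heapScan) * 100 / float64(100+gcpercent))

	heapGoal := nextGC
	if live > nextGC || scanWorkDone > scanWorkExpected {
		// Past the soft goal (or ahead on scan work): pace against the
		// hard goal instead, allowing a bounded overshoot.
		heapGoal = int64(float64(nextGC) * maxOvershoot)
	}
	fmt.Printf("expected scan work: %d MiB\n", scanWorkExpected>>20) // 30 MiB
	fmt.Printf("heap goal: %d MiB\n", heapGoal>>20)                  // 110 MiB
}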
diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go
index d7ae260..21539eb 100644
--- a/libgo/go/runtime/mgc_gccgo.go
+++ b/libgo/go/runtime/mgc_gccgo.go
@@ -145,40 +145,15 @@ func registerGCRoots(r *gcRootList) {
// and carries on.
func checkPreempt() {
gp := getg()
- if !gp.preempt || gp != gp.m.curg || gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" || gp.m.incgo {
+ if !gp.preempt || gp != gp.m.curg || !canPreemptM(gp.m) {
return
}
- // Synchronize with scang.
- gp.scanningself = true
- casgstatus(gp, _Grunning, _Gwaiting)
- if gp.preemptscan {
- for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) {
- // Likely to be racing with the GC as
- // it sees a _Gwaiting and does the
- // stack scan. If so, gcworkdone will
- // be set and gcphasework will simply
- // return.
- }
- if !gp.gcscandone {
- mp := acquirem()
- gcw := &gp.m.p.ptr().gcw
- scanstack(gp, gcw)
- releasem(mp)
- gp.gcscandone = true
- }
- gp.preemptscan = false
- gp.preempt = false
- casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
- // This clears gcscanvalid.
- casgstatus(gp, _Gwaiting, _Grunning)
- gp.scanningself = false
- return
+ if gp.preemptStop {
+ mcall(preemptPark)
}
// Act like goroutine called runtime.Gosched.
- casgstatus(gp, _Gwaiting, _Grunning)
- gp.scanningself = false
mcall(gopreempt_m)
}
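checkPreempt now delegates the per-M safety checks to canPreemptM and parks the goroutine with preemptPark when a stop (for example, for stack scanning) was requested. The sketch below illustrates the kind of predicate canPreemptM expresses; the struct is a simplified stand-in for the runtime's m, and the exact field set in the real runtime may differ:

package main

import "fmt"

// mState captures the per-M fields that gate preemption in this sketch.
// It is a simplification for illustration, not the runtime's m type.
type mState struct {
	locks      int32
	mallocing  int32
	preemptoff string
	pRunning   bool
}

// canPreemptM reports whether an M in this state is safe to preempt:
// no runtime locks held, not inside the allocator, preemption not
// explicitly disabled, and the M is running user code.
func canPreemptM(m *mState) bool {
	return m.locks == 0 && m.mallocing == 0 && m.preemptoff == "" && m.pRunning
}

func main() {
	fmt.Println(canPreemptM(&mState{pRunning: true}))               // true
	fmt.Println(canPreemptM(&mState{mallocing: 1, pRunning: true})) // false
}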
diff --git a/libgo/go/runtime/mgclarge.go b/libgo/go/runtime/mgclarge.go
deleted file mode 100644
index 414db10..0000000
--- a/libgo/go/runtime/mgclarge.go
+++ /dev/null
@@ -1,657 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Page heap.
-//
-// See malloc.go for the general overview.
-//
-// Allocation policy is the subject of this file. All free spans live in
-// a treap for most of their time being free. See
-// https://en.wikipedia.org/wiki/Treap or
-// https://faculty.washington.edu/aragon/pubs/rst89.pdf for an overview.
-// sema.go also holds an implementation of a treap.
-//
-// Each treapNode holds a single span. The treap is sorted by base address
-// and each span necessarily has a unique base address.
-// Spans are returned based on a first-fit algorithm, acquiring the span
-// with the lowest base address which still satisfies the request.
-//
-// The first-fit algorithm is possible due to an augmentation of each
-// treapNode to maintain the size of the largest span in the subtree rooted
-// at that treapNode. Below we refer to this invariant as the maxPages
-// invariant.
-//
-// The primary routines are
-// insert: adds a span to the treap
-// remove: removes the span from that treap that best fits the required size
-// removeSpan: which removes a specific span from the treap
-//
-// Whenever a pointer to a span which is owned by the treap is acquired, that
-// span must not be mutated. To mutate a span in the treap, remove it first.
-//
-// mheap_.lock must be held when manipulating this data structure.
-
-package runtime
-
-import (
- "unsafe"
-)
-
-//go:notinheap
-type mTreap struct {
- treap *treapNode
- unscavHugePages uintptr // number of unscavenged huge pages in the treap
-}
-
-//go:notinheap
-type treapNode struct {
- right *treapNode // all treapNodes > this treap node
- left *treapNode // all treapNodes < this treap node
- parent *treapNode // direct parent of this node, nil if root
- key uintptr // base address of the span, used as primary sort key
- span *mspan // span at base address key
- maxPages uintptr // the maximum size of any span in this subtree, including the root
- priority uint32 // random number used by treap algorithm to keep tree probabilistically balanced
- types treapIterFilter // the types of spans available in this subtree
-}
-
-// updateInvariants is a helper method which has a node recompute its own
-// maxPages and types values by looking at its own span as well as the
-// values of its direct children.
-//
-// Returns true if anything changed.
-func (t *treapNode) updateInvariants() bool {
- m, i := t.maxPages, t.types
- t.maxPages = t.span.npages
- t.types = t.span.treapFilter()
- if t.left != nil {
- t.types |= t.left.types
- if t.maxPages < t.left.maxPages {
- t.maxPages = t.left.maxPages
- }
- }
- if t.right != nil {
- t.types |= t.right.types
- if t.maxPages < t.right.maxPages {
- t.maxPages = t.right.maxPages
- }
- }
- return m != t.maxPages || i != t.types
-}
-
-// findMinimal finds the minimal (lowest base addressed) node in the treap
-// which matches the criteria set out by the filter f and returns nil if
-// none exists.
-//
-// This algorithm is functionally the same as (*mTreap).find, so see that
-// method for more details.
-func (t *treapNode) findMinimal(f treapIterFilter) *treapNode {
- if t == nil || !f.matches(t.types) {
- return nil
- }
- for t != nil {
- if t.left != nil && f.matches(t.left.types) {
- t = t.left
- } else if f.matches(t.span.treapFilter()) {
- break
- } else if t.right != nil && f.matches(t.right.types) {
- t = t.right
- } else {
- println("runtime: f=", f)
- throw("failed to find minimal node matching filter")
- }
- }
- return t
-}
-
-// findMaximal finds the maximal (highest base addressed) node in the treap
-// which matches the criteria set out by the filter f and returns nil if
-// none exists.
-//
-// This algorithm is the logical inversion of findMinimal and just changes
-// the order of the left and right tests.
-func (t *treapNode) findMaximal(f treapIterFilter) *treapNode {
- if t == nil || !f.matches(t.types) {
- return nil
- }
- for t != nil {
- if t.right != nil && f.matches(t.right.types) {
- t = t.right
- } else if f.matches(t.span.treapFilter()) {
- break
- } else if t.left != nil && f.matches(t.left.types) {
- t = t.left
- } else {
- println("runtime: f=", f)
- throw("failed to find minimal node matching filter")
- }
- }
- return t
-}
-
-// pred returns the predecessor of t in the treap subject to the criteria
-// specified by the filter f. Returns nil if no such predecessor exists.
-func (t *treapNode) pred(f treapIterFilter) *treapNode {
- if t.left != nil && f.matches(t.left.types) {
- // The node has a left subtree which contains at least one matching
- // node, find the maximal matching node in that subtree.
- return t.left.findMaximal(f)
- }
- // Lacking a left subtree, look to the parents.
- p := t // previous node
- t = t.parent
- for t != nil {
- // Walk up the tree until we find a node that has a left subtree
- // that we haven't already visited.
- if t.right == p {
- if f.matches(t.span.treapFilter()) {
- // If this node matches, then it's guaranteed to be the
- // predecessor since everything to its left is strictly
- // greater.
- return t
- } else if t.left != nil && f.matches(t.left.types) {
- // Failing the root of this subtree, if its left subtree has
- // something, that's where we'll find our predecessor.
- return t.left.findMaximal(f)
- }
- }
- p = t
- t = t.parent
- }
- // If the parent is nil, then we've hit the root without finding
- // a suitable left subtree containing the node (and the predecessor
- // wasn't on the path). Thus, there's no predecessor, so just return
- // nil.
- return nil
-}
-
-// succ returns the successor of t in the treap subject to the criteria
-// specified by the filter f. Returns nil if no such successor exists.
-func (t *treapNode) succ(f treapIterFilter) *treapNode {
- // See pred. This method is just the logical inversion of it.
- if t.right != nil && f.matches(t.right.types) {
- return t.right.findMinimal(f)
- }
- p := t
- t = t.parent
- for t != nil {
- if t.left == p {
- if f.matches(t.span.treapFilter()) {
- return t
- } else if t.right != nil && f.matches(t.right.types) {
- return t.right.findMinimal(f)
- }
- }
- p = t
- t = t.parent
- }
- return nil
-}
-
-// isSpanInTreap is handy for debugging. One should hold the heap lock, usually
-// mheap_.lock().
-func (t *treapNode) isSpanInTreap(s *mspan) bool {
- if t == nil {
- return false
- }
- return t.span == s || t.left.isSpanInTreap(s) || t.right.isSpanInTreap(s)
-}
-
-// walkTreap is handy for debugging and testing.
-// Starting at some treapnode t, for example the root, do a depth first preorder walk of
-// the tree executing fn at each treap node. One should hold the heap lock, usually
-// mheap_.lock().
-func (t *treapNode) walkTreap(fn func(tn *treapNode)) {
- if t == nil {
- return
- }
- fn(t)
- t.left.walkTreap(fn)
- t.right.walkTreap(fn)
-}
-
-// checkTreapNode when used in conjunction with walkTreap can usually detect a
-// poorly formed treap.
-func checkTreapNode(t *treapNode) {
- if t == nil {
- return
- }
- if t.span.next != nil || t.span.prev != nil || t.span.list != nil {
- throw("span may be on an mSpanList while simultaneously in the treap")
- }
- if t.span.base() != t.key {
- println("runtime: checkTreapNode treapNode t=", t, " t.key=", t.key,
- "t.span.base()=", t.span.base())
- throw("why does span.base() and treap.key do not match?")
- }
- if t.left != nil && t.key < t.left.key {
- throw("found out-of-order spans in treap (left child has greater base address)")
- }
- if t.right != nil && t.key > t.right.key {
- throw("found out-of-order spans in treap (right child has lesser base address)")
- }
-}
-
-// validateInvariants is handy for debugging and testing.
-// It ensures that the various invariants on each treap node are
-// appropriately maintained throughout the treap by walking the
-// treap in a post-order manner.
-func (t *treapNode) validateInvariants() (uintptr, treapIterFilter) {
- if t == nil {
- return 0, 0
- }
- leftMax, leftTypes := t.left.validateInvariants()
- rightMax, rightTypes := t.right.validateInvariants()
- max := t.span.npages
- if leftMax > max {
- max = leftMax
- }
- if rightMax > max {
- max = rightMax
- }
- if max != t.maxPages {
- println("runtime: t.maxPages=", t.maxPages, "want=", max)
- throw("maxPages invariant violated in treap")
- }
- typ := t.span.treapFilter() | leftTypes | rightTypes
- if typ != t.types {
- println("runtime: t.types=", t.types, "want=", typ)
- throw("types invariant violated in treap")
- }
- return max, typ
-}
-
-// treapIterType represents the type of iteration to perform
-// over the treap. Each different flag is represented by a bit
-// in the type, and types may be combined together by a bitwise
-// or operation.
-//
-// Note that only 5 bits are available for treapIterType, do not
-// use the 3 higher-order bits. This constraint is to allow for
-// expansion into a treapIterFilter, which is a uint32.
-type treapIterType uint8
-
-const (
- treapIterScav treapIterType = 1 << iota // scavenged spans
- treapIterHuge // spans containing at least one huge page
- treapIterBits = iota
-)
-
-// treapIterFilter is a bitwise filter of different spans by binary
-// properties. Each bit of a treapIterFilter represents a unique
-// combination of bits set in a treapIterType, in other words, it
-// represents the power set of a treapIterType.
-//
-// The purpose of this representation is to allow the existence of
-// a specific span type to bubble up in the treap (see the types
-// field on treapNode).
-//
-// More specifically, any treapIterType may be transformed into a
-// treapIterFilter for a specific combination of flags via the
-// following operation: 1 << (0x1f&treapIterType).
-type treapIterFilter uint32
-
-// treapFilterAll represents the filter which allows all spans.
-const treapFilterAll = ^treapIterFilter(0)
-
-// treapFilter creates a new treapIterFilter from two treapIterTypes.
-// mask represents a bitmask for which flags we should check against
-// and match for the expected result after applying the mask.
-func treapFilter(mask, match treapIterType) treapIterFilter {
- allow := treapIterFilter(0)
- for i := treapIterType(0); i < 1<<treapIterBits; i++ {
- if mask&i == match {
- allow |= 1 << i
- }
- }
- return allow
-}
-
-// matches returns true if m and f intersect.
-func (f treapIterFilter) matches(m treapIterFilter) bool {
- return f&m != 0
-}
-
-// treapFilter returns the treapIterFilter exactly matching this span,
-// i.e. popcount(result) == 1.
-func (s *mspan) treapFilter() treapIterFilter {
- have := treapIterType(0)
- if s.scavenged {
- have |= treapIterScav
- }
- if s.hugePages() > 0 {
- have |= treapIterHuge
- }
- return treapIterFilter(uint32(1) << (0x1f & have))
-}
-
-// treapIter is a bidirectional iterator type which may be used to iterate over a
-// an mTreap in-order forwards (increasing order) or backwards (decreasing order).
-// Its purpose is to hide details about the treap from users when trying to iterate
-// over it.
-//
-// To create iterators over the treap, call start or end on an mTreap.
-type treapIter struct {
- f treapIterFilter
- t *treapNode
-}
-
-// span returns the span at the current position in the treap.
-// If the treap is not valid, span will panic.
-func (i *treapIter) span() *mspan {
- return i.t.span
-}
-
-// valid returns whether the iterator represents a valid position
-// in the mTreap.
-func (i *treapIter) valid() bool {
- return i.t != nil
-}
-
-// next moves the iterator forward by one. Once the iterator
-// ceases to be valid, calling next will panic.
-func (i treapIter) next() treapIter {
- i.t = i.t.succ(i.f)
- return i
-}
-
-// prev moves the iterator backwards by one. Once the iterator
-// ceases to be valid, calling prev will panic.
-func (i treapIter) prev() treapIter {
- i.t = i.t.pred(i.f)
- return i
-}
-
-// start returns an iterator which points to the start of the treap (the
-// left-most node in the treap) subject to mask and match constraints.
-func (root *mTreap) start(mask, match treapIterType) treapIter {
- f := treapFilter(mask, match)
- return treapIter{f, root.treap.findMinimal(f)}
-}
-
-// end returns an iterator which points to the end of the treap (the
-// right-most node in the treap) subject to mask and match constraints.
-func (root *mTreap) end(mask, match treapIterType) treapIter {
- f := treapFilter(mask, match)
- return treapIter{f, root.treap.findMaximal(f)}
-}
-
-// mutate allows one to mutate the span without removing it from the treap via a
-// callback. The span's base and size are allowed to change as long as the span
-// remains in the same order relative to its predecessor and successor.
-//
-// Note however that any operation that causes a treap rebalancing inside of fn
-// is strictly forbidden, as that may cause treap node metadata to go
-// out-of-sync.
-func (root *mTreap) mutate(i treapIter, fn func(span *mspan)) {
- s := i.span()
- // Save some state about the span for later inspection.
- hpages := s.hugePages()
- scavenged := s.scavenged
- // Call the mutator.
- fn(s)
- // Update unscavHugePages appropriately.
- if !scavenged {
- mheap_.free.unscavHugePages -= hpages
- }
- if !s.scavenged {
- mheap_.free.unscavHugePages += s.hugePages()
- }
- // Update the key in case the base changed.
- i.t.key = s.base()
- // Updating invariants up the tree needs to happen if
- // anything changed at all, so just go ahead and do it
- // unconditionally.
- //
- // If it turns out nothing changed, it'll exit quickly.
- t := i.t
- for t != nil && t.updateInvariants() {
- t = t.parent
- }
-}
-
-// insert adds span to the large span treap.
-func (root *mTreap) insert(span *mspan) {
- if !span.scavenged {
- root.unscavHugePages += span.hugePages()
- }
- base := span.base()
- var last *treapNode
- pt := &root.treap
- for t := *pt; t != nil; t = *pt {
- last = t
- if t.key < base {
- pt = &t.right
- } else if t.key > base {
- pt = &t.left
- } else {
- throw("inserting span already in treap")
- }
- }
-
- // Add t as new leaf in tree of span size and unique addrs.
- // The balanced tree is a treap using priority as the random heap priority.
- // That is, it is a binary tree ordered according to the key,
- // but then among the space of possible binary trees respecting those
- // keys, it is kept balanced on average by maintaining a heap ordering
- // on the priority: s.priority <= both s.right.priority and s.right.priority.
- // https://en.wikipedia.org/wiki/Treap
- // https://faculty.washington.edu/aragon/pubs/rst89.pdf
-
- t := (*treapNode)(mheap_.treapalloc.alloc())
- t.key = span.base()
- t.priority = fastrand()
- t.span = span
- t.maxPages = span.npages
- t.types = span.treapFilter()
- t.parent = last
- *pt = t // t now at a leaf.
-
- // Update the tree to maintain the various invariants.
- i := t
- for i.parent != nil && i.parent.updateInvariants() {
- i = i.parent
- }
-
- // Rotate up into tree according to priority.
- for t.parent != nil && t.parent.priority > t.priority {
- if t != nil && t.span.base() != t.key {
- println("runtime: insert t=", t, "t.key=", t.key)
- println("runtime: t.span=", t.span, "t.span.base()=", t.span.base())
- throw("span and treap node base addresses do not match")
- }
- if t.parent.left == t {
- root.rotateRight(t.parent)
- } else {
- if t.parent.right != t {
- throw("treap insert finds a broken treap")
- }
- root.rotateLeft(t.parent)
- }
- }
-}
-
-func (root *mTreap) removeNode(t *treapNode) {
- if !t.span.scavenged {
- root.unscavHugePages -= t.span.hugePages()
- }
- if t.span.base() != t.key {
- throw("span and treap node base addresses do not match")
- }
- // Rotate t down to be leaf of tree for removal, respecting priorities.
- for t.right != nil || t.left != nil {
- if t.right == nil || t.left != nil && t.left.priority < t.right.priority {
- root.rotateRight(t)
- } else {
- root.rotateLeft(t)
- }
- }
- // Remove t, now a leaf.
- if t.parent != nil {
- p := t.parent
- if p.left == t {
- p.left = nil
- } else {
- p.right = nil
- }
- // Walk up the tree updating invariants until no updates occur.
- for p != nil && p.updateInvariants() {
- p = p.parent
- }
- } else {
- root.treap = nil
- }
- // Return the found treapNode's span after freeing the treapNode.
- mheap_.treapalloc.free(unsafe.Pointer(t))
-}
-
-// find searches for, finds, and returns the treap iterator over all spans
-// representing the position of the span with the smallest base address which is
-// at least npages in size. If no span has at least npages it returns an invalid
-// iterator.
-//
-// This algorithm is as follows:
-// * If there's a left child and its subtree can satisfy this allocation,
-// continue down that subtree.
-// * If there's no such left child, check if the root of this subtree can
-// satisfy the allocation. If so, we're done.
-// * If the root cannot satisfy the allocation either, continue down the
-// right subtree if able.
-// * Else, break and report that we cannot satisfy the allocation.
-//
-// The preference for left, then current, then right, results in us getting
-// the left-most node which will contain the span with the lowest base
-// address.
-//
-// Note that if a request cannot be satisfied the fourth case will be
-// reached immediately at the root, since neither the left subtree nor
-// the right subtree will have a sufficient maxPages, whilst the root
-// node is also unable to satisfy it.
-func (root *mTreap) find(npages uintptr) treapIter {
- t := root.treap
- for t != nil {
- if t.span == nil {
- throw("treap node with nil span found")
- }
- // Iterate over the treap trying to go as far left
- // as possible while simultaneously ensuring that the
- // subtrees we choose always have a span which can
- // satisfy the allocation.
- if t.left != nil && t.left.maxPages >= npages {
- t = t.left
- } else if t.span.npages >= npages {
- // Before going right, if this span can satisfy the
- // request, stop here.
- break
- } else if t.right != nil && t.right.maxPages >= npages {
- t = t.right
- } else {
- t = nil
- }
- }
- return treapIter{treapFilterAll, t}
-}
-
-// removeSpan searches for, finds, deletes span along with
-// the associated treap node. If the span is not in the treap
-// then t will eventually be set to nil and the t.span
-// will throw.
-func (root *mTreap) removeSpan(span *mspan) {
- base := span.base()
- t := root.treap
- for t.span != span {
- if t.key < base {
- t = t.right
- } else if t.key > base {
- t = t.left
- }
- }
- root.removeNode(t)
-}
-
-// erase removes the element referred to by the current position of the
-// iterator. This operation consumes the given iterator, so it should no
-// longer be used. It is up to the caller to get the next or previous
-// iterator before calling erase, if need be.
-func (root *mTreap) erase(i treapIter) {
- root.removeNode(i.t)
-}
-
-// rotateLeft rotates the tree rooted at node x.
-// turning (x a (y b c)) into (y (x a b) c).
-func (root *mTreap) rotateLeft(x *treapNode) {
- // p -> (x a (y b c))
- p := x.parent
- a, y := x.left, x.right
- b, c := y.left, y.right
-
- y.left = x
- x.parent = y
- y.right = c
- if c != nil {
- c.parent = y
- }
- x.left = a
- if a != nil {
- a.parent = x
- }
- x.right = b
- if b != nil {
- b.parent = x
- }
-
- y.parent = p
- if p == nil {
- root.treap = y
- } else if p.left == x {
- p.left = y
- } else {
- if p.right != x {
- throw("large span treap rotateLeft")
- }
- p.right = y
- }
-
- x.updateInvariants()
- y.updateInvariants()
-}
-
-// rotateRight rotates the tree rooted at node y.
-// turning (y (x a b) c) into (x a (y b c)).
-func (root *mTreap) rotateRight(y *treapNode) {
- // p -> (y (x a b) c)
- p := y.parent
- x, c := y.left, y.right
- a, b := x.left, x.right
-
- x.left = a
- if a != nil {
- a.parent = x
- }
- x.right = y
- y.parent = x
- y.left = b
- if b != nil {
- b.parent = y
- }
- y.right = c
- if c != nil {
- c.parent = y
- }
-
- x.parent = p
- if p == nil {
- root.treap = x
- } else if p.left == y {
- p.left = x
- } else {
- if p.right != y {
- throw("large span treap rotateRight")
- }
- p.right = x
- }
-
- y.updateInvariants()
- x.updateInvariants()
-}
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go
index b6b69dd..a5af5d2 100644
--- a/libgo/go/runtime/mgcmark.go
+++ b/libgo/go/runtime/mgcmark.go
@@ -46,8 +46,6 @@ const (
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
-// The caller must have call gcCopySpans().
-//
// The world must be stopped.
//
//go:nowritebarrier
@@ -111,8 +109,7 @@ func gcMarkRootCheck() {
fail:
println("gp", gp, "goid", gp.goid,
"status", readgstatus(gp),
- "gcscandone", gp.gcscandone,
- "gcscanvalid", gp.gcscanvalid)
+ "gcscandone", gp.gcscandone)
unlock(&allglock) // Avoid self-deadlock with traceback.
throw("scan missed a g")
}
@@ -182,7 +179,7 @@ func markroot(gcw *gcWork, i uint32) {
gp.waitsince = work.tstart
}
- // scang must be done on the system stack in case
+ // scanstack must be done on the system stack in case
// we're trying to scan our own stack.
systemstack(func() {
// If this is a self-scan, put the user G in
@@ -196,14 +193,24 @@ func markroot(gcw *gcWork, i uint32) {
userG.waitreason = waitReasonGarbageCollectionScan
}
- // TODO: scang blocks until gp's stack has
- // been scanned, which may take a while for
+ // TODO: suspendG blocks (and spins) until gp
+ // stops, which may take a while for
// running goroutines. Consider doing this in
// two phases where the first is non-blocking:
// we scan the stacks we can and ask running
// goroutines to scan themselves; and the
// second blocks.
- scang(gp, gcw)
+ stopped := suspendG(gp)
+ if stopped.dead {
+ gp.gcscandone = true
+ return
+ }
+ if gp.gcscandone {
+ throw("g already scanned")
+ }
+ scanstack(gp, gcw)
+ gp.gcscandone = true
+ resumeG(stopped)
if selfScan {
casgstatus(userG, _Gwaiting, _Grunning)
@@ -242,13 +249,21 @@ func markrootSpans(gcw *gcWork, shard int) {
sg := mheap_.sweepgen
spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
// Note that work.spans may not include spans that were
- // allocated between entering the scan phase and now. This is
- // okay because any objects with finalizers in those spans
- // must have been allocated and given finalizers after we
- // entered the scan phase, so addfinalizer will have ensured
- // the above invariants for them.
- for _, s := range spans {
- if s.state != mSpanInUse {
+ // allocated between entering the scan phase and now. We may
+ // also race with spans being added into sweepSpans when they're
+ // just created, and as a result we may see nil pointers in the
+ // spans slice. This is okay because any objects with finalizers
+ // in those spans must have been allocated and given finalizers
+ // after we entered the scan phase, so addfinalizer will have
+ // ensured the above invariants for them.
+ for i := 0; i < len(spans); i++ {
+ // sweepBuf.block requires that we read pointers from the block atomically.
+ // It also requires that we ignore nil pointers.
+ s := (*mspan)(atomic.Loadp(unsafe.Pointer(&spans[i])))
+
+ // This is racing with spans being initialized, so
+ // check the state carefully.
+ if s == nil || s.state.get() != mSpanInUse {
continue
}
// Check that this span was swept (it may be cached or uncached).
@@ -600,16 +615,16 @@ func doscanstackswitch(*g, *g)
// scanstack scans gp's stack, greying all pointers found on the stack.
//
+// scanstack will also shrink the stack if it is safe to do so. If it
+// is not, it schedules a stack shrink for the next synchronous safe
+// point.
+//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:nowritebarrier
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
- if gp.gcscanvalid {
- return
- }
-
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
@@ -622,17 +637,9 @@ func scanstack(gp *g, gcw *gcWork) {
case _Gdead:
return
case _Grunning:
- // ok for gccgo, though not for gc.
- if usestackmaps {
- print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- throw("scanstack: goroutine not stopped")
- }
- case _Gsyscall:
- if usestackmaps {
- print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- throw("scanstack: goroutine in syscall")
- }
- case _Grunnable, _Gwaiting:
+ print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+ throw("scanstack: goroutine not stopped")
+ case _Grunnable, _Gsyscall, _Gwaiting:
// ok
}
@@ -644,6 +651,8 @@ func scanstack(gp *g, gcw *gcWork) {
doscanstack(gp, gcw)
} else if gp.entry != nil {
// This is a newly created g that hasn't run. No stack to scan.
+ } else if readgstatus(gp)&^_Gscan == _Gsyscall {
+ scanSyscallStack(gp, gcw)
} else {
// Scanning another g's stack. We need to switch to that g
// to unwind its stack. And switch back after scan.
@@ -661,8 +670,6 @@ func scanstack(gp *g, gcw *gcWork) {
// This is necessary as it uses stack objects (a.k.a. stack tracing).
// We don't (yet) do stack objects, and regular stack/heap scan
// will take care of defer records just fine.
-
- gp.gcscanvalid = true
}
// scanstackswitch scans gp's stack by switching (gogo) to gp and
@@ -700,6 +707,38 @@ func scanstackswitch(gp *g, gcw *gcWork) {
releasem(mp)
}
+// scanSyscallStack scans the stack of a goroutine blocked in a
+// syscall by waking it up and asking it to scan its own stack.
+func scanSyscallStack(gp *g, gcw *gcWork) {
+ if gp.scanningself {
+ // We've suspended the goroutine by setting the _Gscan bit,
+ // so this shouldn't be possible.
+ throw("scanSyscallStack: scanningself")
+ }
+ if gp.gcscandone {
+ // We've suspended the goroutine by setting the _Gscan bit,
+ // so this shouldn't be possible.
+
+ throw("scanSyscallStack: gcscandone")
+ }
+
+ gp.gcScannedSyscallStack = false
+ for {
+ mp := gp.m
+ noteclear(&mp.scannote)
+ gp.scangcw = uintptr(unsafe.Pointer(gcw))
+ tgkill(getpid(), _pid_t(mp.procid), _SIGURG)
+ // Wait for gp to scan its own stack.
+ notesleep(&mp.scannote)
+ if gp.gcScannedSyscallStack {
+ return
+ }
+
+ // The signal was delivered at a bad time. Try again.
+ osyield()
+ }
+}
+
type gcDrainFlags int
const (
@@ -1087,10 +1126,10 @@ func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
if obj != 0 {
o, span, objIndex := findObject(obj, b, i, false)
if obj < minPhysPageSize ||
- span != nil && span.state != mSpanManual &&
- (obj < span.base() || obj >= span.limit || span.state != mSpanInUse) {
+ span != nil && span.state.get() != mSpanManual &&
+ (obj < span.base() || obj >= span.limit || span.state.get() != mSpanInUse) {
print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n")
- name, file, line, _ := funcfileline(pc, -1)
+ name, file, line, _ := funcfileline(pc, -1, false)
print(name, "\n", file, ":", line, "\n")
//gcDumpObject("object", b, i)
throw("found bad pointer in Go stack (incorrect use of unsafe or cgo?)")
@@ -1218,15 +1257,15 @@ func gcDumpObject(label string, obj, off uintptr) {
return
}
print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
- if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
- print(mSpanStateNames[s.state], "\n")
+ if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
+ print(mSpanStateNames[state], "\n")
} else {
- print("unknown(", s.state, ")\n")
+ print("unknown(", state, ")\n")
}
skipped := false
size := s.elemsize
- if s.state == mSpanManual && size == 0 {
+ if s.state.get() == mSpanManual && size == 0 {
// We're printing something from a stack frame. We
// don't know how big it is, so just show up to an
// including off.
@@ -1314,7 +1353,7 @@ var useCheckmark = false
func initCheckmarks() {
useCheckmark = true
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
heapBitsForAddr(s.base()).initCheckmarkSpan(s.layout())
}
}
@@ -1323,7 +1362,7 @@ func initCheckmarks() {
func clearCheckmarks() {
useCheckmark = false
for _, s := range mheap_.allspans {
- if s.state == mSpanInUse {
+ if s.state.get() == mSpanInUse {
heapBitsForAddr(s.base()).clearCheckmarkSpan(s.layout())
}
}
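markrootSpans above now loads each span pointer from the sweep buffer atomically and skips nil slots, since spans can be appended to the buffer while root marking is underway. A standalone sketch of that read pattern (the span type and buffer below are illustrative stand-ins, not the runtime's sweepBuf):

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type span struct{ inUse bool }

func main() {
	// A block of span pointers that another goroutine may still be filling in.
	block := make([]*span, 4)
	block[1] = &span{inUse: true}

	for i := range block {
		// Read each slot atomically and skip slots that haven't been
		// published yet, mirroring the nil check added in markrootSpans.
		s := (*span)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&block[i]))))
		if s == nil || !s.inUse {
			continue
		}
		fmt.Printf("scanning span at slot %d\n", i)
	}
}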
diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go
index 9f8c472..f3856db 100644
--- a/libgo/go/runtime/mgcscavenge.go
+++ b/libgo/go/runtime/mgcscavenge.go
@@ -17,7 +17,29 @@
// scavenger's primary goal is to bring the estimated heap RSS of the
// application down to a goal.
//
-// That goal is defined as (retainExtraPercent+100) / 100 * next_gc.
+// That goal is defined as:
+// (retainExtraPercent+100) / 100 * (next_gc / last_next_gc) * last_heap_inuse
+//
+// Essentially, we wish to have the application's RSS track the heap goal, but
+// the heap goal is defined in terms of bytes of objects, rather than pages like
+// RSS. As a result, we need to account for fragmentation internal to
+// spans. next_gc / last_next_gc defines the ratio between the current heap goal
+// and the last heap goal, which tells us by how much the heap is growing and
+// shrinking. We estimate what the heap will grow to in terms of pages by taking
+// this ratio and multiplying it by heap_inuse at the end of the last GC, which
+// allows us to account for this additional fragmentation. Note that this
+// procedure makes the assumption that the degree of fragmentation won't change
+// dramatically over the next GC cycle. Overestimating the amount of
+// fragmentation simply results in higher memory use, which will be accounted
+// for by the next pacing update. Underestimating the fragmentation, however,
+// could lead to performance degradation. Handling this case is not within the
+// scope of the scavenger. Situations where the amount of fragmentation balloons
+// over the course of a single GC cycle should be considered pathologies,
+// flagged as bugs, and fixed appropriately.
+//
+// An additional factor of retainExtraPercent is added as a buffer to help ensure
+// that there's more unscavenged memory to allocate out of, since each allocation
+// out of scavenged memory incurs a potentially expensive page fault.
//
// The goal is updated after each GC and the scavenger's pacing parameters
// (which live in mheap_) are updated to match. The pacing parameters work much
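Concretely, the goal formula above works out as in the following sketch, which mirrors the computation gcPaceScavenger performs further down in this diff. retainExtraPercent matches the constant defined below; the input values are assumed for illustration, and the 10% buffer is written here as a plain division by 10, equivalent to the integer-division expression in the patch:

package main

import "fmt"

func main() {
	const retainExtraPercent = 10

	lastHeapInuse := uint64(90 << 20) // heap_inuse at the end of the last GC (assumed)
	lastNextGC := uint64(80 << 20)    // previous heap goal (assumed)
	nextGC := uint64(100 << 20)       // current heap goal (assumed)
	physPageSize := uint64(4096)      // physical page size (assumed)

	// Scale last cycle's in-use bytes by the growth in the heap goal.
	goalRatio := float64(nextGC) / float64(lastNextGC)
	retainedGoal := uint64(float64(lastHeapInuse) * goalRatio)

	// Add the retainExtraPercent buffer, then round up to a physical page.
	retainedGoal += retainedGoal / (100 / retainExtraPercent)
	retainedGoal = (retainedGoal + physPageSize - 1) &^ (physPageSize - 1)

	// 90 MiB * (100/80) * 1.10 = 123.75 MiB
	fmt.Printf("scavenge goal: %.2f MiB\n", float64(retainedGoal)/(1<<20))
}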
@@ -33,25 +55,18 @@
package runtime
+import (
+ "runtime/internal/atomic"
+ "runtime/internal/sys"
+ "unsafe"
+)
+
const (
// The background scavenger is paced according to these parameters.
//
// scavengePercent represents the portion of mutator time we're willing
// to spend on scavenging in percent.
- //
- // scavengePageLatency is a worst-case estimate (order-of-magnitude) of
- // the time it takes to scavenge one (regular-sized) page of memory.
- // scavengeHugePageLatency is the same but for huge pages.
- //
- // scavengePagePeriod is derived from scavengePercent and scavengePageLatency,
- // and represents the average time between scavenging one page that we're
- // aiming for. scavengeHugePagePeriod is the same but for huge pages.
- // These constants are core to the scavenge pacing algorithm.
- scavengePercent = 1 // 1%
- scavengePageLatency = 10e3 // 10µs
- scavengeHugePageLatency = 10e3 // 10µs
- scavengePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
- scavengeHugePagePeriod = scavengePageLatency / (scavengePercent / 100.0)
+ scavengePercent = 1 // 1%
// retainExtraPercent represents the amount of memory over the heap goal
// that the scavenger should keep as a buffer space for the allocator.
@@ -61,34 +76,46 @@ const (
// incurs an additional cost), to account for heap fragmentation and
// the ever-changing layout of the heap.
retainExtraPercent = 10
+
+ // maxPagesPerPhysPage is the maximum number of supported runtime pages per
+ // physical page, based on maxPhysPageSize.
+ maxPagesPerPhysPage = maxPhysPageSize / pageSize
)
// heapRetained returns an estimate of the current heap RSS.
-//
-// mheap_.lock must be held or the world must be stopped.
func heapRetained() uint64 {
- return memstats.heap_sys - memstats.heap_released
+ return atomic.Load64(&memstats.heap_sys) - atomic.Load64(&memstats.heap_released)
}
// gcPaceScavenger updates the scavenger's pacing, particularly
// its rate and RSS goal.
//
// The RSS goal is based on the current heap goal with a small overhead
-// to accomodate non-determinism in the allocator.
+// to accommodate non-determinism in the allocator.
//
// The pacing is based on scavengePageRate, which applies to both regular and
// huge pages. See that constant for more information.
//
// mheap_.lock must be held or the world must be stopped.
func gcPaceScavenger() {
- // Compute our scavenging goal and align it to a physical page boundary
- // to make the following calculations more exact.
- retainedGoal := memstats.next_gc
+ // If we're called before the first GC completed, disable scavenging.
+ // We never scavenge before the 2nd GC cycle anyway (we don't have enough
+ // information about the heap yet) so this is fine, and avoids a fault
+ // or garbage data later.
+ if memstats.last_next_gc == 0 {
+ mheap_.scavengeGoal = ^uint64(0)
+ return
+ }
+ // Compute our scavenging goal.
+ goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc)
+ retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
// Add retainExtraPercent overhead to retainedGoal. This calculation
// looks strange but the purpose is to arrive at an integer division
// (e.g. if retainExtraPercent = 12.5, then we get a divisor of 8)
// that also avoids the overflow from a multiplication.
retainedGoal += retainedGoal / (1.0 / (retainExtraPercent / 100.0))
+ // Align it to a physical page boundary to make the following calculations
+ // a bit more exact.
retainedGoal = (retainedGoal + uint64(physPageSize) - 1) &^ (uint64(physPageSize) - 1)
// Represents where we are now in the heap's contribution to RSS in bytes.
@@ -104,86 +131,31 @@ func gcPaceScavenger() {
// physical page.
retainedNow := heapRetained()
- // If we're already below our goal, publish the goal in case it changed
- // then disable the background scavenger.
- if retainedNow <= retainedGoal {
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeBytesPerNS = 0
+ // If we're already below our goal, or within one page of our goal, then disable
+ // the background scavenger. We disable the background scavenger if there's
+ // less than one physical page of work to do because it's not worth it.
+ if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
+ mheap_.scavengeGoal = ^uint64(0)
return
}
-
- // Now we start to compute the total amount of work necessary and the total
- // amount of time we're willing to give the scavenger to complete this work.
- // This will involve calculating how much of the work consists of huge pages
- // and how much consists of regular pages since the former can let us scavenge
- // more memory in the same time.
- totalWork := retainedNow - retainedGoal
-
- // On systems without huge page support, all work is regular work.
- regularWork := totalWork
- hugeTime := uint64(0)
-
- // On systems where we have huge pages, we want to do as much of the
- // scavenging work as possible on huge pages, because the costs are the
- // same per page, but we can give back more more memory in a shorter
- // period of time.
- if physHugePageSize != 0 {
- // Start by computing the amount of free memory we have in huge pages
- // in total. Trivially, this is all the huge page work we need to do.
- hugeWork := uint64(mheap_.free.unscavHugePages) << physHugePageShift
-
- // ...but it could turn out that there's more huge work to do than
- // total work, so cap it at total work. This might happen for very large
- // heaps where the additional factor of retainExtraPercent can make it so
- // that there are free chunks of memory larger than a huge page that we don't want
- // to scavenge.
- if hugeWork >= totalWork {
- hugePages := totalWork >> physHugePageShift
- hugeWork = hugePages << physHugePageShift
- }
- // Everything that's not huge work is regular work. At this point we
- // know huge work so we can calculate how much time that will take
- // based on scavengePageRate (which applies to pages of any size).
- regularWork = totalWork - hugeWork
- hugeTime = (hugeWork >> physHugePageShift) * scavengeHugePagePeriod
- }
- // Finally, we can compute how much time it'll take to do the regular work
- // and the total time to do all the work.
- regularTime := regularWork / uint64(physPageSize) * scavengePagePeriod
- totalTime := hugeTime + regularTime
-
- now := nanotime()
-
- lock(&scavenge.lock)
-
- // Update all the pacing parameters in mheap with scavenge.lock held,
- // so that scavenge.gen is kept in sync with the updated values.
- mheap_.scavengeRetainedGoal = retainedGoal
- mheap_.scavengeRetainedBasis = retainedNow
- mheap_.scavengeTimeBasis = now
- mheap_.scavengeBytesPerNS = float64(totalWork) / float64(totalTime)
- scavenge.gen++ // increase scavenge generation
-
- // Wake up background scavenger if needed, since the pacing was just updated.
- wakeScavengerLocked()
-
- unlock(&scavenge.lock)
+ mheap_.scavengeGoal = retainedGoal
+ mheap_.pages.resetScavengeAddr()
}
-// State of the background scavenger.
+// Sleep/wait state of the background scavenger.
var scavenge struct {
lock mutex
g *g
parked bool
timer *timer
- gen uint32 // read with either lock or mheap_.lock, write with both
}
-// wakeScavengerLocked unparks the scavenger if necessary. It must be called
+// wakeScavenger unparks the scavenger if necessary. It must be called
// after any pacing update.
//
-// scavenge.lock must be held.
-func wakeScavengerLocked() {
+// mheap_.lock and scavenge.lock must not be held.
+func wakeScavenger() {
+ lock(&scavenge.lock)
if scavenge.parked {
// Try to stop the timer but we don't really care if we succeed.
// It's possible that either a timer was never started, or that
@@ -194,45 +166,44 @@ func wakeScavengerLocked() {
stopTimer(scavenge.timer)
// Unpark the goroutine and tell it that there may have been a pacing
- // change.
+ // change. Note that we skip the scheduler's runnext slot because we
+ // want to avoid having the scavenger interfere with the fair
+ // scheduling of user goroutines. In effect, this schedules the
+ // scavenger at a "lower priority" but that's OK because it'll
+ // catch up on the work it missed when it does get scheduled.
scavenge.parked = false
- ready(scavenge.g, 0, true)
+ systemstack(func() {
+ ready(scavenge.g, 0, false)
+ })
}
+ unlock(&scavenge.lock)
}
// scavengeSleep attempts to put the scavenger to sleep for ns.
-// It also checks to see if gen != scavenge.gen before going to sleep,
-// and aborts if true (meaning an update had occurred).
//
// Note that this function should only be called by the scavenger.
//
// The scavenger may be woken up earlier by a pacing change, and it may not go
// to sleep at all if there's a pending pacing change.
//
-// Returns false if awoken early (i.e. true means a complete sleep).
-func scavengeSleep(gen uint32, ns int64) bool {
+// Returns the amount of time actually slept.
+func scavengeSleep(ns int64) int64 {
lock(&scavenge.lock)
- // If there was an update, just abort the sleep.
- if scavenge.gen != gen {
- unlock(&scavenge.lock)
- return false
- }
-
// Set the timer.
- now := nanotime()
- scavenge.timer.when = now + ns
- startTimer(scavenge.timer)
-
- // Park the goroutine. It's fine that we don't publish the
- // fact that the timer was set; even if the timer wakes up
-	// and fires scavengeReady before we park, it'll block on
- // scavenge.lock.
+ //
+ // This must happen here instead of inside gopark
+ // because we can't close over any variables without
+ // failing escape analysis.
+ start := nanotime()
+ resetTimer(scavenge.timer, start+ns)
+
+ // Mark ourself as asleep and go to sleep.
scavenge.parked = true
goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2)
- // Return true if we completed the full sleep.
- return (nanotime() - now) >= ns
+ // Return how long we actually slept for.
+ return nanotime() - start
}
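
For readers outside the runtime, the contract of scavengeSleep (sleep for up to a deadline, return early if woken, and report how long was actually slept) can be modeled with ordinary timers and channels. The sketch below is only an analogy under that assumption: sleepOrWake and the wake channel are hypothetical, and the real scavenger uses the runtime's internal timer plus gopark/ready rather than the time package.

package main

import (
	"fmt"
	"time"
)

// sleepOrWake sleeps for up to d but returns early if something sends on
// (or closes) wake, and reports how long it actually slept -- the same
// contract scavengeSleep provides to the scavenger loop.
func sleepOrWake(wake <-chan struct{}, d time.Duration) time.Duration {
	start := time.Now()
	t := time.NewTimer(d)
	defer t.Stop()
	select {
	case <-t.C: // full sleep
	case <-wake: // woken early, e.g. by a pacing change
	}
	return time.Since(start)
}

func main() {
	wake := make(chan struct{})
	go func() {
		time.Sleep(10 * time.Millisecond)
		close(wake)
	}()
	fmt.Println("slept for", sleepOrWake(wake, time.Second))
}
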
// Background scavenger.
@@ -250,118 +221,543 @@ func bgscavenge(c chan int) {
scavenge.timer = new(timer)
scavenge.timer.f = func(_ interface{}, _ uintptr) {
- lock(&scavenge.lock)
- wakeScavengerLocked()
- unlock(&scavenge.lock)
+ wakeScavenger()
}
c <- 1
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
- // Parameters for sleeping.
- //
- // If we end up doing more work than we need, we should avoid spinning
- // until we have more work to do: instead, we know exactly how much time
- // until more work will need to be done, so we sleep.
- //
- // We should avoid sleeping for less than minSleepNS because Gosched()
- // overheads among other things will work out better in that case.
+ // Exponentially-weighted moving average of the fraction of time this
+ // goroutine spends scavenging (that is, percent of a single CPU).
+ // It represents a measure of scheduling overheads which might extend
+ // the sleep or the critical time beyond what's expected. Assume no
+ // overhead to begin with.
//
- // There's no reason to set a maximum on sleep time because we'll always
- // get woken up earlier if there's any kind of update that could change
- // the scavenger's pacing.
- //
- // retryDelayNS tracks how much to sleep next time we fail to do any
- // useful work.
- const minSleepNS = int64(100 * 1000) // 100 µs
-
- retryDelayNS := minSleepNS
+ // TODO(mknyszek): Consider making this based on total CPU time of the
+ // application (i.e. scavengePercent * GOMAXPROCS). This isn't really
+ // feasible now because the scavenger acquires the heap lock over the
+ // scavenging operation, which means scavenging effectively blocks
+ // allocators and isn't scalable. However, given a scalable allocator,
+ // it makes sense to also make the scavenger scale with it; if you're
+ // allocating more frequently, then presumably you're also generating
+ // more work for the scavenger.
+ const idealFraction = scavengePercent / 100.0
+ scavengeEWMA := float64(idealFraction)
for {
released := uintptr(0)
- park := false
- ttnext := int64(0)
- gen := uint32(0)
+
+ // Time in scavenging critical section.
+ crit := int64(0)
// Run on the system stack since we grab the heap lock,
// and a stack growth with the heap lock means a deadlock.
systemstack(func() {
lock(&mheap_.lock)
- gen = scavenge.gen
-
// If background scavenging is disabled or if there's no work to do just park.
- retained := heapRetained()
- if mheap_.scavengeBytesPerNS == 0 || retained <= mheap_.scavengeRetainedGoal {
+ retained, goal := heapRetained(), mheap_.scavengeGoal
+ if retained <= goal {
unlock(&mheap_.lock)
- park = true
return
}
-
- // Calculate how big we want the retained heap to be
- // at this point in time.
- //
- // The formula is for that of a line, y = b - mx
- // We want y (want),
- // m = scavengeBytesPerNS (> 0)
- // x = time between scavengeTimeBasis and now
- // b = scavengeRetainedBasis
- rate := mheap_.scavengeBytesPerNS
- tdist := nanotime() - mheap_.scavengeTimeBasis
- rdist := uint64(rate * float64(tdist))
- want := mheap_.scavengeRetainedBasis - rdist
-
- // If we're above the line, scavenge to get below the
- // line.
- if retained > want {
- released = mheap_.scavengeLocked(uintptr(retained - want))
- }
unlock(&mheap_.lock)
- // If we over-scavenged a bit, calculate how much time it'll
- // take at the current rate for us to make that up. We definitely
- // won't have any work to do until at least that amount of time
- // passes.
- if released > uintptr(retained-want) {
- extra := released - uintptr(retained-want)
- ttnext = int64(float64(extra) / rate)
- }
+ // Scavenge one page, and measure the amount of time spent scavenging.
+ start := nanotime()
+ released = mheap_.pages.scavengeOne(physPageSize, false)
+ crit = nanotime() - start
})
- if park {
+ if debug.gctrace > 0 {
+ if released > 0 {
+ print("scvg: ", released>>10, " KB released\n")
+ }
+ print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+ }
+
+ if released == 0 {
lock(&scavenge.lock)
scavenge.parked = true
goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
continue
}
- if debug.gctrace > 0 {
- if released > 0 {
- print("scvg: ", released>>20, " MB released\n")
- }
- print("scvg: inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+		// If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
+		// put their machine to sleep) in the critical section, cap the value we use in the
+		// sleep calculation at 10 ms to keep the sleep time from growing arbitrarily large.
+ const maxCrit = 10e6
+ if crit > maxCrit {
+ crit = maxCrit
}
- if released == 0 {
- // If we were unable to release anything this may be because there's
- // no free memory available to scavenge. Go to sleep and try again.
- if scavengeSleep(gen, retryDelayNS) {
- // If we successfully slept through the delay, back off exponentially.
- retryDelayNS *= 2
+ // Compute the amount of time to sleep, assuming we want to use at most
+ // scavengePercent of CPU time. Take into account scheduling overheads
+ // that may extend the length of our sleep by multiplying by how far
+ // off we are from the ideal ratio. For example, if we're sleeping too
+		// much, then scavengeEWMA < idealFraction, so we'll adjust the sleep time
+ // down.
+ adjust := scavengeEWMA / idealFraction
+ sleepTime := int64(adjust * float64(crit) / (scavengePercent / 100.0))
+
+ // Go to sleep.
+ slept := scavengeSleep(sleepTime)
+
+ // Compute the new ratio.
+ fraction := float64(crit) / float64(crit+slept)
+
+ // Set a lower bound on the fraction.
+ // Due to OS-related anomalies we may "sleep" for an inordinate amount
+ // of time. Let's avoid letting the ratio get out of hand by bounding
+ // the sleep time we use in our EWMA.
+		const minFraction = 1.0 / 1000 // float constant: 1 / 1000 would truncate to 0
+ if fraction < minFraction {
+ fraction = minFraction
+ }
+
+ // Update scavengeEWMA by merging in the new crit/slept ratio.
+ const alpha = 0.5
+ scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
+ }
+}
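
The loop above interleaves timing, locking, and scheduling, which makes the pacing arithmetic easy to lose. The standalone sketch below isolates just that arithmetic using the constants visible in the diff (scavengePercent, idealFraction, and an alpha of 0.5); nextSleep and updateEWMA are hypothetical helper names for illustration, not runtime functions.

package main

import "fmt"

const (
	scavengePercent = 1 // target ~1% of a single CPU
	idealFraction   = scavengePercent / 100.0
	alpha           = 0.5 // EWMA smoothing factor
)

// nextSleep mirrors the sleep-time computation in bgscavenge: scale the time
// just spent scavenging (crit) so that, on average, the scavenger uses about
// idealFraction of a CPU, adjusted by how far the EWMA says we have been
// drifting from that target.
func nextSleep(crit int64, ewma float64) int64 {
	adjust := ewma / idealFraction
	return int64(adjust * float64(crit) / idealFraction)
}

// updateEWMA folds the observed crit/(crit+slept) fraction back into the
// moving average, with the same 1/1000 lower bound used above.
func updateEWMA(crit, slept int64, ewma float64) float64 {
	fraction := float64(crit) / float64(crit+slept)
	if fraction < 1.0/1000 {
		fraction = 1.0 / 1000
	}
	return alpha*fraction + (1-alpha)*ewma
}

func main() {
	ewma := float64(idealFraction)
	crit := int64(50000) // 50µs spent scavenging one page
	sleep := nextSleep(crit, ewma)
	fmt.Println("sleep:", sleep, "ns") // 5000000: a 5ms sleep keeps us near 1% CPU
	fmt.Println("ewma:", updateEWMA(crit, sleep, ewma))
}
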
+
+// scavenge scavenges nbytes worth of free pages, starting from the
+// highest address. Successive calls continue from where the previous
+// call left off until the heap is exhausted. Call resetScavengeAddr to
+// bring the scavenger back to the top of the heap.
+//
+// Returns the amount of memory scavenged in bytes.
+//
+// If locked == false, s.mheapLock must not be locked. If locked == true,
+// s.mheapLock must be locked.
+//
+// Must run on the system stack because scavengeOne must run on the
+// system stack.
+//
+//go:systemstack
+func (s *pageAlloc) scavenge(nbytes uintptr, locked bool) uintptr {
+ released := uintptr(0)
+ for released < nbytes {
+ r := s.scavengeOne(nbytes-released, locked)
+ if r == 0 {
+ // Nothing left to scavenge! Give up.
+ break
+ }
+ released += r
+ }
+ return released
+}
+
+// resetScavengeAddr sets the scavenge start address to the top of the heap's
+// address space. This should be called each time the scavenger's pacing
+// changes.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) resetScavengeAddr() {
+ s.scavAddr = chunkBase(s.end) - 1
+}
+
+// scavengeOne starts from s.scavAddr and walks down the heap until it finds
+// a contiguous run of pages to scavenge. It will try to scavenge at most
+// max bytes at once, but may scavenge more to avoid breaking huge pages. Once
+// it scavenges some memory it returns how much it scavenged and updates s.scavAddr
+// appropriately. s.scavAddr must be reset manually and externally.
+//
+// Should it exhaust the heap, it will return 0 and set s.scavAddr to minScavAddr.
+//
+// If locked == false, s.mheapLock must not be locked.
+// If locked == true, s.mheapLock must be locked.
+//
+// Must be run on the system stack because it either acquires the heap lock
+// or executes with the heap lock acquired.
+//
+//go:systemstack
+func (s *pageAlloc) scavengeOne(max uintptr, locked bool) uintptr {
+ // Calculate the maximum number of pages to scavenge.
+ //
+ // This should be alignUp(max, pageSize) / pageSize but max can and will
+ // be ^uintptr(0), so we need to be very careful not to overflow here.
+ // Rather than use alignUp, calculate the number of pages rounded down
+ // first, then add back one if necessary.
+ maxPages := max / pageSize
+ if max%pageSize != 0 {
+ maxPages++
+ }
+
+ // Calculate the minimum number of pages we can scavenge.
+ //
+ // Because we can only scavenge whole physical pages, we must
+ // ensure that we scavenge at least minPages each time, aligned
+ // to minPages*pageSize.
+ minPages := physPageSize / pageSize
+ if minPages < 1 {
+ minPages = 1
+ }
+
+ // Helpers for locking and unlocking only if locked == false.
+ lockHeap := func() {
+ if !locked {
+ lock(s.mheapLock)
+ }
+ }
+ unlockHeap := func() {
+ if !locked {
+ unlock(s.mheapLock)
+ }
+ }
+
+ lockHeap()
+ ci := chunkIndex(s.scavAddr)
+ if ci < s.start {
+ unlockHeap()
+ return 0
+ }
+
+ // Check the chunk containing the scav addr, starting at the addr
+ // and see if there are any free and unscavenged pages.
+ if s.summary[len(s.summary)-1][ci].max() >= uint(minPages) {
+		// We only bother looking for a candidate if there are at least
+ // minPages free pages at all. It's important that we only
+ // continue if the summary says we can because that's how
+ // we can tell if parts of the address space are unused.
+ // See the comment on s.chunks in mpagealloc.go.
+ base, npages := s.chunkOf(ci).findScavengeCandidate(chunkPageIndex(s.scavAddr), minPages, maxPages)
+
+ // If we found something, scavenge it and return!
+ if npages != 0 {
+ s.scavengeRangeLocked(ci, base, npages)
+ unlockHeap()
+ return uintptr(npages) * pageSize
+ }
+ }
+
+ // getInUseRange returns the highest range in the
+ // intersection of [0, addr] and s.inUse.
+ //
+ // s.mheapLock must be held.
+ getInUseRange := func(addr uintptr) addrRange {
+ top := s.inUse.findSucc(addr)
+ if top == 0 {
+ return addrRange{}
+ }
+ r := s.inUse.ranges[top-1]
+ // addr is inclusive, so treat it as such when
+ // updating the limit, which is exclusive.
+ if r.limit > addr+1 {
+ r.limit = addr + 1
+ }
+ return r
+ }
+
+ // Slow path: iterate optimistically over the in-use address space
+ // looking for any free and unscavenged page. If we think we see something,
+ // lock and verify it!
+ //
+ // We iterate over the address space by taking ranges from inUse.
+newRange:
+ for {
+ r := getInUseRange(s.scavAddr)
+ if r.size() == 0 {
+ break
+ }
+ unlockHeap()
+
+ // Iterate over all of the chunks described by r.
+ // Note that r.limit is the exclusive upper bound, but what
+ // we want is the top chunk instead, inclusive, so subtract 1.
+ bot, top := chunkIndex(r.base), chunkIndex(r.limit-1)
+ for i := top; i >= bot; i-- {
+ // If this chunk is totally in-use or has no unscavenged pages, don't bother
+ // doing a more sophisticated check.
+ //
+ // Note we're accessing the summary and the chunks without a lock, but
+ // that's fine. We're being optimistic anyway.
+
+ // Check quickly if there are enough free pages at all.
+ if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
+ continue
}
- continue
+
+ // Run over the chunk looking harder for a candidate. Again, we could
+ // race with a lot of different pieces of code, but we're just being
+ // optimistic. Make sure we load the l2 pointer atomically though, to
+ // avoid races with heap growth. It may or may not be possible to also
+ // see a nil pointer in this case if we do race with heap growth, but
+ // just defensively ignore the nils. This operation is optimistic anyway.
+ l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
+ if l2 == nil || !l2[i.l2()].hasScavengeCandidate(minPages) {
+ continue
+ }
+
+ // We found a candidate, so let's lock and verify it.
+ lockHeap()
+
+ // Find, verify, and scavenge if we can.
+ chunk := s.chunkOf(i)
+ base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
+ if npages > 0 {
+ // We found memory to scavenge! Mark the bits and report that up.
+ // scavengeRangeLocked will update scavAddr for us, also.
+ s.scavengeRangeLocked(i, base, npages)
+ unlockHeap()
+ return uintptr(npages) * pageSize
+ }
+
+ // We were fooled, let's take this opportunity to move the scavAddr
+ // all the way down to where we searched as scavenged for future calls
+ // and keep iterating. Then, go get a new range.
+ s.scavAddr = chunkBase(i-1) + pallocChunkPages*pageSize - 1
+ continue newRange
}
- retryDelayNS = minSleepNS
+ lockHeap()
+
+ // Move the scavenger down the heap, past everything we just searched.
+	// Since we don't check if scavAddr moved while we let go of the heap lock,
+ // it's possible that it moved down and we're moving it up here. This
+ // raciness could result in us searching parts of the heap unnecessarily.
+ // TODO(mknyszek): Remove this racy behavior through explicit address
+ // space reservations, which are difficult to do with just scavAddr.
+ s.scavAddr = r.base - 1
+ }
+ // We reached the end of the in-use address space and couldn't find anything,
+ // so signal that there's nothing left to scavenge.
+ s.scavAddr = minScavAddr
+ unlockHeap()
- if ttnext > 0 && ttnext > minSleepNS {
- // If there's an appreciable amount of time until the next scavenging
- // goal, just sleep. We'll get woken up if anything changes and this
- // way we avoid spinning.
- scavengeSleep(gen, ttnext)
- continue
+ return 0
+}
+
+// scavengeRangeLocked scavenges the given region of memory.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) {
+ s.chunkOf(ci).scavenged.setRange(base, npages)
+
+ // Compute the full address for the start of the range.
+ addr := chunkBase(ci) + uintptr(base)*pageSize
+
+ // Update the scav pointer.
+ s.scavAddr = addr - 1
+
+ // Only perform the actual scavenging if we're not in a test.
+ // It's dangerous to do so otherwise.
+ if s.test {
+ return
+ }
+ sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+
+ // Update global accounting only when not in test, otherwise
+ // the runtime's accounting will be wrong.
+ mSysStatInc(&memstats.heap_released, uintptr(npages)*pageSize)
+}
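
scavengeRangeLocked records the released pages in the chunk's scavenged bitmap before touching the OS. The shape of that bitmap update is just setting a run of bits across packed uint64 words; the sketch below is a simplified, bit-at-a-time stand-in (setRange here is hypothetical, not the runtime's pallocBits implementation, which works a word at a time).

package main

import "fmt"

// setRange marks npages consecutive pages, starting at page i, as scavenged
// in a packed bitmap -- a bit-at-a-time sketch of what scavenged.setRange
// does for the chunk above.
func setRange(bits []uint64, i, npages uint) {
	for j := i; j < i+npages; j++ {
		bits[j/64] |= 1 << (j % 64)
	}
}

func main() {
	bitmap := make([]uint64, 8) // a 512-page chunk needs 8 uint64 words
	setRange(bitmap, 60, 10)    // pages 60-69 now read as scavenged
	fmt.Printf("%016x %016x\n", bitmap[0], bitmap[1])
}
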
+
+// fillAligned returns x but with all zeroes in m-aligned
+// groups of m bits set to 1 if any bit in the group is non-zero.
+//
+// For example, fillAligned(0x0100a3, 8) == 0xff00ff.
+//
+// Note that if m == 1, this is a no-op.
+//
+// m must be a power of 2 <= maxPagesPerPhysPage.
+func fillAligned(x uint64, m uint) uint64 {
+ apply := func(x uint64, c uint64) uint64 {
+		// The technique used here is derived from
+ // https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ // and extended for more than just bytes (like nibbles
+ // and uint16s) by using an appropriate constant.
+ //
+ // To summarize the technique, quoting from that page:
+ // "[It] works by first zeroing the high bits of the [8]
+ // bytes in the word. Subsequently, it adds a number that
+ // will result in an overflow to the high bit of a byte if
+ // any of the low bits were initially set. Next the high
+ // bits of the original word are ORed with these values;
+ // thus, the high bit of a byte is set iff any bit in the
+ // byte was set. Finally, we determine if any of these high
+ // bits are zero by ORing with ones everywhere except the
+ // high bits and inverting the result."
+ return ^((((x & c) + c) | x) | c)
+ }
+ // Transform x to contain a 1 bit at the top of each m-aligned
+ // group of m zero bits.
+ switch m {
+ case 1:
+ return x
+ case 2:
+ x = apply(x, 0x5555555555555555)
+ case 4:
+ x = apply(x, 0x7777777777777777)
+ case 8:
+ x = apply(x, 0x7f7f7f7f7f7f7f7f)
+ case 16:
+ x = apply(x, 0x7fff7fff7fff7fff)
+ case 32:
+ x = apply(x, 0x7fffffff7fffffff)
+ case 64: // == maxPagesPerPhysPage
+ x = apply(x, 0x7fffffffffffffff)
+ default:
+ throw("bad m value")
+ }
+	// Now, the top bit of each m-aligned group in x is set iff
+ // that group was all zero in the original x.
+
+ // From each group of m bits subtract 1.
+ // Because we know only the top bits of each
+ // m-aligned group are set, we know this will
+ // set each group to have all the bits set except
+ // the top bit, so just OR with the original
+ // result to set all the bits.
+ return ^((x - (x >> (m - 1))) | x)
+}
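
The bit trick in fillAligned is dense, so a small standalone reproduction of just the m == 8 case may help. fillAligned8 below is a hypothetical name for this sketch; it simply re-derives the documented example fillAligned(0x0100a3, 8) == 0xff00ff.

package main

import "fmt"

// fillAligned8 reproduces the m == 8 case of fillAligned above: every byte
// of x that contains at least one 1 bit becomes 0xff, and every all-zero
// byte stays 0x00.
func fillAligned8(x uint64) uint64 {
	const c = 0x7f7f7f7f7f7f7f7f
	// Set the top bit of each byte that was entirely zero...
	x = ^((((x & c) + c) | x) | c)
	// ...then smear that bit down through its byte and invert, so the
	// originally non-zero bytes come out as all ones.
	return ^((x - (x >> 7)) | x)
}

func main() {
	fmt.Printf("%#x\n", fillAligned8(0x0100a3)) // 0xff00ff, matching the doc comment
}
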
+
+// hasScavengeCandidate returns true if there's any min-page-aligned groups of
+// min pages of free-and-unscavenged memory in the region represented by this
+// pallocData.
+//
+// min must be a non-zero power of 2 <= maxPagesPerPhysPage.
+func (m *pallocData) hasScavengeCandidate(min uintptr) bool {
+ if min&(min-1) != 0 || min == 0 {
+ print("runtime: min = ", min, "\n")
+ throw("min must be a non-zero power of 2")
+ } else if min > maxPagesPerPhysPage {
+ print("runtime: min = ", min, "\n")
+ throw("min too large")
+ }
+
+ // The goal of this search is to see if the chunk contains any free and unscavenged memory.
+ for i := len(m.scavenged) - 1; i >= 0; i-- {
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ //
+ // TODO(mknyszek): Consider splitting up fillAligned into two
+ // functions, since here we technically could get by with just
+ // the first half of its computation. It'll save a few instructions
+ // but adds some additional code complexity.
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+
+ // Quickly skip over chunks of non-free or scavenged pages.
+ if x != ^uint64(0) {
+ return true
+ }
+ }
+ return false
+}
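
The check above boils down to: OR the allocation bits with the scavenged bits and look for any remaining zero. The toy example below walks through that for a single hypothetical 64-page group, assuming min == 1 so that fillAligned is the identity.

package main

import "fmt"

func main() {
	// A hypothetical 64-page group: pages 0-31 are allocated and pages
	// 40-63 are free but already scavenged; pages 32-39 are free and
	// unscavenged.
	var alloc uint64 = 0x00000000ffffffff
	var scav uint64 = 0xffffff0000000000
	// 1s mark pages that are allocated OR scavenged, so 0s mark pages that
	// are free AND unscavenged, exactly what the scavenger looks for.
	busy := alloc | scav
	fmt.Printf("busy bits: %064b\n", busy)
	fmt.Println("has candidate:", busy != ^uint64(0)) // true: bits 32-39 are clear
}
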
+
+// findScavengeCandidate returns a start index and a size for this pallocData
+// segment which represents a contiguous region of free and unscavenged memory.
+//
+// searchIdx indicates the page index within this chunk to start the search, but
+// note that findScavengeCandidate searches backwards through the pallocData. As a
+// result, it will return the highest scavenge candidate in address order.
+//
+// min indicates a hard minimum size and alignment for runs of pages. That is,
+// findScavengeCandidate will not return a region smaller than min pages in size,
+// or that is min pages or greater in size but not aligned to min. min must be
+// a non-zero power of 2 <= maxPagesPerPhysPage.
+//
+// max is a hint for how big of a region is desired. If max >= pallocChunkPages, then
+// findScavengeCandidate effectively returns entire free and unscavenged regions.
+// If max < pallocChunkPages, it may truncate the returned region such that size is
+// max. However, findScavengeCandidate may still return a larger region if, for
+// example, it chooses to preserve huge pages, or if max is not aligned to min (it
+// will round up). That is, even if max is small, the returned size is not guaranteed
+// to be equal to max. max is allowed to be less than min, in which case it is as if
+// max == min.
+func (m *pallocData) findScavengeCandidate(searchIdx uint, min, max uintptr) (uint, uint) {
+ if min&(min-1) != 0 || min == 0 {
+ print("runtime: min = ", min, "\n")
+ throw("min must be a non-zero power of 2")
+ } else if min > maxPagesPerPhysPage {
+ print("runtime: min = ", min, "\n")
+ throw("min too large")
+ }
+ // max may not be min-aligned, so we might accidentally truncate to
+ // a max value which causes us to return a non-min-aligned value.
+ // To prevent this, align max up to a multiple of min (which is always
+ // a power of 2). This also prevents max from ever being less than
+ // min, unless it's zero, so handle that explicitly.
+ if max == 0 {
+ max = min
+ } else {
+ max = alignUp(max, min)
+ }
+
+ i := int(searchIdx / 64)
+ // Start by quickly skipping over blocks of non-free or scavenged pages.
+ for ; i >= 0; i-- {
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+ if x != ^uint64(0) {
+ break
+ }
+ }
+ if i < 0 {
+ // Failed to find any free/unscavenged pages.
+ return 0, 0
+ }
+ // We have something in the 64-bit chunk at i, but it could
+ // extend further. Loop until we find the extent of it.
+
+ // 1s are scavenged OR non-free => 0s are unscavenged AND free
+ x := fillAligned(m.scavenged[i]|m.pallocBits[i], uint(min))
+ z1 := uint(sys.LeadingZeros64(^x))
+ run, end := uint(0), uint(i)*64+(64-z1)
+ if x<<z1 != 0 {
+ // After shifting out z1 bits, we still have 1s,
+ // so the run ends inside this word.
+ run = uint(sys.LeadingZeros64(x << z1))
+ } else {
+ // After shifting out z1 bits, we have no more 1s.
+ // This means the run extends to the bottom of the
+ // word so it may extend into further words.
+ run = 64 - z1
+ for j := i - 1; j >= 0; j-- {
+ x := fillAligned(m.scavenged[j]|m.pallocBits[j], uint(min))
+ run += uint(sys.LeadingZeros64(x))
+ if x != 0 {
+ // The run stopped in this word.
+ break
+ }
}
+ }
- // Give something else a chance to run, no locks are held.
- Gosched()
+ // Split the run we found if it's larger than max but hold on to
+ // our original length, since we may need it later.
+ size := run
+ if size > uint(max) {
+ size = uint(max)
+ }
+ start := end - size
+
+ // Each huge page is guaranteed to fit in a single palloc chunk.
+ //
+ // TODO(mknyszek): Support larger huge page sizes.
+ // TODO(mknyszek): Consider taking pages-per-huge-page as a parameter
+ // so we can write tests for this.
+ if physHugePageSize > pageSize && physHugePageSize > physPageSize {
+ // We have huge pages, so let's ensure we don't break one by scavenging
+ // over a huge page boundary. If the range [start, start+size) overlaps with
+ // a free-and-unscavenged huge page, we want to grow the region we scavenge
+ // to include that huge page.
+
+ // Compute the huge page boundary above our candidate.
+ pagesPerHugePage := uintptr(physHugePageSize / pageSize)
+ hugePageAbove := uint(alignUp(uintptr(start), pagesPerHugePage))
+
+ // If that boundary is within our current candidate, then we may be breaking
+ // a huge page.
+ if hugePageAbove <= end {
+ // Compute the huge page boundary below our candidate.
+ hugePageBelow := uint(alignDown(uintptr(start), pagesPerHugePage))
+
+ if hugePageBelow >= end-run {
+ // We're in danger of breaking apart a huge page since start+size crosses
+				// We're in danger of breaking apart a huge page: start+size crosses
+				// a huge page boundary, and the huge page boundary below start still
+				// falls within the full run we found. Grow the candidate to cover the
+				// entire huge page by rounding start down to the huge page boundary.
+ start = hugePageBelow
+ }
+ }
}
+ return start, size
}
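
The z1/run arithmetic above generalizes across multiple words, alignment minimums, and huge pages. The sketch below strips it down to a single 64-bit word with min == 1 and no huge-page adjustment, substituting math/bits for the runtime-internal sys.LeadingZeros64; highestFreeRun is a hypothetical helper, not runtime code.

package main

import (
	"fmt"
	"math/bits"
)

// highestFreeRun finds the highest-addressed run of 0 bits (free pages) in a
// single 64-bit bitmap word, mirroring the z1/run arithmetic above for the
// simple case where the run does not spill into other words. It returns the
// start bit and the length of the run.
func highestFreeRun(x uint64) (start, size uint) {
	if x == ^uint64(0) {
		return 0, 0 // no free pages at all
	}
	z1 := uint(bits.LeadingZeros64(^x)) // 1s (busy pages) above the run
	end := 64 - z1                      // exclusive upper bound of the run
	var run uint
	if x<<z1 != 0 {
		run = uint(bits.LeadingZeros64(x << z1)) // run ends inside the word
	} else {
		run = 64 - z1 // run reaches bit 0
	}
	return end - run, run
}

func main() {
	// Busy pages at bits 0-39 and 60-63; bits 40-59 are free.
	x := uint64(0xf000000000000000) | (1<<40 - 1)
	start, size := highestFreeRun(x)
	fmt.Println(start, size) // 40 20
}
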
diff --git a/libgo/go/runtime/mgcscavenge_test.go b/libgo/go/runtime/mgcscavenge_test.go
new file mode 100644
index 0000000..518d5ab
--- /dev/null
+++ b/libgo/go/runtime/mgcscavenge_test.go
@@ -0,0 +1,419 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+// makePallocData produces an initialized PallocData by setting
+// the ranges described in alloc and scavenged.
+func makePallocData(alloc, scavenged []BitRange) *PallocData {
+ b := new(PallocData)
+ for _, v := range alloc {
+ if v.N == 0 {
+ // Skip N==0. It's harmless and allocRange doesn't
+ // handle this case.
+ continue
+ }
+ b.AllocRange(v.I, v.N)
+ }
+ for _, v := range scavenged {
+ if v.N == 0 {
+ // See the previous loop.
+ continue
+ }
+ b.ScavengedSetRange(v.I, v.N)
+ }
+ return b
+}
+
+func TestFillAligned(t *testing.T) {
+ fillAlignedSlow := func(x uint64, m uint) uint64 {
+ if m == 1 {
+ return x
+ }
+ out := uint64(0)
+ for i := uint(0); i < 64; i += m {
+ for j := uint(0); j < m; j++ {
+ if x&(uint64(1)<<(i+j)) != 0 {
+ out |= ((uint64(1) << m) - 1) << i
+ break
+ }
+ }
+ }
+ return out
+ }
+ check := func(x uint64, m uint) {
+ want := fillAlignedSlow(x, m)
+ if got := FillAligned(x, m); got != want {
+ t.Logf("got: %064b", got)
+ t.Logf("want: %064b", want)
+ t.Errorf("bad fillAligned(%016x, %d)", x, m)
+ }
+ }
+ for m := uint(1); m <= 64; m *= 2 {
+ tests := []uint64{
+ 0x0000000000000000,
+ 0x00000000ffffffff,
+ 0xffffffff00000000,
+ 0x8000000000000001,
+ 0xf00000000000000f,
+ 0xf00000010050000f,
+ 0xffffffffffffffff,
+ 0x0000000000000001,
+ 0x0000000000000002,
+ 0x0000000000000008,
+ uint64(1) << (m - 1),
+ uint64(1) << m,
+ // Try a few fixed arbitrary examples.
+ 0xb02b9effcf137016,
+ 0x3975a076a9fbff18,
+ 0x0f8c88ec3b81506e,
+ 0x60f14d80ef2fa0e6,
+ }
+ for _, test := range tests {
+ check(test, m)
+ }
+ for i := 0; i < 1000; i++ {
+			// Try pseudo-random numbers.
+ check(rand.Uint64(), m)
+
+ if m > 1 {
+ // For m != 1, let's construct a slightly more interesting
+ // random test. Generate a bitmap which is either 0 or
+ // randomly set bits for each m-aligned group of m bits.
+ val := uint64(0)
+ for n := uint(0); n < 64; n += m {
+ // For each group of m bits, flip a coin:
+ // * Leave them as zero.
+ // * Set them randomly.
+ if rand.Uint64()%2 == 0 {
+ val |= (rand.Uint64() & ((1 << m) - 1)) << n
+ }
+ }
+ check(val, m)
+ }
+ }
+ }
+}
+
+func TestPallocDataFindScavengeCandidate(t *testing.T) {
+ type test struct {
+ alloc, scavenged []BitRange
+ min, max uintptr
+ want BitRange
+ }
+ tests := map[string]test{
+ "MixedMin1": {
+ alloc: []BitRange{{0, 40}, {42, PallocChunkPages - 42}},
+ scavenged: []BitRange{{0, 41}, {42, PallocChunkPages - 42}},
+ min: 1,
+ max: PallocChunkPages,
+ want: BitRange{41, 1},
+ },
+ "MultiMin1": {
+ alloc: []BitRange{{0, 63}, {65, 20}, {87, PallocChunkPages - 87}},
+ scavenged: []BitRange{{86, 1}},
+ min: 1,
+ max: PallocChunkPages,
+ want: BitRange{85, 1},
+ },
+ }
+ // Try out different page minimums.
+ for m := uintptr(1); m <= 64; m *= 2 {
+ suffix := fmt.Sprintf("Min%d", m)
+ tests["AllFree"+suffix] = test{
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, PallocChunkPages},
+ }
+ tests["AllScavenged"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ tests["NoneFree"+suffix] = test{
+ alloc: []BitRange{{0, PallocChunkPages}},
+ scavenged: []BitRange{{PallocChunkPages / 2, PallocChunkPages / 2}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ tests["StartFree"+suffix] = test{
+ alloc: []BitRange{{uint(m), PallocChunkPages - uint(m)}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, uint(m)},
+ }
+ tests["EndFree"+suffix] = test{
+ alloc: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ tests["Straddle64"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64 + uint(m), PallocChunkPages - (64 + uint(m))}},
+ min: m,
+ max: 2 * m,
+ want: BitRange{64 - uint(m), 2 * uint(m)},
+ }
+ tests["BottomEdge64WithFull"+suffix] = test{
+ alloc: []BitRange{{64, 64}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
+ scavenged: []BitRange{{1, 10}},
+ min: m,
+ max: 3 * m,
+ want: BitRange{128, 3 * uint(m)},
+ }
+ tests["BottomEdge64WithPocket"+suffix] = test{
+ alloc: []BitRange{{64, 62}, {127, 1}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
+ scavenged: []BitRange{{1, 10}},
+ min: m,
+ max: 3 * m,
+ want: BitRange{128, 3 * uint(m)},
+ }
+ tests["Max0"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: 0,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ if m <= 8 {
+ tests["OneFree"] = test{
+ alloc: []BitRange{{0, 40}, {40 + uint(m), PallocChunkPages - (40 + uint(m))}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{40, uint(m)},
+ }
+ tests["OneScavenged"] = test{
+ alloc: []BitRange{{0, 40}, {40 + uint(m), PallocChunkPages - (40 + uint(m))}},
+ scavenged: []BitRange{{40, 1}},
+ min: m,
+ max: PallocChunkPages,
+ want: BitRange{0, 0},
+ }
+ }
+ if m > 1 {
+ tests["MaxUnaligned"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m*2-1)}},
+ min: m,
+ max: m - 2,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ tests["SkipSmall"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64, 5}, {70, 11}, {82, PallocChunkPages - 82}},
+ min: m,
+ max: m,
+ want: BitRange{64 - uint(m), uint(m)},
+ }
+ tests["SkipMisaligned"+suffix] = test{
+ alloc: []BitRange{{0, 64 - uint(m)}, {64, 63}, {127 + uint(m), PallocChunkPages - (127 + uint(m))}},
+ min: m,
+ max: m,
+ want: BitRange{64 - uint(m), uint(m)},
+ }
+ tests["MaxLessThan"+suffix] = test{
+ scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
+ min: m,
+ max: 1,
+ want: BitRange{PallocChunkPages - uint(m), uint(m)},
+ }
+ }
+ }
+ if PhysHugePageSize > uintptr(PageSize) {
+ // Check hugepage preserving behavior.
+ bits := uint(PhysHugePageSize / uintptr(PageSize))
+ tests["PreserveHugePageBottom"] = test{
+ alloc: []BitRange{{bits + 2, PallocChunkPages - (bits + 2)}},
+ min: 1,
+ max: 3, // Make it so that max would have us try to break the huge page.
+ want: BitRange{0, bits + 2},
+ }
+ if 3*bits < PallocChunkPages {
+ // We need at least 3 huge pages in a chunk for this test to make sense.
+ tests["PreserveHugePageMiddle"] = test{
+ alloc: []BitRange{{0, bits - 10}, {2*bits + 10, PallocChunkPages - (2*bits + 10)}},
+ min: 1,
+ max: 12, // Make it so that max would have us try to break the huge page.
+ want: BitRange{bits, bits + 10},
+ }
+ }
+ tests["PreserveHugePageTop"] = test{
+ alloc: []BitRange{{0, PallocChunkPages - bits}},
+ min: 1,
+ max: 1, // Even one page would break a huge page in this case.
+ want: BitRange{PallocChunkPages - bits, bits},
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocData(v.alloc, v.scavenged)
+ start, size := b.FindScavengeCandidate(PallocChunkPages-1, v.min, v.max)
+ got := BitRange{start, size}
+ if !(got.N == 0 && v.want.N == 0) && got != v.want {
+ t.Fatalf("candidate mismatch: got %v, want %v", got, v.want)
+ }
+ })
+ }
+}
+
+// Tests end-to-end scavenging on a pageAlloc.
+func TestPageAllocScavenge(t *testing.T) {
+ type test struct {
+ request, expect uintptr
+ }
+ minPages := PhysPageSize / PageSize
+ if minPages < 1 {
+ minPages = 1
+ }
+ tests := map[string]struct {
+ beforeAlloc map[ChunkIdx][]BitRange
+ beforeScav map[ChunkIdx][]BitRange
+ expect []test
+ afterScav map[ChunkIdx][]BitRange
+ }{
+ "AllFreeUnscavExhaust": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ },
+ expect: []test{
+ {^uintptr(0), 3 * PallocChunkPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NoneFreeUnscavExhaust": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ expect: []test{
+ {^uintptr(0), 0},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ },
+ "ScavHighestPageFirst": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {1, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(minPages)}},
+ },
+ },
+ "ScavMultiple": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {minPages * PageSize, minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "ScavMultiple2": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {2 * minPages * PageSize, 2 * minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ {minPages * PageSize, minPages * PageSize},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "ScavDiscontiguous": {
+ beforeAlloc: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0xe: {},
+ },
+ beforeScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{uint(minPages), PallocChunkPages - uint(2*minPages)}},
+ BaseChunkIdx + 0xe: {{uint(2 * minPages), PallocChunkPages - uint(2*minPages)}},
+ },
+ expect: []test{
+ {2 * minPages * PageSize, 2 * minPages * PageSize},
+ {^uintptr(0), 2 * minPages * PageSize},
+ {^uintptr(0), 0},
+ },
+ afterScav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xe: {{0, PallocChunkPages}},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ runTest := func(t *testing.T, locked bool) {
+ b := NewPageAlloc(v.beforeAlloc, v.beforeScav)
+ defer FreePageAlloc(b)
+
+ for iter, h := range v.expect {
+ if got := b.Scavenge(h.request, locked); got != h.expect {
+ t.Fatalf("bad scavenge #%d: want %d, got %d", iter+1, h.expect, got)
+ }
+ }
+ want := NewPageAlloc(v.beforeAlloc, v.afterScav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ }
+ t.Run(name, func(t *testing.T) {
+ runTest(t, false)
+ })
+ t.Run(name+"Locked", func(t *testing.T) {
+ runTest(t, true)
+ })
+ }
+}
diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go
index c1c6e65..1e959a4 100644
--- a/libgo/go/runtime/mgcsweep.go
+++ b/libgo/go/runtime/mgcsweep.go
@@ -116,12 +116,12 @@ func sweepone() uintptr {
atomic.Store(&mheap_.sweepdone, 1)
break
}
- if s.state != mSpanInUse {
+ if state := s.state.get(); state != mSpanInUse {
// This can happen if direct sweeping already
// swept this span, but in that case the sweep
// generation should always be up-to-date.
if !(s.sweepgen == sg || s.sweepgen == sg+3) {
- print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
+ print("runtime: bad span s.state=", state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n")
throw("non in-use span in unswept list")
}
continue
@@ -213,8 +213,8 @@ func (s *mspan) sweep(preserve bool) bool {
throw("mspan.sweep: m is not locked")
}
sweepgen := mheap_.sweepgen
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("mspan.sweep: bad span state")
}
@@ -353,8 +353,8 @@ func (s *mspan) sweep(preserve bool) bool {
if freeToHeap || nfreed == 0 {
// The span must be in our exclusive ownership until we update sweepgen,
// check for potential races.
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ if state := s.state.get(); state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("mspan.sweep: state=", state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
throw("mspan.sweep: bad span state after sweep")
}
// Serialization point.
@@ -388,7 +388,7 @@ func (s *mspan) sweep(preserve bool) bool {
s.limit = 0 // prevent mlookup from finding this span
sysFault(unsafe.Pointer(s.base()), size)
} else {
- mheap_.freeSpan(s, true)
+ mheap_.freeSpan(s)
}
c.local_nlargefree++
c.local_largefree += size
diff --git a/libgo/go/runtime/mgcsweepbuf.go b/libgo/go/runtime/mgcsweepbuf.go
index 0491f7c..7828822 100644
--- a/libgo/go/runtime/mgcsweepbuf.go
+++ b/libgo/go/runtime/mgcsweepbuf.go
@@ -111,8 +111,9 @@ retry:
unlock(&b.spineLock)
}
- // We have a block. Insert the span.
- block.spans[bottom] = s
+ // We have a block. Insert the span atomically, since there may be
+ // concurrent readers via the block API.
+ atomic.StorepNoWB(unsafe.Pointer(&block.spans[bottom]), unsafe.Pointer(s))
}
// pop removes and returns a span from buffer b, or nil if b is empty.
@@ -147,7 +148,9 @@ func (b *gcSweepBuf) numBlocks() int {
}
// block returns the spans in the i'th block of buffer b. block is
-// safe to call concurrently with push.
+// safe to call concurrently with push. The block may contain nil
+// pointers that must be ignored, and each entry in the block must be
+// loaded atomically.
func (b *gcSweepBuf) block(i int) []*mspan {
// Perform bounds check before loading spine address since
// push ensures the allocated length is at least spineLen.
@@ -169,11 +172,5 @@ func (b *gcSweepBuf) block(i int) []*mspan {
} else {
spans = block.spans[:bottom]
}
-
- // push may have reserved a slot but not filled it yet, so
- // trim away unused entries.
- for len(spans) > 0 && spans[len(spans)-1] == nil {
- spans = spans[:len(spans)-1]
- }
return spans
}
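
The push/block contract described above (entries are published atomically and readers must tolerate nil slots) can be illustrated at user level with sync/atomic. The runtime itself uses its internal atomic.StorepNoWB and atomic.Loadp; the span type and helpers below are hypothetical stand-ins.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type span struct{ id int }

// push publishes a *span into slot i so that concurrent readers either see
// nil (not yet filled) or a fully initialized pointer, which is the contract
// the sweep buffer's block API relies on.
func push(slots []unsafe.Pointer, i int, s *span) {
	atomic.StorePointer(&slots[i], unsafe.Pointer(s))
}

// readAll loads every slot atomically and skips nil entries, mirroring how
// callers of block must tolerate unfilled slots.
func readAll(slots []unsafe.Pointer) []*span {
	var out []*span
	for i := range slots {
		p := (*span)(atomic.LoadPointer(&slots[i]))
		if p == nil {
			continue // reserved but not yet filled
		}
		out = append(out, p)
	}
	return out
}

func main() {
	slots := make([]unsafe.Pointer, 4)
	push(slots, 0, &span{id: 1})
	push(slots, 2, &span{id: 3})
	fmt.Println(len(readAll(slots))) // 2
}
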
diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go
index 89d1e0e..a1b61ad 100644
--- a/libgo/go/runtime/mgcwork.go
+++ b/libgo/go/runtime/mgcwork.go
@@ -126,12 +126,12 @@ func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
if debugCachedWork {
alreadyFailed := w.putGen == w.pauseGen
w.putGen = w.pauseGen
- if m := getg().m; m.locks > 0 || m.mallocing != 0 || m.preemptoff != "" || m.p.ptr().status != _Prunning {
+ if !canPreemptM(getg().m) {
// If we were to spin, the runtime may
- // deadlock: the condition above prevents
- // preemption (see newstack), which could
- // prevent gcMarkDone from finishing the
- // ragged barrier and releasing the spin.
+ // deadlock. Since we can't be preempted, the
+ // spin could prevent gcMarkDone from
+ // finishing the ragged barrier, which is what
+ // releases us from the spin.
return
}
for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go
index cd01b3f..f40589a 100644
--- a/libgo/go/runtime/mheap.go
+++ b/libgo/go/runtime/mheap.go
@@ -15,10 +15,19 @@ import (
"unsafe"
)
-// minPhysPageSize is a lower-bound on the physical page size. The
-// true physical page size may be larger than this. In contrast,
-// sys.PhysPageSize is an upper-bound on the physical page size.
-const minPhysPageSize = 4096
+const (
+ // minPhysPageSize is a lower-bound on the physical page size. The
+ // true physical page size may be larger than this. In contrast,
+ // sys.PhysPageSize is an upper-bound on the physical page size.
+ minPhysPageSize = 4096
+
+ // maxPhysPageSize is the maximum page size the runtime supports.
+ maxPhysPageSize = 512 << 10
+
+ // maxPhysHugePageSize sets an upper-bound on the maximum huge page size
+ // that the runtime supports.
+ maxPhysHugePageSize = pallocChunkBytes
+)
// Main malloc heap.
// The heap itself is the "free" and "scav" treaps,
@@ -32,10 +41,10 @@ type mheap struct {
// lock must only be acquired on the system stack, otherwise a g
// could self-deadlock if its stack grows with the lock held.
lock mutex
- free mTreap // free spans
- sweepgen uint32 // sweep generation, see comment in mspan
- sweepdone uint32 // all spans are swept
- sweepers uint32 // number of active sweepone calls
+ pages pageAlloc // page allocation data structure
+ sweepgen uint32 // sweep generation, see comment in mspan; written during STW
+ sweepdone uint32 // all spans are swept
+ sweepers uint32 // number of active sweepone calls
// allspans is a slice of all mspans ever created. Each mspan
// appears exactly once.
@@ -81,7 +90,7 @@ type mheap struct {
// accounting for current progress. If we could only adjust
// the slope, it would create a discontinuity in debt if any
// progress has already been made.
- pagesInUse uint64 // pages of spans in stats mSpanInUse; R/W with mheap.lock
+ pagesInUse uint64 // pages of spans in stats mSpanInUse; updated atomically
pagesSwept uint64 // pages swept this cycle; updated atomically
pagesSweptBasis uint64 // pagesSwept to use as the origin of the sweep ratio; updated atomically
sweepHeapLiveBasis uint64 // value of heap_live to use as the origin of sweep ratio; written with lock, read without
@@ -89,24 +98,10 @@ type mheap struct {
// TODO(austin): pagesInUse should be a uintptr, but the 386
// compiler can't 8-byte align fields.
- // Scavenger pacing parameters
- //
- // The two basis parameters and the scavenge ratio parallel the proportional
- // sweeping implementation, the primary differences being that:
- // * Scavenging concerns itself with RSS, estimated as heapRetained()
- // * Rather than pacing the scavenger to the GC, it is paced to a
- // time-based rate computed in gcPaceScavenger.
- //
- // scavengeRetainedGoal represents our goal RSS.
- //
- // All fields must be accessed with lock.
- //
- // TODO(mknyszek): Consider abstracting the basis fields and the scavenge ratio
- // into its own type so that this logic may be shared with proportional sweeping.
- scavengeTimeBasis int64
- scavengeRetainedBasis uint64
- scavengeBytesPerNS float64
- scavengeRetainedGoal uint64
+ // scavengeGoal is the amount of total retained heap memory (measured by
+ // heapRetained) that the runtime will try to maintain by returning memory
+ // to the OS.
+ scavengeGoal uint64
// Page reclaimer state
@@ -185,6 +180,12 @@ type mheap struct {
// simply blocking GC (by disabling preemption).
sweepArenas []arenaIdx
+ // curArena is the arena that the heap is currently growing
+ // into. This should always be physPageSize-aligned.
+ curArena struct {
+ base, end uintptr
+ }
+
_ uint32 // ensure 64-bit alignment of central
// central free lists for small size classes.
@@ -199,7 +200,6 @@ type mheap struct {
spanalloc fixalloc // allocator for span*
cachealloc fixalloc // allocator for mcache*
- treapalloc fixalloc // allocator for treapNodes*
specialfinalizeralloc fixalloc // allocator for specialfinalizer*
specialprofilealloc fixalloc // allocator for specialprofile*
speciallock mutex // lock for special record allocators.
@@ -213,10 +213,6 @@ var mheap_ mheap
// A heapArena stores metadata for a heap arena. heapArenas are stored
// outside of the Go heap and accessed via the mheap_.arenas index.
//
-// This gets allocated directly from the OS, so ideally it should be a
-// multiple of the system page size. For example, avoid adding small
-// fields.
-//
//go:notinheap
type heapArena struct {
// bitmap stores the pointer/scalar bitmap for the words in
@@ -242,7 +238,7 @@ type heapArena struct {
// but only the bit corresponding to the first page in each
// span is used.
//
- // Writes are protected by mheap_.lock.
+ // Reads and writes are atomic.
pageInUse [pagesPerArena / 8]uint8
// pageMarks is a bitmap that indicates which spans have any
@@ -259,6 +255,18 @@ type heapArena struct {
// faster scanning, but we don't have 64-bit atomic bit
// operations.
pageMarks [pagesPerArena / 8]uint8
+
+ // zeroedBase marks the first byte of the first page in this
+ // arena which hasn't been used yet and is therefore already
+ // zero. zeroedBase is relative to the arena base.
+ // Increases monotonically until it hits heapArenaBytes.
+ //
+ // This field is sufficient to determine if an allocation
+ // needs to be zeroed because the page allocator follows an
+ // address-ordered first-fit policy.
+ //
+ // Read atomically and written with an atomic CAS.
+ zeroedBase uintptr
}
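
A rough, user-level sketch of how a zeroedBase-style watermark answers "does this allocation need zeroing?" is shown below. needsZero is a hypothetical function, not the runtime's code; it assumes only what the field comment states, namely that allocation is address-ordered first-fit, so memory above the watermark is still zero.

package main

import (
	"fmt"
	"sync/atomic"
)

// needsZero reports whether any byte of [base, base+size) lies below the
// arena's zeroed-memory watermark, and advances the watermark past the
// allocation. Everything above the watermark has never been handed out,
// so it is known to still be zero.
func needsZero(zeroedBase *uintptr, base, size uintptr) bool {
	needs := false
	for {
		cur := atomic.LoadUintptr(zeroedBase)
		if base < cur {
			needs = true // part of the range was handed out (and dirtied) before
		}
		if base+size <= cur {
			return needs // watermark already past us; nothing to advance
		}
		if atomic.CompareAndSwapUintptr(zeroedBase, cur, base+size) {
			return needs
		}
		// Lost a race with another allocator; retry with the new watermark.
	}
}

func main() {
	var watermark uintptr // relative to the arena base, starts at 0
	fmt.Println(needsZero(&watermark, 0, 4096))    // false: fresh, still-zero memory
	fmt.Println(needsZero(&watermark, 2048, 4096)) // true: overlaps previously used memory
}
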
// arenaHint is a hint for where to grow the heap arenas. See
@@ -298,13 +306,20 @@ type arenaHint struct {
// * During GC (gcphase != _GCoff), a span *must not* transition from
// manual or in-use to free. Because concurrent GC may read a pointer
// and then look up its span, the span state must be monotonic.
+//
+// Setting mspan.state to mSpanInUse or mSpanManual must be done
+// atomically and only after all other span fields are valid.
+// Likewise, if inspecting a span is contingent on it being
+// mSpanInUse, the state should be loaded atomically and checked
+// before depending on other fields. This allows the garbage collector
+// to safely deal with potentially invalid pointers, since resolving
+// such pointers may race with a span being allocated.
type mSpanState uint8
const (
mSpanDead mSpanState = iota
mSpanInUse // allocated for garbage collected heap
mSpanManual // allocated for manual management (e.g., stack allocator)
- mSpanFree
)
// mSpanStateNames are the names of the span states, indexed by
@@ -316,6 +331,21 @@ var mSpanStateNames = []string{
"mSpanFree",
}
+// mSpanStateBox holds an mSpanState and provides atomic operations on
+// it. This is a separate type to disallow accidental comparison or
+// assignment with mSpanState.
+type mSpanStateBox struct {
+ s mSpanState
+}
+
+func (b *mSpanStateBox) set(s mSpanState) {
+ atomic.Store8((*uint8)(&b.s), uint8(s))
+}
+
+func (b *mSpanStateBox) get() mSpanState {
+ return mSpanState(atomic.Load8((*uint8)(&b.s)))
+}
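
The box pattern above can be reproduced outside the runtime, with the caveat that sync/atomic exposes no 8-bit operations, so this hedged sketch widens the state to uint32. The names are hypothetical; the point is only the get/set discipline described in the comment on mSpanState.

package main

import (
	"fmt"
	"sync/atomic"
)

type spanState uint32

const (
	stateDead spanState = iota
	stateInUse
	stateManual
)

// stateBox wraps a spanState so it can only be read and written atomically,
// analogous to mSpanStateBox above.
type stateBox struct{ s uint32 }

func (b *stateBox) set(s spanState) { atomic.StoreUint32(&b.s, uint32(s)) }
func (b *stateBox) get() spanState  { return spanState(atomic.LoadUint32(&b.s)) }

func main() {
	var b stateBox
	b.set(stateInUse)
	// A reader must check the state before trusting any other fields,
	// since publication is ordered by this atomic store.
	if b.get() == stateInUse {
		fmt.Println("span is in use")
	}
}
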
+
// mSpanList heads a linked list of spans.
//
//go:notinheap
@@ -397,19 +427,18 @@ type mspan struct {
// h->sweepgen is incremented by 2 after every GC
sweepgen uint32
- divMul uint16 // for divide by elemsize - divMagic.mul
- baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base
- allocCount uint16 // number of allocated objects
- spanclass spanClass // size class and noscan (uint8)
- state mSpanState // mspaninuse etc
- needzero uint8 // needs to be zeroed before allocation
- divShift uint8 // for divide by elemsize - divMagic.shift
- divShift2 uint8 // for divide by elemsize - divMagic.shift2
- scavenged bool // whether this span has had its pages released to the OS
- elemsize uintptr // computed from sizeclass or from npages
- limit uintptr // end of data in span
- speciallock mutex // guards specials list
- specials *special // linked list of special records sorted by offset.
+ divMul uint16 // for divide by elemsize - divMagic.mul
+ baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base
+ allocCount uint16 // number of allocated objects
+ spanclass spanClass // size class and noscan (uint8)
+ state mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
+ needzero uint8 // needs to be zeroed before allocation
+ divShift uint8 // for divide by elemsize - divMagic.shift
+ divShift2 uint8 // for divide by elemsize - divMagic.shift2
+ elemsize uintptr // computed from sizeclass or from npages
+ limit uintptr // end of data in span
+ speciallock mutex // guards specials list
+ specials *special // linked list of special records sorted by offset.
}
func (s *mspan) base() uintptr {
@@ -425,181 +454,6 @@ func (s *mspan) layout() (size, n, total uintptr) {
return
}
-// physPageBounds returns the start and end of the span
-// rounded in to the physical page size.
-func (s *mspan) physPageBounds() (uintptr, uintptr) {
- start := s.base()
- end := start + s.npages<<_PageShift
- if physPageSize > _PageSize {
- // Round start and end in.
- start = (start + physPageSize - 1) &^ (physPageSize - 1)
- end &^= physPageSize - 1
- }
- return start, end
-}
-
-func (h *mheap) coalesce(s *mspan) {
- // merge is a helper which merges other into s, deletes references to other
- // in heap metadata, and then discards it. other must be adjacent to s.
- merge := func(a, b, other *mspan) {
- // Caller must ensure a.startAddr < b.startAddr and that either a or
- // b is s. a and b must be adjacent. other is whichever of the two is
- // not s.
-
- if pageSize < physPageSize && a.scavenged && b.scavenged {
- // If we're merging two scavenged spans on systems where
- // pageSize < physPageSize, then their boundary should always be on
- // a physical page boundary, due to the realignment that happens
- // during coalescing. Throw if this case is no longer true, which
- // means the implementation should probably be changed to scavenge
- // along the boundary.
- _, start := a.physPageBounds()
- end, _ := b.physPageBounds()
- if start != end {
- println("runtime: a.base=", hex(a.base()), "a.npages=", a.npages)
- println("runtime: b.base=", hex(b.base()), "b.npages=", b.npages)
- println("runtime: physPageSize=", physPageSize, "pageSize=", pageSize)
- throw("neighboring scavenged spans boundary is not a physical page boundary")
- }
- }
-
- // Adjust s via base and npages and also in heap metadata.
- s.npages += other.npages
- s.needzero |= other.needzero
- if a == s {
- h.setSpan(s.base()+s.npages*pageSize-1, s)
- } else {
- s.startAddr = other.startAddr
- h.setSpan(s.base(), s)
- }
-
- // The size is potentially changing so the treap needs to delete adjacent nodes and
- // insert back as a combined node.
- h.free.removeSpan(other)
- other.state = mSpanDead
- h.spanalloc.free(unsafe.Pointer(other))
- }
-
- // realign is a helper which shrinks other and grows s such that their
- // boundary is on a physical page boundary.
- realign := func(a, b, other *mspan) {
- // Caller must ensure a.startAddr < b.startAddr and that either a or
- // b is s. a and b must be adjacent. other is whichever of the two is
- // not s.
-
- // If pageSize >= physPageSize then spans are always aligned
- // to physical page boundaries, so just exit.
- if pageSize >= physPageSize {
- return
- }
- // Since we're resizing other, we must remove it from the treap.
- h.free.removeSpan(other)
-
- // Round boundary to the nearest physical page size, toward the
- // scavenged span.
- boundary := b.startAddr
- if a.scavenged {
- boundary &^= (physPageSize - 1)
- } else {
- boundary = (boundary + physPageSize - 1) &^ (physPageSize - 1)
- }
- a.npages = (boundary - a.startAddr) / pageSize
- b.npages = (b.startAddr + b.npages*pageSize - boundary) / pageSize
- b.startAddr = boundary
-
- h.setSpan(boundary-1, a)
- h.setSpan(boundary, b)
-
- // Re-insert other now that it has a new size.
- h.free.insert(other)
- }
-
- hpMiddle := s.hugePages()
-
- // Coalesce with earlier, later spans.
- var hpBefore uintptr
- if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree {
- if s.scavenged == before.scavenged {
- hpBefore = before.hugePages()
- merge(before, s, before)
- } else {
- realign(before, s, before)
- }
- }
-
- // Now check to see if next (greater addresses) span is free and can be coalesced.
- var hpAfter uintptr
- if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree {
- if s.scavenged == after.scavenged {
- hpAfter = after.hugePages()
- merge(s, after, after)
- } else {
- realign(s, after, after)
- }
- }
- if !s.scavenged && s.hugePages() > hpBefore+hpMiddle+hpAfter {
- // If s has grown such that it now may contain more huge pages than it
- // and its now-coalesced neighbors did before, then mark the whole region
- // as huge-page-backable.
- //
- // Otherwise, on systems where we break up huge pages (like Linux)
- // s may not be backed by huge pages because it could be made up of
- // pieces which are broken up in the underlying VMA. The primary issue
- // with this is that it can lead to a poor estimate of the amount of
- // free memory backed by huge pages for determining the scavenging rate.
- //
- // TODO(mknyszek): Measure the performance characteristics of sysHugePage
- // and determine whether it makes sense to only sysHugePage on the pages
- // that matter, or if it's better to just mark the whole region.
- sysHugePage(unsafe.Pointer(s.base()), s.npages*pageSize)
- }
-}
-
-// hugePages returns the number of aligned physical huge pages in the memory
-// regioned owned by this mspan.
-func (s *mspan) hugePages() uintptr {
- if physHugePageSize == 0 || s.npages < physHugePageSize/pageSize {
- return 0
- }
- start := s.base()
- end := start + s.npages*pageSize
- if physHugePageSize > pageSize {
- // Round start and end in.
- start = (start + physHugePageSize - 1) &^ (physHugePageSize - 1)
- end &^= physHugePageSize - 1
- }
- if start < end {
- return (end - start) >> physHugePageShift
- }
- return 0
-}
-
-func (s *mspan) scavenge() uintptr {
- // start and end must be rounded in, otherwise madvise
- // will round them *out* and release more memory
- // than we want.
- start, end := s.physPageBounds()
- if end <= start {
- // start and end don't span a whole physical page.
- return 0
- }
- released := end - start
- memstats.heap_released += uint64(released)
- s.scavenged = true
- sysUnused(unsafe.Pointer(start), released)
- return released
-}
-
-// released returns the number of bytes in this span
-// which were returned back to the OS.
-func (s *mspan) released() uintptr {
- if !s.scavenged {
- return 0
- }
- start, end := s.physPageBounds()
- return end - start
-}
-
// recordspan adds a newly allocated span to h.allspans.
//
// This only happens the first time a span is allocated from
@@ -726,7 +580,7 @@ func inHeapOrStack(b uintptr) bool {
if s == nil || b < s.base() {
return false
}
- switch s.state {
+ switch s.state.get() {
case mSpanInUse, mSpanManual:
return b < s.limit
default:
@@ -793,9 +647,12 @@ func spanOfUnchecked(p uintptr) *mspan {
//go:nosplit
func spanOfHeap(p uintptr) *mspan {
s := spanOf(p)
- // If p is not allocated, it may point to a stale span, so we
- // have to check the span's bounds and state.
- if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse {
+ // s is nil if it's never been allocated. Otherwise, we check
+ // its state first because we don't trust this pointer, so we
+ // have to synchronize with span initialization. Then, it's
+ // still possible we picked up a stale span pointer, so we
+ // have to check the span's bounds.
+ if s == nil || s.state.get() != mSpanInUse || p < s.base() || p >= s.limit {
return nil
}
return s
@@ -813,7 +670,6 @@ func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8)
// Initialize the heap.
func (h *mheap) init() {
- h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
@@ -834,6 +690,8 @@ func (h *mheap) init() {
for i := range h.central {
h.central[i].mcentral.init(spanClass(i))
}
+
+ h.pages.init(&h.lock, &memstats.gc_sys)
}
// reclaim sweeps and reclaims at least npage pages into the heap.
@@ -954,7 +812,7 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
// Scan this bitmap chunk for spans that are in-use
// but have no marked objects on them.
for i := range inUse {
- inUseUnmarked := inUse[i] &^ marked[i]
+ inUseUnmarked := atomic.Load8(&inUse[i]) &^ marked[i]
if inUseUnmarked == 0 {
continue
}
@@ -973,7 +831,7 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
// spans were freed when we dropped the
// lock and we don't want to get stale
// pointers from the spans array.
- inUseUnmarked = inUse[i] &^ marked[i]
+ inUseUnmarked = atomic.Load8(&inUse[i]) &^ marked[i]
}
}
}
@@ -990,100 +848,23 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
return nFreed
}
-// alloc_m is the internal implementation of mheap.alloc.
-//
-// alloc_m must run on the system stack because it locks the heap, so
-// any stack growth during alloc_m would self-deadlock.
-//
-//go:systemstack
-func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
- _g_ := getg()
-
- // To prevent excessive heap growth, before allocating n pages
- // we need to sweep and reclaim at least n pages.
- if h.sweepdone == 0 {
- h.reclaim(npage)
- }
-
- lock(&h.lock)
- // transfer stats from cache to global
- memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
- _g_.m.mcache.local_scan = 0
- memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
- _g_.m.mcache.local_tinyallocs = 0
-
- s := h.allocSpanLocked(npage, &memstats.heap_inuse)
- if s != nil {
- // Record span info, because gc needs to be
- // able to map interior pointer to containing span.
- atomic.Store(&s.sweepgen, h.sweepgen)
- h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
- s.state = mSpanInUse
- s.allocCount = 0
- s.spanclass = spanclass
- if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
- s.elemsize = s.npages << _PageShift
- s.divShift = 0
- s.divMul = 0
- s.divShift2 = 0
- s.baseMask = 0
- } else {
- s.elemsize = uintptr(class_to_size[sizeclass])
- m := &class_to_divmagic[sizeclass]
- s.divShift = m.shift
- s.divMul = m.mul
- s.divShift2 = m.shift2
- s.baseMask = m.baseMask
- }
-
- // Mark in-use span in arena page bitmap.
- arena, pageIdx, pageMask := pageIndexOf(s.base())
- arena.pageInUse[pageIdx] |= pageMask
-
- // update stats, sweep lists
- h.pagesInUse += uint64(npage)
- if large {
- memstats.heap_objects++
- mheap_.largealloc += uint64(s.elemsize)
- mheap_.nlargealloc++
- atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
- }
- }
- // heap_scan and heap_live were updated.
- if gcBlackenEnabled != 0 {
- gcController.revise()
- }
-
- if trace.enabled {
- traceHeapAlloc()
- }
-
- // h.spans is accessed concurrently without synchronization
- // from other threads. Hence, there must be a store/store
- // barrier here to ensure the writes to h.spans above happen
- // before the caller can publish a pointer p to an object
- // allocated from s. As soon as this happens, the garbage
- // collector running on another processor could read p and
- // look up s in h.spans. The unlock acts as the barrier to
- // order these writes. On the read side, the data dependency
- // between p and the index in h.spans orders the reads.
- unlock(&h.lock)
- return s
-}
-
// alloc allocates a new span of npage pages from the GC'd heap.
//
-// Either large must be true or spanclass must indicates the span's
-// size class and scannability.
+// spanclass indicates the span's size class and scannability.
//
// If needzero is true, the memory for the returned span will be zeroed.
-func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
+func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
// Don't do any operations that lock the heap on the G stack.
// It might trigger stack growth, and the stack growth code needs
// to be able to allocate heap.
var s *mspan
systemstack(func() {
- s = h.alloc_m(npage, spanclass, large)
+ // To prevent excessive heap growth, before allocating n pages
+ // we need to sweep and reclaim at least n pages.
+ if h.sweepdone == 0 {
+ h.reclaim(npages)
+ }
+ s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
})
if s != nil {
@@ -1105,35 +886,12 @@ func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero b
// The memory backing the returned span may not be zeroed if
// span.needzero is set.
//
-// allocManual must be called on the system stack because it acquires
-// the heap lock. See mheap for details.
+// allocManual must be called on the system stack because it may
+// acquire the heap lock via allocSpan. See mheap for details.
//
//go:systemstack
-func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
- lock(&h.lock)
- s := h.allocSpanLocked(npage, stat)
- if s != nil {
- s.state = mSpanManual
- s.manualFreeList = 0
- s.allocCount = 0
- s.spanclass = 0
- s.nelems = 0
- s.elemsize = 0
- s.limit = s.base() + s.npages<<_PageShift
- // Manually managed memory doesn't count toward heap_sys.
- memstats.heap_sys -= uint64(s.npages << _PageShift)
- }
-
- // This unlock acts as a release barrier. See mheap.alloc_m.
- unlock(&h.lock)
-
- return s
-}
-
-// setSpan modifies the span map so spanOf(base) is s.
-func (h *mheap) setSpan(base uintptr, s *mspan) {
- ai := arenaIndex(base)
- h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s
+func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
+ return h.allocSpan(npages, true, 0, stat)
}
// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
@@ -1152,93 +910,357 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
}
}
-// Allocates a span of the given size. h must be locked.
-// The returned span has been removed from the
-// free structures, but its state is still mSpanFree.
-func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
- t := h.free.find(npage)
- if t.valid() {
- goto HaveSpan
+// allocNeedsZero checks if the region of address space [base, base+npage*pageSize),
+// assumed to be allocated, needs to be zeroed, updating heap arena metadata for
+// future allocations.
+//
+// This must be called each time pages are allocated from the heap, even if the page
+// allocator can otherwise prove the memory it's allocating is already zero because
+// it's fresh from the operating system. It updates heapArena metadata that is
+// critical for future page allocations.
+//
+// There are no locking constraints on this method.
+func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
+ for npage > 0 {
+ ai := arenaIndex(base)
+ ha := h.arenas[ai.l1()][ai.l2()]
+
+ zeroedBase := atomic.Loaduintptr(&ha.zeroedBase)
+ arenaBase := base % heapArenaBytes
+ if arenaBase < zeroedBase {
+ // We extended into the non-zeroed part of the
+ // arena, so this region needs to be zeroed before use.
+ //
+ // zeroedBase is monotonically increasing, so if we see this now then
+ // we can be sure we need to zero this memory region.
+ //
+ // We still need to update zeroedBase for this arena, and
+ // potentially more arenas.
+ needZero = true
+ }
+ // We may observe arenaBase > zeroedBase if we're racing with one or more
+ // allocations which are acquiring memory directly before us in the address
+ // space. But, because we know no one else is acquiring *this* memory, it's
+ // still safe to not zero.
+
+		// Compute how far we extend into the arena, capped
+ // at heapArenaBytes.
+ arenaLimit := arenaBase + npage*pageSize
+ if arenaLimit > heapArenaBytes {
+ arenaLimit = heapArenaBytes
+ }
+ // Increase ha.zeroedBase so it's >= arenaLimit.
+ // We may be racing with other updates.
+ for arenaLimit > zeroedBase {
+ if atomic.Casuintptr(&ha.zeroedBase, zeroedBase, arenaLimit) {
+ break
+ }
+ zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
+ // Sanity check zeroedBase.
+ if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
+ // The zeroedBase moved into the space we were trying to
+ // claim. That's very bad, and indicates someone allocated
+ // the same region we did.
+ throw("potentially overlapping in-use allocations detected")
+ }
+ }
+
+ // Move base forward and subtract from npage to move into
+ // the next arena, or finish.
+ base += arenaLimit - arenaBase
+ npage -= (arenaLimit - arenaBase) / pageSize
}
- if !h.grow(npage) {
+ return
+}
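The zeroedBase protocol in allocNeedsZero above is easier to follow in isolation. Below is a minimal, self-contained sketch of the same idea, assuming a single hypothetical arena: a monotonically increasing high-water mark advanced with a CAS loop, so any allocation that starts below the mark knows it must zero its memory. The zeroTracker type and the sizes here are illustrative stand-ins, not runtime code.

package main

import (
	"fmt"
	"sync/atomic"
)

// zeroTracker plays the role of heapArena.zeroedBase: offsets below the
// mark have been handed out before and may be dirty; offsets at or above
// it are still untouched (zero) memory fresh from the OS.
type zeroTracker struct {
	zeroedBase uintptr // monotonically increasing offset within the arena
}

// needsZero records that [off, off+size) is now allocated and reports
// whether the caller must zero it before use.
func (t *zeroTracker) needsZero(off, size uintptr) bool {
	zb := atomic.LoadUintptr(&t.zeroedBase)
	needZero := off < zb // we dipped into previously-used memory
	limit := off + size
	// Advance zeroedBase to at least limit, racing with other allocators.
	for limit > zb {
		if atomic.CompareAndSwapUintptr(&t.zeroedBase, zb, limit) {
			break
		}
		zb = atomic.LoadUintptr(&t.zeroedBase)
	}
	return needZero
}

func main() {
	var t zeroTracker
	fmt.Println(t.needsZero(0, 4096))    // false: fresh memory
	fmt.Println(t.needsZero(4096, 4096)) // false: still above the mark
	fmt.Println(t.needsZero(0, 8192))    // true: reuses the first 8 KiB
}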
+
+// tryAllocMSpan attempts to allocate an mspan object from
+// the P-local cache, but may fail.
+//
+// h need not be locked.
+//
+// The caller must ensure that its P won't change underneath
+// it during this function. Currently, we ensure this by requiring
+// that the function run on the system stack, because that's the
+// only place it is used now. In the future, this requirement may
+// be relaxed if its use is necessary elsewhere.
+//
+//go:systemstack
+func (h *mheap) tryAllocMSpan() *mspan {
+ pp := getg().m.p.ptr()
+ // If we don't have a p or the cache is empty, we can't do
+ // anything here.
+ if pp == nil || pp.mspancache.len == 0 {
return nil
}
- t = h.free.find(npage)
- if t.valid() {
- goto HaveSpan
+ // Pull off the last entry in the cache.
+ s := pp.mspancache.buf[pp.mspancache.len-1]
+ pp.mspancache.len--
+ return s
+}
+
+// allocMSpanLocked allocates an mspan object.
+//
+// h must be locked.
+//
+// allocMSpanLocked must be called on the system stack because
+// its caller holds the heap lock. See mheap for details.
+// Running on the system stack also ensures that we won't
+// switch Ps during this function. See tryAllocMSpan for details.
+//
+//go:systemstack
+func (h *mheap) allocMSpanLocked() *mspan {
+ pp := getg().m.p.ptr()
+ if pp == nil {
+ // We don't have a p so just do the normal thing.
+ return (*mspan)(h.spanalloc.alloc())
+ }
+ // Refill the cache if necessary.
+ if pp.mspancache.len == 0 {
+ const refillCount = len(pp.mspancache.buf) / 2
+ for i := 0; i < refillCount; i++ {
+ pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc())
+ }
+ pp.mspancache.len = refillCount
}
- throw("grew heap, but no adequate free span found")
+ // Pull off the last entry in the cache.
+ s := pp.mspancache.buf[pp.mspancache.len-1]
+ pp.mspancache.len--
+ return s
+}
-HaveSpan:
- s := t.span()
- if s.state != mSpanFree {
- throw("candidate mspan for allocation is not free")
- }
-
- // First, subtract any memory that was released back to
- // the OS from s. We will add back what's left if necessary.
- memstats.heap_released -= uint64(s.released())
-
- if s.npages == npage {
- h.free.erase(t)
- } else if s.npages > npage {
- // Trim off the lower bits and make that our new span.
- // Do this in-place since this operation does not
- // affect the original span's location in the treap.
- n := (*mspan)(h.spanalloc.alloc())
- h.free.mutate(t, func(s *mspan) {
- n.init(s.base(), npage)
- s.npages -= npage
- s.startAddr = s.base() + npage*pageSize
- h.setSpan(s.base()-1, n)
- h.setSpan(s.base(), s)
- h.setSpan(n.base(), n)
- n.needzero = s.needzero
- // n may not be big enough to actually be scavenged, but that's fine.
- // We still want it to appear to be scavenged so that we can do the
- // right bookkeeping later on in this function (i.e. sysUsed).
- n.scavenged = s.scavenged
- // Check if s is still scavenged.
- if s.scavenged {
- start, end := s.physPageBounds()
- if start < end {
- memstats.heap_released += uint64(end - start)
- } else {
- s.scavenged = false
- }
+// freeMSpanLocked frees an mspan object.
+//
+// h must be locked.
+//
+// freeMSpanLocked must be called on the system stack because
+// its caller holds the heap lock. See mheap for details.
+// Running on the system stack also ensures that we won't
+// switch Ps during this function. See tryAllocMSpan for details.
+//
+//go:systemstack
+func (h *mheap) freeMSpanLocked(s *mspan) {
+ pp := getg().m.p.ptr()
+ // First try to free the mspan directly to the cache.
+ if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
+ pp.mspancache.buf[pp.mspancache.len] = s
+ pp.mspancache.len++
+ return
+ }
+ // Failing that (or if we don't have a p), just free it to
+ // the heap.
+ h.spanalloc.free(unsafe.Pointer(s))
+}
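Taken together, tryAllocMSpan, allocMSpanLocked, and freeMSpanLocked treat p.mspancache as a small fixed-size stack of pre-allocated span objects that can be popped without the heap lock. A stripped-down sketch of that pattern follows; the 128-entry capacity and the *int element type are stand-ins, not the runtime's definitions.

package main

import "fmt"

// spanCache mirrors the shape of the per-P mspan cache: a bounded stack
// refilled in bulk from a slower, locked allocator.
type spanCache struct {
	buf [128]*int
	len int
}

// tryGet pops an entry if one is available (cf. tryAllocMSpan).
func (c *spanCache) tryGet() *int {
	if c.len == 0 {
		return nil
	}
	c.len--
	return c.buf[c.len]
}

// put pushes an entry back if there is room (cf. freeMSpanLocked);
// on overflow the caller falls back to the shared allocator.
func (c *spanCache) put(p *int) bool {
	if c.len == len(c.buf) {
		return false
	}
	c.buf[c.len] = p
	c.len++
	return true
}

// refill tops the cache up to half capacity (cf. allocMSpanLocked),
// with newObj standing in for h.spanalloc.alloc.
func (c *spanCache) refill(newObj func() *int) {
	for c.len < len(c.buf)/2 {
		c.buf[c.len] = newObj()
		c.len++
	}
}

func main() {
	var c spanCache
	c.refill(func() *int { return new(int) })
	fmt.Println(c.len)             // 64
	fmt.Println(c.tryGet() != nil) // true
	fmt.Println(c.put(new(int)))   // true
}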
+
+// allocSpan allocates an mspan which owns npages worth of memory.
+//
+// If manual == false, allocSpan allocates a heap span of class spanclass
+// and updates heap accounting. If manual == true, allocSpan allocates a
+// manually-managed span (spanclass is ignored), and the caller is
+// responsible for any accounting related to its use of the span. Either
+// way, allocSpan will atomically add the bytes in the newly allocated
+// span to *sysStat.
+//
+// The returned span is fully initialized.
+//
+// h must not be locked.
+//
+// allocSpan must be called on the system stack both because it acquires
+// the heap lock and because it must block GC transitions.
+//
+//go:systemstack
+func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
+ // Function-global state.
+ gp := getg()
+ base, scav := uintptr(0), uintptr(0)
+
+ // If the allocation is small enough, try the page cache!
+ pp := gp.m.p.ptr()
+ if pp != nil && npages < pageCachePages/4 {
+ c := &pp.pcache
+
+ // If the cache is empty, refill it.
+ if c.empty() {
+ lock(&h.lock)
+ *c = h.pages.allocToCache()
+ unlock(&h.lock)
+ }
+
+ // Try to allocate from the cache.
+ base, scav = c.alloc(npages)
+ if base != 0 {
+ s = h.tryAllocMSpan()
+
+ if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
+ goto HaveSpan
}
- })
- s = n
- } else {
- throw("candidate mspan for allocation is too small")
+			// We're either running during GC, failed to acquire an mspan,
+ // or the allocation is for a large object. This means we
+ // have to lock the heap and do a bunch of extra work,
+ // so go down the HaveBaseLocked path.
+ //
+ // We must do this during GC to avoid skew with heap_scan
+ // since we flush mcache stats whenever we lock.
+ //
+ // TODO(mknyszek): It would be nice to not have to
+ // lock the heap if it's a large allocation, but
+ // it's fine for now. The critical section here is
+ // short and large object allocations are relatively
+ // infrequent.
+ }
}
- // "Unscavenge" s only AFTER splitting so that
- // we only sysUsed whatever we actually need.
- if s.scavenged {
+
+ // For one reason or another, we couldn't get the
+ // whole job done without the heap lock.
+ lock(&h.lock)
+
+ if base == 0 {
+ // Try to acquire a base address.
+ base, scav = h.pages.alloc(npages)
+ if base == 0 {
+ if !h.grow(npages) {
+ unlock(&h.lock)
+ return nil
+ }
+ base, scav = h.pages.alloc(npages)
+ if base == 0 {
+ throw("grew heap, but no adequate free space found")
+ }
+ }
+ }
+ if s == nil {
+ // We failed to get an mspan earlier, so grab
+ // one now that we have the heap lock.
+ s = h.allocMSpanLocked()
+ }
+ if !manual {
+ // This is a heap span, so we should do some additional accounting
+ // which may only be done with the heap locked.
+
+ // Transfer stats from mcache to global.
+ memstats.heap_scan += uint64(gp.m.mcache.local_scan)
+ gp.m.mcache.local_scan = 0
+ memstats.tinyallocs += uint64(gp.m.mcache.local_tinyallocs)
+ gp.m.mcache.local_tinyallocs = 0
+
+ // Do some additional accounting if it's a large allocation.
+ if spanclass.sizeclass() == 0 {
+ mheap_.largealloc += uint64(npages * pageSize)
+ mheap_.nlargealloc++
+ atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
+ }
+
+ // Either heap_live or heap_scan could have been updated.
+ if gcBlackenEnabled != 0 {
+ gcController.revise()
+ }
+ }
+ unlock(&h.lock)
+
+HaveSpan:
+ // At this point, both s != nil and base != 0, and the heap
+ // lock is no longer held. Initialize the span.
+ s.init(base, npages)
+ if h.allocNeedsZero(base, npages) {
+ s.needzero = 1
+ }
+ nbytes := npages * pageSize
+ if manual {
+ s.manualFreeList = 0
+ s.nelems = 0
+ s.limit = s.base() + s.npages*pageSize
+ // Manually managed memory doesn't count toward heap_sys.
+ mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
+ s.state.set(mSpanManual)
+ } else {
+ // We must set span properties before the span is published anywhere
+ // since we're not holding the heap lock.
+ s.spanclass = spanclass
+ if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
+ s.elemsize = nbytes
+ s.nelems = 1
+
+ s.divShift = 0
+ s.divMul = 0
+ s.divShift2 = 0
+ s.baseMask = 0
+ } else {
+ s.elemsize = uintptr(class_to_size[sizeclass])
+ s.nelems = nbytes / s.elemsize
+
+ m := &class_to_divmagic[sizeclass]
+ s.divShift = m.shift
+ s.divMul = m.mul
+ s.divShift2 = m.shift2
+ s.baseMask = m.baseMask
+ }
+
+ // Initialize mark and allocation structures.
+ s.freeindex = 0
+ s.allocCache = ^uint64(0) // all 1s indicating all free.
+ s.gcmarkBits = newMarkBits(s.nelems)
+ s.allocBits = newAllocBits(s.nelems)
+
+ // It's safe to access h.sweepgen without the heap lock because it's
+ // only ever updated with the world stopped and we run on the
+ // systemstack which blocks a STW transition.
+ atomic.Store(&s.sweepgen, h.sweepgen)
+
+ // Now that the span is filled in, set its state. This
+ // is a publication barrier for the other fields in
+ // the span. While valid pointers into this span
+ // should never be visible until the span is returned,
+ // if the garbage collector finds an invalid pointer,
+ // access to the span may race with initialization of
+ // the span. We resolve this race by atomically
+ // setting the state after the span is fully
+ // initialized, and atomically checking the state in
+ // any situation where a pointer is suspect.
+ s.state.set(mSpanInUse)
+ }
+
+ // Commit and account for any scavenged memory that the span now owns.
+ if scav != 0 {
// sysUsed all the pages that are actually available
- // in the span. Note that we don't need to decrement
- // heap_released since we already did so earlier.
- sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift)
- s.scavenged = false
-
- // Since we allocated out of a scavenged span, we just
- // grew the RSS. Mitigate this by scavenging enough free
- // space to make up for it but only if we need to.
- //
- // scavengeLocked may cause coalescing, so prevent
- // coalescing with s by temporarily changing its state.
- s.state = mSpanManual
- h.scavengeIfNeededLocked(s.npages * pageSize)
- s.state = mSpanFree
+ // in the span since some of them might be scavenged.
+ sysUsed(unsafe.Pointer(base), nbytes)
+ mSysStatDec(&memstats.heap_released, scav)
}
+ // Update stats.
+ mSysStatInc(sysStat, nbytes)
+ mSysStatDec(&memstats.heap_idle, nbytes)
- h.setSpans(s.base(), npage, s)
+ // Publish the span in various locations.
- *stat += uint64(npage << _PageShift)
- memstats.heap_idle -= uint64(npage << _PageShift)
+ // This is safe to call without the lock held because the slots
+	// related to this span will only ever be read or modified by
+ // this thread until pointers into the span are published or
+ // pageInUse is updated.
+ h.setSpans(s.base(), npages, s)
- if s.inList() {
- throw("still in list")
+ if !manual {
+ // Add to swept in-use list.
+ //
+ // This publishes the span to root marking.
+ //
+ // h.sweepgen is guaranteed to only change during STW,
+ // and preemption is disabled in the page allocator.
+ h.sweepSpans[h.sweepgen/2%2].push(s)
+
+ // Mark in-use span in arena page bitmap.
+ //
+ // This publishes the span to the page sweeper, so
+ // it's imperative that the span be completely initialized
+ // prior to this line.
+ arena, pageIdx, pageMask := pageIndexOf(s.base())
+ atomic.Or8(&arena.pageInUse[pageIdx], pageMask)
+
+ // Update related page sweeper stats.
+ atomic.Xadd64(&h.pagesInUse, int64(npages))
+
+ if trace.enabled {
+ // Trace that a heap alloc occurred.
+ traceHeapAlloc()
+ }
}
return s
}
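allocSpan's control flow boils down to one gate: the unlocked fast path is taken only for small requests where the per-P page cache yields a base address, a cached mspan is available, GC is not marking, and the span is either manual or has a nonzero size class. The sketch below restates that gate with boolean stand-ins for the runtime state; the pageCachePages value of 64 is an assumption taken from mpagecache.go, not from this hunk.

package main

import "fmt"

// canUseLockFreePath mirrors the fast-path condition in allocSpan above.
func canUseLockFreePath(npages uintptr, haveP, gotBase, gotMSpan, gcMarking, manual bool, sizeclass int) bool {
	const pageCachePages = 64 // assumption: bits in the per-P page cache
	if !haveP || npages >= pageCachePages/4 {
		return false // request too large for the per-P page cache
	}
	if !gotBase || !gotMSpan {
		return false // cache or mspan pool miss; take the heap lock
	}
	if gcMarking {
		return false // mcache stats must be flushed under the heap lock
	}
	return manual || sizeclass != 0 // large (sizeclass 0) heap spans take the lock
}

func main() {
	fmt.Println(canUseLockFreePath(4, true, true, true, false, false, 5))  // true
	fmt.Println(canUseLockFreePath(64, true, true, true, false, false, 0)) // false
}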
@@ -1248,37 +1270,73 @@ HaveSpan:
//
// h must be locked.
func (h *mheap) grow(npage uintptr) bool {
- ask := npage << _PageShift
- v, size := h.sysAlloc(ask)
- if v == nil {
- print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
- return false
- }
+ // We must grow the heap in whole palloc chunks.
+ ask := alignUp(npage, pallocChunkPages) * pageSize
+
+ totalGrowth := uintptr(0)
+ nBase := alignUp(h.curArena.base+ask, physPageSize)
+ if nBase > h.curArena.end {
+ // Not enough room in the current arena. Allocate more
+ // arena space. This may not be contiguous with the
+ // current arena, so we have to request the full ask.
+ av, asize := h.sysAlloc(ask)
+ if av == nil {
+ print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+ return false
+ }
+
+ if uintptr(av) == h.curArena.end {
+ // The new space is contiguous with the old
+ // space, so just extend the current space.
+ h.curArena.end = uintptr(av) + asize
+ } else {
+ // The new space is discontiguous. Track what
+ // remains of the current space and switch to
+ // the new space. This should be rare.
+ if size := h.curArena.end - h.curArena.base; size != 0 {
+ h.pages.grow(h.curArena.base, size)
+ totalGrowth += size
+ }
+ // Switch to the new space.
+ h.curArena.base = uintptr(av)
+ h.curArena.end = uintptr(av) + asize
+ }
- // Create a fake "in use" span and free it, so that the
- // right accounting and coalescing happens.
- s := (*mspan)(h.spanalloc.alloc())
- s.init(uintptr(v), size/pageSize)
- h.setSpans(s.base(), s.npages, s)
- s.state = mSpanFree
- memstats.heap_idle += uint64(size)
- // (*mheap).sysAlloc returns untouched/uncommitted memory.
- s.scavenged = true
- // s is always aligned to the heap arena size which is always > physPageSize,
- // so its totally safe to just add directly to heap_released. Coalescing,
- // if possible, will also always be correct in terms of accounting, because
- // s.base() must be a physical page boundary.
- memstats.heap_released += uint64(size)
- h.coalesce(s)
- h.free.insert(s)
+ // The memory just allocated counts as both released
+ // and idle, even though it's not yet backed by spans.
+ //
+ // The allocation is always aligned to the heap arena
+		// size which is always > physPageSize, so it's safe to
+ // just add directly to heap_released.
+ mSysStatInc(&memstats.heap_released, asize)
+ mSysStatInc(&memstats.heap_idle, asize)
+
+ // Recalculate nBase
+ nBase = alignUp(h.curArena.base+ask, physPageSize)
+ }
+
+ // Grow into the current arena.
+ v := h.curArena.base
+ h.curArena.base = nBase
+ h.pages.grow(v, nBase-v)
+ totalGrowth += nBase - v
+
+ // We just caused a heap growth, so scavenge down what will soon be used.
+ // By scavenging inline we deal with the failure to allocate out of
+ // memory fragments by scavenging the memory fragments that are least
+ // likely to be re-used.
+ if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
+ todo := totalGrowth
+ if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
+ todo = overage
+ }
+ h.pages.scavenge(todo, true)
+ }
return true
}
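As a concrete check of the rounding at the top of grow: requests are rounded up to whole palloc chunks before being measured against the current arena. The arithmetic below assumes 8 KiB runtime pages and uses the pallocChunkPages value of 512 defined in mpagealloc.go later in this patch.

package main

import "fmt"

// alignUp rounds n up to a multiple of a (a must be a power of two),
// matching the helper used by mheap.grow.
func alignUp(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

func main() {
	const (
		pageSize         = 8192 // assumption: 8 KiB runtime pages
		pallocChunkPages = 512  // from mpagealloc.go in this patch
	)
	for _, npage := range []uintptr{1, 3, 512, 513} {
		ask := alignUp(npage, pallocChunkPages) * pageSize
		fmt.Printf("npage=%d -> ask=%d MiB\n", npage, ask>>20)
	}
	// npage=1 -> 4 MiB, npage=3 -> 4 MiB, npage=512 -> 4 MiB, npage=513 -> 8 MiB
}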
// Free the span back into the heap.
-//
-// large must match the value of large passed to mheap.alloc. This is
-// used for accounting.
-func (h *mheap) freeSpan(s *mspan, large bool) {
+func (h *mheap) freeSpan(s *mspan) {
systemstack(func() {
mp := getg().m
lock(&h.lock)
@@ -1292,10 +1350,6 @@ func (h *mheap) freeSpan(s *mspan, large bool) {
bytes := s.npages << _PageShift
msanfree(base, bytes)
}
- if large {
- // Match accounting done in mheap.alloc.
- memstats.heap_objects--
- }
if gcBlackenEnabled != 0 {
// heap_scan changed.
gcController.revise()
@@ -1319,14 +1373,14 @@ func (h *mheap) freeSpan(s *mspan, large bool) {
func (h *mheap) freeManual(s *mspan, stat *uint64) {
s.needzero = 1
lock(&h.lock)
- *stat -= uint64(s.npages << _PageShift)
- memstats.heap_sys += uint64(s.npages << _PageShift)
+ mSysStatDec(stat, s.npages*pageSize)
+ mSysStatInc(&memstats.heap_sys, s.npages*pageSize)
h.freeSpanLocked(s, false, true)
unlock(&h.lock)
}
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
- switch s.state {
+ switch s.state.get() {
case mSpanManual:
if s.allocCount != 0 {
throw("mheap.freeSpanLocked - invalid stack free")
@@ -1336,140 +1390,28 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
throw("mheap.freeSpanLocked - invalid free")
}
- h.pagesInUse -= uint64(s.npages)
+ atomic.Xadd64(&h.pagesInUse, -int64(s.npages))
// Clear in-use bit in arena page bitmap.
arena, pageIdx, pageMask := pageIndexOf(s.base())
- arena.pageInUse[pageIdx] &^= pageMask
+ atomic.And8(&arena.pageInUse[pageIdx], ^pageMask)
default:
throw("mheap.freeSpanLocked - invalid span state")
}
if acctinuse {
- memstats.heap_inuse -= uint64(s.npages << _PageShift)
+ mSysStatDec(&memstats.heap_inuse, s.npages*pageSize)
}
if acctidle {
- memstats.heap_idle += uint64(s.npages << _PageShift)
+ mSysStatInc(&memstats.heap_idle, s.npages*pageSize)
}
- s.state = mSpanFree
-
- // Coalesce span with neighbors.
- h.coalesce(s)
- // Insert s into the treap.
- h.free.insert(s)
-}
+ // Mark the space as free.
+ h.pages.free(s.base(), s.npages)
-// scavengeSplit takes t.span() and attempts to split off a span containing size
-// (in bytes) worth of physical pages from the back.
-//
-// The split point is only approximately defined by size since the split point
-// is aligned to physPageSize and pageSize every time. If physHugePageSize is
-// non-zero and the split point would break apart a huge page in the span, then
-// the split point is also aligned to physHugePageSize.
-//
-// If the desired split point ends up at the base of s, or if size is obviously
-// much larger than s, then a split is not possible and this method returns nil.
-// Otherwise if a split occurred it returns the newly-created span.
-func (h *mheap) scavengeSplit(t treapIter, size uintptr) *mspan {
- s := t.span()
- start, end := s.physPageBounds()
- if end <= start || end-start <= size {
- // Size covers the whole span.
- return nil
- }
- // The span is bigger than what we need, so compute the base for the new
- // span if we decide to split.
- base := end - size
- // Round down to the next physical or logical page, whichever is bigger.
- base &^= (physPageSize - 1) | (pageSize - 1)
- if base <= start {
- return nil
- }
- if physHugePageSize > pageSize && base&^(physHugePageSize-1) >= start {
- // We're in danger of breaking apart a huge page, so include the entire
- // huge page in the bound by rounding down to the huge page size.
- // base should still be aligned to pageSize.
- base &^= physHugePageSize - 1
- }
- if base == start {
- // After all that we rounded base down to s.base(), so no need to split.
- return nil
- }
- if base < start {
- print("runtime: base=", base, ", s.npages=", s.npages, ", s.base()=", s.base(), ", size=", size, "\n")
- print("runtime: physPageSize=", physPageSize, ", physHugePageSize=", physHugePageSize, "\n")
- throw("bad span split base")
- }
-
- // Split s in-place, removing from the back.
- n := (*mspan)(h.spanalloc.alloc())
- nbytes := s.base() + s.npages*pageSize - base
- h.free.mutate(t, func(s *mspan) {
- n.init(base, nbytes/pageSize)
- s.npages -= nbytes / pageSize
- h.setSpan(n.base()-1, s)
- h.setSpan(n.base(), n)
- h.setSpan(n.base()+nbytes-1, n)
- n.needzero = s.needzero
- n.state = s.state
- })
- return n
-}
-
-// scavengeLocked scavenges nbytes worth of spans in the free treap by
-// starting from the span with the highest base address and working down.
-// It then takes those spans and places them in scav.
-//
-// Returns the amount of memory scavenged in bytes. h must be locked.
-func (h *mheap) scavengeLocked(nbytes uintptr) uintptr {
- released := uintptr(0)
- // Iterate over spans with huge pages first, then spans without.
- const mask = treapIterScav | treapIterHuge
- for _, match := range []treapIterType{treapIterHuge, 0} {
- // Iterate over the treap backwards (from highest address to lowest address)
- // scavenging spans until we've reached our quota of nbytes.
- for t := h.free.end(mask, match); released < nbytes && t.valid(); {
- s := t.span()
- start, end := s.physPageBounds()
- if start >= end {
- // This span doesn't cover at least one physical page, so skip it.
- t = t.prev()
- continue
- }
- n := t.prev()
- if span := h.scavengeSplit(t, nbytes-released); span != nil {
- s = span
- } else {
- h.free.erase(t)
- }
- released += s.scavenge()
- // Now that s is scavenged, we must eagerly coalesce it
- // with its neighbors to prevent having two spans with
- // the same scavenged state adjacent to each other.
- h.coalesce(s)
- t = n
- h.free.insert(s)
- }
- }
- return released
-}
-
-// scavengeIfNeededLocked calls scavengeLocked if we're currently above the
-// scavenge goal in order to prevent the mutator from out-running the
-// the scavenger.
-//
-// h must be locked.
-func (h *mheap) scavengeIfNeededLocked(size uintptr) {
- if r := heapRetained(); r+uint64(size) > h.scavengeRetainedGoal {
- todo := uint64(size)
- // If we're only going to go a little bit over, just request what
- // we actually need done.
- if overage := r + uint64(size) - h.scavengeRetainedGoal; overage < todo {
- todo = overage
- }
- h.scavengeLocked(uintptr(todo))
- }
+ // Free the span structure. We no longer have a use for it.
+ s.state.set(mSpanDead)
+ h.freeMSpanLocked(s)
}
// scavengeAll visits each node in the free treap and scavenges the
@@ -1477,12 +1419,14 @@ func (h *mheap) scavengeIfNeededLocked(size uintptr) {
// unscav and adds it into scav before continuing.
func (h *mheap) scavengeAll() {
// Disallow malloc or panic while holding the heap lock. We do
- // this here because this is an non-mallocgc entry-point to
+ // this here because this is a non-mallocgc entry-point to
// the mheap API.
gp := getg()
gp.m.mallocing++
lock(&h.lock)
- released := h.scavengeLocked(^uintptr(0))
+ // Reset the scavenger address so we have access to the whole heap.
+ h.pages.resetScavengeAddr()
+ released := h.pages.scavenge(^uintptr(0), true)
unlock(&h.lock)
gp.m.mallocing--
@@ -1511,14 +1455,13 @@ func (span *mspan) init(base uintptr, npages uintptr) {
span.allocCount = 0
span.spanclass = 0
span.elemsize = 0
- span.state = mSpanDead
- span.scavenged = false
span.speciallock.key = 0
span.specials = nil
span.needzero = 0
span.freeindex = 0
span.allocBits = nil
span.gcmarkBits = nil
+ span.state.set(mSpanDead)
}
func (span *mspan) inList() bool {
diff --git a/libgo/go/runtime/mkpreempt.go b/libgo/go/runtime/mkpreempt.go
new file mode 100644
index 0000000..615ec18
--- /dev/null
+++ b/libgo/go/runtime/mkpreempt.go
@@ -0,0 +1,522 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// mkpreempt generates the asyncPreempt functions for each
+// architecture.
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "strings"
+)
+
+// Copied from cmd/compile/internal/ssa/gen/*Ops.go
+
+var regNames386 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+}
+
+var regNamesAMD64 = []string{
+ "AX",
+ "CX",
+ "DX",
+ "BX",
+ "SP",
+ "BP",
+ "SI",
+ "DI",
+ "R8",
+ "R9",
+ "R10",
+ "R11",
+ "R12",
+ "R13",
+ "R14",
+ "R15",
+ "X0",
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
+ "X8",
+ "X9",
+ "X10",
+ "X11",
+ "X12",
+ "X13",
+ "X14",
+ "X15",
+}
+
+var out io.Writer
+
+var arches = map[string]func(){
+ "386": gen386,
+ "amd64": genAMD64,
+ "arm": genARM,
+ "arm64": genARM64,
+ "mips64x": func() { genMIPS(true) },
+ "mipsx": func() { genMIPS(false) },
+ "ppc64x": genPPC64,
+ "s390x": genS390X,
+ "wasm": genWasm,
+}
+var beLe = map[string]bool{"mips64x": true, "mipsx": true, "ppc64x": true}
+
+func main() {
+ flag.Parse()
+ if flag.NArg() > 0 {
+ out = os.Stdout
+ for _, arch := range flag.Args() {
+ gen, ok := arches[arch]
+ if !ok {
+ log.Fatalf("unknown arch %s", arch)
+ }
+ header(arch)
+ gen()
+ }
+ return
+ }
+
+ for arch, gen := range arches {
+ f, err := os.Create(fmt.Sprintf("preempt_%s.s", arch))
+ if err != nil {
+ log.Fatal(err)
+ }
+ out = f
+ header(arch)
+ gen()
+ if err := f.Close(); err != nil {
+ log.Fatal(err)
+ }
+ }
+}
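As a usage note inferred from main above: invoking the generator with explicit architectures, for example go run mkpreempt.go amd64 arm64, prints the generated asyncPreempt assembly for each named architecture to standard output, while running it with no arguments regenerates a preempt_<arch>.s file for every entry in the arches map.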
+
+func header(arch string) {
+ fmt.Fprintf(out, "// Code generated by mkpreempt.go; DO NOT EDIT.\n\n")
+ if beLe[arch] {
+ base := arch[:len(arch)-1]
+ fmt.Fprintf(out, "// +build %s %sle\n\n", base, base)
+ }
+ fmt.Fprintf(out, "#include \"go_asm.h\"\n")
+ fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
+ fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
+}
+
+func p(f string, args ...interface{}) {
+ fmted := fmt.Sprintf(f, args...)
+ fmt.Fprintf(out, "\t%s\n", strings.Replace(fmted, "\n", "\n\t", -1))
+}
+
+func label(l string) {
+ fmt.Fprintf(out, "%s\n", l)
+}
+
+type layout struct {
+ stack int
+ regs []regPos
+ sp string // stack pointer register
+}
+
+type regPos struct {
+ pos int
+
+ op string
+ reg string
+
+ // If this register requires special save and restore, these
+ // give those operations with a %d placeholder for the stack
+ // offset.
+ save, restore string
+}
+
+func (l *layout) add(op, reg string, size int) {
+ l.regs = append(l.regs, regPos{op: op, reg: reg, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) addSpecial(save, restore string, size int) {
+ l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack})
+ l.stack += size
+}
+
+func (l *layout) save() {
+ for _, reg := range l.regs {
+ if reg.save != "" {
+ p(reg.save, reg.pos)
+ } else {
+ p("%s %s, %d(%s)", reg.op, reg.reg, reg.pos, l.sp)
+ }
+ }
+}
+
+func (l *layout) restore() {
+ for i := len(l.regs) - 1; i >= 0; i-- {
+ reg := l.regs[i]
+ if reg.restore != "" {
+ p(reg.restore, reg.pos)
+ } else {
+ p("%s %d(%s), %s", reg.op, reg.pos, l.sp, reg.reg)
+ }
+ }
+}
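The layout helper above is the backbone of every per-architecture generator below: add and addSpecial assign increasing stack offsets, and save and restore then emit mirror-image instruction sequences. The toy program below reproduces just that shape for two amd64 registers to show what the emitted lines look like; it is illustrative only, not the generator itself.

package main

import "fmt"

func main() {
	type regPos struct {
		pos int
		op  string
		reg string
	}
	var regs []regPos
	stack := 0
	add := func(op, reg string, size int) {
		regs = append(regs, regPos{pos: stack, op: op, reg: reg})
		stack += size
	}
	add("MOVQ", "AX", 8)
	add("MOVUPS", "X0", 16)

	for _, r := range regs { // save: lowest offset first
		fmt.Printf("\t%s %s, %d(SP)\n", r.op, r.reg, r.pos)
	}
	for i := len(regs) - 1; i >= 0; i-- { // restore: reverse order
		r := regs[i]
		fmt.Printf("\t%s %d(SP), %s\n", r.op, r.pos, r.reg)
	}
	// Output:
	//	MOVQ AX, 0(SP)
	//	MOVUPS X0, 8(SP)
	//	MOVUPS 8(SP), X0
	//	MOVQ 0(SP), AX
}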
+
+func gen386() {
+ p("PUSHFL")
+
+ // Save general purpose registers.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNames386 {
+ if reg == "SP" || strings.HasPrefix(reg, "X") {
+ continue
+ }
+ l.add("MOVL", reg, 4)
+ }
+
+ // Save the 387 state.
+ l.addSpecial(
+ "FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)",
+ "FRSTOR %d(SP)",
+ 108)
+
+ // Save SSE state only if supported.
+ lSSE := layout{stack: l.stack, sp: "SP"}
+ for i := 0; i < 8; i++ {
+ lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16)
+ }
+
+ p("ADJSP $%d", lSSE.stack)
+ p("NOP SP")
+ l.save()
+ p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
+ lSSE.save()
+ label("nosse:")
+ p("CALL ·asyncPreempt2(SB)")
+ p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
+ lSSE.restore()
+ label("nosse2:")
+ l.restore()
+ p("ADJSP $%d", -lSSE.stack)
+
+ p("POPFL")
+ p("RET")
+}
+
+func genAMD64() {
+ // Assign stack offsets.
+ var l = layout{sp: "SP"}
+ for _, reg := range regNamesAMD64 {
+ if reg == "SP" || reg == "BP" {
+ continue
+ }
+ if strings.HasPrefix(reg, "X") {
+ l.add("MOVUPS", reg, 16)
+ } else {
+ l.add("MOVQ", reg, 8)
+ }
+ }
+
+ // TODO: MXCSR register?
+
+ p("PUSHQ BP")
+ p("MOVQ SP, BP")
+ p("// Save flags before clobbering them")
+ p("PUSHFQ")
+ p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
+ p("ADJSP $%d", l.stack)
+ p("// But vet doesn't know ADJSP, so suppress vet stack checking")
+ p("NOP SP")
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+ p("ADJSP $%d", -l.stack)
+ p("POPFQ")
+ p("POPQ BP")
+ p("RET")
+}
+
+func genARM() {
+ // Add integer registers R0-R12.
+ // R13 (SP), R14 (LR), R15 (PC) are special and not saved here.
+ var l = layout{sp: "R13", stack: 4} // add LR slot
+ for i := 0; i <= 12; i++ {
+ reg := fmt.Sprintf("R%d", i)
+ if i == 10 {
+ continue // R10 is g register, no need to save/restore
+ }
+ l.add("MOVW", reg, 4)
+ }
+ // Add flag register.
+ l.addSpecial(
+ "MOVW CPSR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, CPSR",
+ 4)
+
+ // Add floating point registers F0-F15 and flag register.
+ var lfp = layout{stack: l.stack, sp: "R13"}
+ lfp.addSpecial(
+ "MOVW FPCR, R0\nMOVW R0, %d(R13)",
+ "MOVW %d(R13), R0\nMOVW R0, FPCR",
+ 4)
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ lfp.add("MOVD", reg, 8)
+ }
+
+ p("MOVW.W R14, -%d(R13)", lfp.stack) // allocate frame, save LR
+ l.save()
+ p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp") // test goarm, and skip FP registers if goarm=5.
+ lfp.save()
+ label("nofp:")
+ p("CALL ·asyncPreempt2(SB)")
+ p("MOVB ·goarm(SB), R0\nCMP $6, R0\nBLT nofp2") // test goarm, and skip FP registers if goarm=5.
+ lfp.restore()
+ label("nofp2:")
+ l.restore()
+
+ p("MOVW %d(R13), R14", lfp.stack) // sigctxt.pushCall pushes LR on stack, restore it
+ p("MOVW.P %d(R13), R15", lfp.stack+4) // load PC, pop frame (including the space pushed by sigctxt.pushCall)
+ p("UNDEF") // shouldn't get here
+}
+
+func genARM64() {
+ // Add integer registers R0-R26
+ // R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
+ // and not saved here.
+ var l = layout{sp: "RSP", stack: 8} // add slot to save PC of interrupted instruction
+ for i := 0; i <= 26; i++ {
+ if i == 18 {
+ continue // R18 is not used, skip
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add("MOVD", reg, 8)
+ }
+ // Add flag registers.
+ l.addSpecial(
+ "MOVD NZCV, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, NZCV",
+ 8)
+ l.addSpecial(
+ "MOVD FPSR, R0\nMOVD R0, %d(RSP)",
+ "MOVD %d(RSP), R0\nMOVD R0, FPSR",
+ 8)
+ // TODO: FPCR? I don't think we'll change it, so no need to save.
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+ if l.stack%16 != 0 {
+ l.stack += 8 // SP needs 16-byte alignment
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p("MOVD R30, %d(RSP)", -l.stack)
+ p("SUB $%d, RSP", l.stack)
+ p("#ifdef GOOS_linux")
+ p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
+ p("SUB $8, RSP, R29") // set up new frame pointer
+ p("#endif")
+ // On darwin, save the LR again after decrementing SP. We run the
+ // signal handler on the G stack (as it doesn't support SA_ONSTACK),
+ // so any writes below SP may be clobbered.
+ p("#ifdef GOOS_darwin")
+ p("MOVD R30, (RSP)")
+ p("#endif")
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("#ifdef GOOS_linux")
+ p("MOVD -8(RSP), R29") // restore frame pointer
+ p("#endif")
+ p("MOVD (RSP), R27") // load PC to REGTMP
+ p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R27)")
+}
+
+func genMIPS(_64bit bool) {
+ mov := "MOVW"
+ movf := "MOVF"
+ add := "ADD"
+ sub := "SUB"
+ r28 := "R28"
+ regsize := 4
+ if _64bit {
+ mov = "MOVV"
+ movf = "MOVD"
+ add = "ADDV"
+ sub = "SUBV"
+ r28 = "RSB"
+ regsize = 8
+ }
+
+ // Add integer registers R1-R22, R24-R25, R28
+ // R0 (zero), R23 (REGTMP), R29 (SP), R30 (g), R31 (LR) are special,
+ // and not saved here. R26 and R27 are reserved by kernel and not used.
+ var l = layout{sp: "R29", stack: regsize} // add slot to save PC of interrupted instruction (in LR)
+ for i := 1; i <= 25; i++ {
+ if i == 23 {
+ continue // R23 is REGTMP
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add(mov, reg, regsize)
+ }
+ l.add(mov, r28, regsize)
+ l.addSpecial(
+ mov+" HI, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, HI",
+ regsize)
+ l.addSpecial(
+ mov+" LO, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, LO",
+ regsize)
+ // Add floating point control/status register FCR31 (FCR0-FCR30 are irrelevant)
+ l.addSpecial(
+ mov+" FCR31, R1\n"+mov+" R1, %d(R29)",
+ mov+" %d(R29), R1\n"+mov+" R1, FCR31",
+ regsize)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add(movf, reg, regsize)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR)
+ p(mov+" R31, -%d(R29)", l.stack)
+ p(sub+" $%d, R29", l.stack)
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p(mov+" %d(R29), R31", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p(mov + " (R29), R23") // load PC to REGTMP
+ p(add+" $%d, R29", l.stack+regsize) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (R23)")
+}
+
+func genPPC64() {
+ // Add integer registers R3-R29
+ // R0 (zero), R1 (SP), R30 (g) are special and not saved here.
+ // R2 (TOC pointer in PIC mode), R12 (function entry address in PIC mode) have been saved in sigctxt.pushCall.
+ // R31 (REGTMP) will be saved manually.
+ var l = layout{sp: "R1", stack: 32 + 8} // MinFrameSize on PPC64, plus one word for saving R31
+ for i := 3; i <= 29; i++ {
+ if i == 12 || i == 13 {
+ // R12 has been saved in sigctxt.pushCall.
+			// R13 is TLS pointer, not used by Go code. We must NOT
+ // restore it, otherwise if we parked and resumed on a
+ // different thread we'll mess up TLS addresses.
+ continue
+ }
+ reg := fmt.Sprintf("R%d", i)
+ l.add("MOVD", reg, 8)
+ }
+ l.addSpecial(
+ "MOVW CR, R31\nMOVW R31, %d(R1)",
+ "MOVW %d(R1), R31\nMOVFL R31, $0xff", // this is MOVW R31, CR
+ 8) // CR is 4-byte wide, but just keep the alignment
+ l.addSpecial(
+ "MOVD XER, R31\nMOVD R31, %d(R1)",
+ "MOVD %d(R1), R31\nMOVD R31, XER",
+ 8)
+ // Add floating point registers F0-F31.
+ for i := 0; i <= 31; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+ // Add floating point control/status register FPSCR.
+ l.addSpecial(
+ "MOVFL FPSCR, F0\nFMOVD F0, %d(R1)",
+ "FMOVD %d(R1), F0\nMOVFL F0, FPSCR",
+ 8)
+
+ p("MOVD R31, -%d(R1)", l.stack-32) // save R31 first, we'll use R31 for saving LR
+ p("MOVD LR, R31")
+ p("MOVDU R31, -%d(R1)", l.stack) // allocate frame, save PC of interrupted instruction (in LR)
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R1), R31", l.stack) // sigctxt.pushCall has pushed LR, R2, R12 (at interrupt) on stack, restore them
+ p("MOVD R31, LR")
+ p("MOVD %d(R1), R2", l.stack+8)
+ p("MOVD %d(R1), R12", l.stack+16)
+ p("MOVD (R1), R31") // load PC to CTR
+ p("MOVD R31, CTR")
+ p("MOVD 32(R1), R31") // restore R31
+ p("ADD $%d, R1", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("JMP (CTR)")
+}
+
+func genS390X() {
+ // Add integer registers R0-R12
+ // R13 (g), R14 (LR), R15 (SP) are special, and not saved here.
+ // Saving R10 (REGTMP) is not necessary, but it is saved anyway.
+ var l = layout{sp: "R15", stack: 16} // add slot to save PC of interrupted instruction and flags
+ l.addSpecial(
+ "STMG R0, R12, %d(R15)",
+ "LMG %d(R15), R0, R12",
+ 13*8)
+	// Add floating point registers F0-F15.
+ for i := 0; i <= 15; i++ {
+ reg := fmt.Sprintf("F%d", i)
+ l.add("FMOVD", reg, 8)
+ }
+
+ // allocate frame, save PC of interrupted instruction (in LR) and flags (condition code)
+ p("IPM R10") // save flags upfront, as ADD will clobber flags
+ p("MOVD R14, -%d(R15)", l.stack)
+ p("ADD $-%d, R15", l.stack)
+ p("MOVW R10, 8(R15)") // save flags
+
+ l.save()
+ p("CALL ·asyncPreempt2(SB)")
+ l.restore()
+
+ p("MOVD %d(R15), R14", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("ADD $%d, R15", l.stack+8) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("MOVWZ -%d(R15), R10", l.stack) // load flags to REGTMP
+ p("TMLH R10, $(3<<12)") // restore flags
+ p("MOVD -%d(R15), R10", l.stack+8) // load PC to REGTMP
+ p("JMP (R10)")
+}
+
+func genWasm() {
+ p("// No async preemption on wasm")
+ p("UNDEF")
+}
+
+func notImplemented() {
+ p("// Not implemented yet")
+ p("JMP ·abort(SB)")
+}
diff --git a/libgo/go/runtime/mpagealloc.go b/libgo/go/runtime/mpagealloc.go
new file mode 100644
index 0000000..572e6a9
--- /dev/null
+++ b/libgo/go/runtime/mpagealloc.go
@@ -0,0 +1,938 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Page allocator.
+//
+// The page allocator manages mapped pages (defined by pageSize, NOT
+// physPageSize) for allocation and re-use. It is embedded into mheap.
+//
+// Pages are managed using a bitmap that is sharded into chunks.
+// In the bitmap, 1 means in-use, and 0 means free. The bitmap spans the
+// process's address space. Chunks are managed in a sparse-array-style structure
+// similar to mheap.arenas, since the bitmap may be large on some systems.
+//
+// The bitmap is efficiently searched by using a radix tree in combination
+// with fast bit-wise intrinsics. Allocation is performed using an address-ordered
+// first-fit approach.
+//
+// Each entry in the radix tree is a summary that describes three properties of
+// a particular region of the address space: the number of contiguous free pages
+// at the start and end of the region it represents, and the maximum number of
+// contiguous free pages found anywhere in that region.
+//
+// Each level of the radix tree is stored as one contiguous array, which represents
+// a different granularity of subdivision of the process's address space. Thus, this
+// radix tree is actually implicit in these large arrays, as opposed to having explicit
+// dynamically-allocated pointer-based node structures. Naturally, these arrays may be
+// quite large for systems with large address spaces, so in these cases they are mapped
+// into memory as needed. The leaf summaries of the tree correspond to a bitmap chunk.
+//
+// The root level (referred to as L0 and index 0 in pageAlloc.summary) has each
+// summary represent the largest section of address space (16 GiB on 64-bit systems),
+// with each subsequent level representing successively smaller subsections until we
+// reach the finest granularity at the leaves, a chunk.
+//
+// More specifically, each summary in each level (except for leaf summaries)
+// represents some number of entries in the following level. For example, each
+// summary in the root level may represent a 16 GiB region of address space,
+// and in the next level there could be 8 corresponding entries which represent 2
+// GiB subsections of that 16 GiB region, each of which could correspond to 8
+// entries in the next level which each represent 256 MiB regions, and so on.
+//
+// Thus, this design only scales up to heaps of a bounded size, but it can always be
+// extended to larger heaps by simply adding levels to the radix tree, which mostly costs
+// additional virtual address space. The choice of managing large arrays also means
+// that a large amount of virtual address space may be reserved by the runtime.
+
+package runtime
+
+import (
+ "runtime/internal/atomic"
+ "unsafe"
+)
+
+const (
+ // The size of a bitmap chunk, i.e. the amount of bits (that is, pages) to consider
+ // in the bitmap at once.
+ pallocChunkPages = 1 << logPallocChunkPages
+ pallocChunkBytes = pallocChunkPages * pageSize
+ logPallocChunkPages = 9
+ logPallocChunkBytes = logPallocChunkPages + pageShift
+
+ // The number of radix bits for each level.
+ //
+ // The value of 3 is chosen such that the block of summaries we need to scan at
+ // each level fits in 64 bytes (2^3 summaries * 8 bytes per summary), which is
+ // close to the L1 cache line width on many systems. Also, a value of 3 fits 4 tree
+ // levels perfectly into the 21-bit pallocBits summary field at the root level.
+ //
+ // The following equation explains how each of the constants relate:
+ // summaryL0Bits + (summaryLevels-1)*summaryLevelBits + logPallocChunkBytes = heapAddrBits
+ //
+ // summaryLevels is an architecture-dependent value defined in mpagealloc_*.go.
+ summaryLevelBits = 3
+ summaryL0Bits = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits
+
+ // pallocChunksL2Bits is the number of bits of the chunk index number
+ // covered by the second level of the chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this change.
+ pallocChunksL2Bits = heapAddrBits - logPallocChunkBytes - pallocChunksL1Bits
+ pallocChunksL1Shift = pallocChunksL2Bits
+
+ // Maximum searchAddr value, which indicates that the heap has no free space.
+ //
+ // We subtract arenaBaseOffset because we want this to represent the maximum
+ // value in the shifted address space, but searchAddr is stored as a regular
+ // memory address. See arenaBaseOffset for details.
+ maxSearchAddr = ^uintptr(0) - arenaBaseOffset
+
+ // Minimum scavAddr value, which indicates that the scavenger is done.
+ //
+ // minScavAddr + arenaBaseOffset == 0
+ minScavAddr = (^arenaBaseOffset + 1) & uintptrMask
+)
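Plugging in typical values makes these constants concrete. The check below assumes 8 KiB runtime pages (pageShift = 13) and the 64-bit configuration referenced by the table further down (heapAddrBits = 48, summaryLevels = 5, from mpagealloc_64bit.go); those values are assumptions here, not part of this hunk.

package main

import "fmt"

func main() {
	const (
		pageShift           = 13 // assumption: 8 KiB runtime pages
		logPallocChunkPages = 9
		logPallocChunkBytes = logPallocChunkPages + pageShift // 22
		heapAddrBits        = 48 // assumption: typical 64-bit setup
		summaryLevels       = 5  // assumption: mpagealloc_64bit.go
		summaryLevelBits    = 3
		summaryL0Bits       = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits
	)
	fmt.Println("chunk size (MiB):", (1<<logPallocChunkBytes)>>20) // 4
	fmt.Println("summaryL0Bits:", summaryL0Bits)                   // 14
	// Each root (L0) summary covers 2^(heapAddrBits-summaryL0Bits) bytes.
	fmt.Println("L0 region (GiB):", (uint64(1)<<(heapAddrBits-summaryL0Bits))>>30) // 16
}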
+
+// Global chunk index.
+//
+// Represents an index into the leaf level of the radix tree.
+// Similar to arenaIndex, except instead of arenas, it divides the address
+// space into chunks.
+type chunkIdx uint
+
+// chunkIndex returns the global index of the palloc chunk containing the
+// pointer p.
+func chunkIndex(p uintptr) chunkIdx {
+ return chunkIdx((p + arenaBaseOffset) / pallocChunkBytes)
+}
+
+// chunkBase returns the base address of the palloc chunk at index ci.
+func chunkBase(ci chunkIdx) uintptr {
+ return uintptr(ci)*pallocChunkBytes - arenaBaseOffset
+}
+
+// chunkPageIndex computes the index of the page that contains p,
+// relative to the chunk which contains p.
+func chunkPageIndex(p uintptr) uint {
+ return uint(p % pallocChunkBytes / pageSize)
+}
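For a quick sanity check of these three helpers, assume arenaBaseOffset is 0 (as on linux/amd64) and 8 KiB pages, so a chunk spans 4 MiB; both assumptions come from outside this hunk.

package main

import "fmt"

func main() {
	const (
		pageSize         = 8192           // assumption: 8 KiB runtime pages
		pallocChunkBytes = 512 * pageSize // 4 MiB, per the constants above
		arenaBaseOffset  = 0              // assumption: e.g. linux/amd64
	)
	p := uintptr(0xc000012000) // an arbitrary heap-looking address (64-bit)
	ci := (p + arenaBaseOffset) / pallocChunkBytes // chunkIndex
	base := ci*pallocChunkBytes - arenaBaseOffset  // chunkBase
	pageIdx := p % pallocChunkBytes / pageSize     // chunkPageIndex
	fmt.Printf("chunkIndex=%d chunkBase=%#x pageInChunk=%d\n", ci, base, pageIdx)
	// chunkIndex=196608 chunkBase=0xc000000000 pageInChunk=9
}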
+
+// l1 returns the index into the first level of (*pageAlloc).chunks.
+func (i chunkIdx) l1() uint {
+ if pallocChunksL1Bits == 0 {
+ // Let the compiler optimize this away if there's no
+ // L1 map.
+ return 0
+ } else {
+ return uint(i) >> pallocChunksL1Shift
+ }
+}
+
+// l2 returns the index into the second level of (*pageAlloc).chunks.
+func (i chunkIdx) l2() uint {
+ if pallocChunksL1Bits == 0 {
+ return uint(i)
+ } else {
+ return uint(i) & (1<<pallocChunksL2Bits - 1)
+ }
+}
+
+// addrsToSummaryRange converts base and limit pointers into a range
+// of entries for the given summary level.
+//
+// The returned range is inclusive on the lower bound and exclusive on
+// the upper bound.
+func addrsToSummaryRange(level int, base, limit uintptr) (lo int, hi int) {
+ // This is slightly more nuanced than just a shift for the exclusive
+ // upper-bound. Note that the exclusive upper bound may be within a
+ // summary at this level, meaning if we just do the obvious computation
+ // hi will end up being an inclusive upper bound. Unfortunately, just
+ // adding 1 to that is too broad since we might be on the very edge of
+ // of a summary's max page count boundary for this level
+	// a summary's max page count boundary for this level
+ // then shift, then add 1, so we get an exclusive upper bound at the end.
+ lo = int((base + arenaBaseOffset) >> levelShift[level])
+ hi = int(((limit-1)+arenaBaseOffset)>>levelShift[level]) + 1
+ return
+}
+
+// blockAlignSummaryRange aligns indices into the given level to that
+// level's block width (1 << levelBits[level]). It assumes lo is inclusive
+// and hi is exclusive, and so aligns them down and up respectively.
+func blockAlignSummaryRange(level int, lo, hi int) (int, int) {
+ e := uintptr(1) << levelBits[level]
+ return int(alignDown(uintptr(lo), e)), int(alignUp(uintptr(hi), e))
+}
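The inclusive-then-exclusive adjustment in addrsToSummaryRange is easiest to see with numbers. The sketch below fixes a single level with shift 22 (4 MiB per summary) and assumes arenaBaseOffset = 0; neither value is taken from this hunk.

package main

import "fmt"

// leafSummaryRange restates the lo/hi computation above for one level.
func leafSummaryRange(base, limit uintptr) (lo, hi int) {
	const shift = 22
	lo = int(base >> shift)
	hi = int((limit-1)>>shift) + 1
	return
}

func main() {
	const MiB = 1 << 20
	// limit exactly on a summary boundary: shifting limit directly already
	// gives the right exclusive bound (1), so blindly adding 1 would be
	// too broad (2).
	fmt.Println(leafSummaryRange(0, 4*MiB)) // 0 1
	// limit inside a summary: shifting limit alone yields an inclusive
	// bound (1); subtracting 1 before the shift and adding 1 afterwards
	// gives the correct exclusive bound (2).
	fmt.Println(leafSummaryRange(0, 6*MiB)) // 0 2
}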
+
+type pageAlloc struct {
+ // Radix tree of summaries.
+ //
+ // Each slice's cap represents the whole memory reservation.
+ // Each slice's len reflects the allocator's maximum known
+ // mapped heap address for that level.
+ //
+ // The backing store of each summary level is reserved in init
+ // and may or may not be committed in grow (small address spaces
+ // may commit all the memory in init).
+ //
+ // The purpose of keeping len <= cap is to enforce bounds checks
+ // on the top end of the slice so that instead of an unknown
+ // runtime segmentation fault, we get a much friendlier out-of-bounds
+ // error.
+ //
+ // To iterate over a summary level, use inUse to determine which ranges
+ // are currently available. Otherwise one might try to access
+	// memory which is only Reserved, which may result in a hard fault.
+ //
+ // We may still get segmentation faults < len since some of that
+ // memory may not be committed yet.
+ summary [summaryLevels][]pallocSum
+
+ // chunks is a slice of bitmap chunks.
+ //
+ // The total size of chunks is quite large on most 64-bit platforms
+ // (O(GiB) or more) if flattened, so rather than making one large mapping
+ // (which has problems on some platforms, even when PROT_NONE) we use a
+ // two-level sparse array approach similar to the arena index in mheap.
+ //
+ // To find the chunk containing a memory address `a`, do:
+ // chunkOf(chunkIndex(a))
+ //
+ // Below is a table describing the configuration for chunks for various
+ // heapAddrBits supported by the runtime.
+ //
+ // heapAddrBits | L1 Bits | L2 Bits | L2 Entry Size
+ // ------------------------------------------------
+ // 32 | 0 | 10 | 128 KiB
+ // 33 (iOS) | 0 | 11 | 256 KiB
+ // 48 | 13 | 13 | 1 MiB
+ //
+ // There's no reason to use the L1 part of chunks on 32-bit, the
+ // address space is small so the L2 is small. For platforms with a
+ // 48-bit address space, we pick the L1 such that the L2 is 1 MiB
+	// in size, which strikes a good balance between low granularity and
+	// keeping the impact on BSS low (note the L1 is stored directly
+ // in pageAlloc).
+ //
+ // To iterate over the bitmap, use inUse to determine which ranges
+ // are currently available. Otherwise one might iterate over unused
+ // ranges.
+ //
+ // TODO(mknyszek): Consider changing the definition of the bitmap
+ // such that 1 means free and 0 means in-use so that summaries and
+ // the bitmaps align better on zero-values.
+ chunks [1 << pallocChunksL1Bits]*[1 << pallocChunksL2Bits]pallocData
+
+ // The address to start an allocation search with.
+ //
+ // When added with arenaBaseOffset, we guarantee that
+ // all valid heap addresses (when also added with
+ // arenaBaseOffset) below this value are allocated and
+ // not worth searching.
+ //
+ // Note that adding in arenaBaseOffset transforms addresses
+ // to a new address space with a linear view of the full address
+ // space on architectures with segmented address spaces.
+ searchAddr uintptr
+
+ // The address to start a scavenge candidate search with.
+ scavAddr uintptr
+
+ // start and end represent the chunk indices
+ // which pageAlloc knows about. It assumes
+ // chunks in the range [start, end) are
+ // currently ready to use.
+ start, end chunkIdx
+
+ // inUse is a slice of ranges of address space which are
+ // known by the page allocator to be currently in-use (passed
+ // to grow).
+ //
+ // This field is currently unused on 32-bit architectures but
+ // is harmless to track. We care much more about having a
+ // contiguous heap in these cases and take additional measures
+ // to ensure that, so in nearly all cases this should have just
+ // 1 element.
+ //
+ // All access is protected by the mheapLock.
+ inUse addrRanges
+
+ // mheap_.lock. This level of indirection makes it possible
+	// to test pageAlloc independently of the runtime allocator.
+ mheapLock *mutex
+
+ // sysStat is the runtime memstat to update when new system
+ // memory is committed by the pageAlloc for allocation metadata.
+ sysStat *uint64
+
+ // Whether or not this struct is being used in tests.
+ test bool
+}
+
+func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
+ if levelLogPages[0] > logMaxPackedValue {
+ // We can't represent 1<<levelLogPages[0] pages, the maximum number
+ // of pages we need to represent at the root level, in a summary, which
+ // is a big problem. Throw.
+ print("runtime: root level max pages = ", 1<<levelLogPages[0], "\n")
+ print("runtime: summary max pages = ", maxPackedValue, "\n")
+ throw("root level max pages doesn't fit in summary")
+ }
+ s.sysStat = sysStat
+
+ // Initialize s.inUse.
+ s.inUse.init(sysStat)
+
+ // System-dependent initialization.
+ s.sysInit()
+
+ // Start with the searchAddr in a state indicating there's no free memory.
+ s.searchAddr = maxSearchAddr
+
+ // Start with the scavAddr in a state indicating there's nothing more to do.
+ s.scavAddr = minScavAddr
+
+ // Set the mheapLock.
+ s.mheapLock = mheapLock
+}
+
+// compareSearchAddrTo compares an address against s.searchAddr in a linearized
+// view of the address space on systems with discontinuous process address spaces.
+// This linearized view is the same one generated by chunkIndex and arenaIndex,
+// done by adding arenaBaseOffset.
+//
+// On systems without a discontinuous address space, it's just a normal comparison.
+//
+// Returns < 0 if addr is less than s.searchAddr in the linearized address space.
+// Returns > 0 if addr is greater than s.searchAddr in the linearized address space.
+// Returns 0 if addr and s.searchAddr are equal.
+func (s *pageAlloc) compareSearchAddrTo(addr uintptr) int {
+ // Compare with arenaBaseOffset added because it gives us a linear, contiguous view
+ // of the heap on architectures with signed address spaces.
+ lAddr := addr + arenaBaseOffset
+ lSearchAddr := s.searchAddr + arenaBaseOffset
+ if lAddr < lSearchAddr {
+ return -1
+ } else if lAddr > lSearchAddr {
+ return 1
+ }
+ return 0
+}
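+
+// For example (illustrative only), on a platform where arenaBaseOffset == 0
+// this reduces to an ordinary unsigned comparison: with s.searchAddr == 0x1000,
+// compareSearchAddrTo(0x800) < 0, compareSearchAddrTo(0x1000) == 0, and
+// compareSearchAddrTo(0x2000) > 0.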
+
+// chunkOf returns the chunk at the given chunk index.
+func (s *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
+ return &s.chunks[ci.l1()][ci.l2()]
+}
+
+// grow sets up the metadata for the address range [base, base+size).
+// It may allocate metadata, in which case *s.sysStat will be updated.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) grow(base, size uintptr) {
+ // Round up to chunks, since we can't deal with increments smaller
+ // than chunks. Also, sysGrow expects aligned values.
+ limit := alignUp(base+size, pallocChunkBytes)
+ base = alignDown(base, pallocChunkBytes)
+
+ // Grow the summary levels in a system-dependent manner.
+ // We just update a bunch of additional metadata here.
+ s.sysGrow(base, limit)
+
+ // Update s.start and s.end.
+ // If no growth happened yet, start == 0. This is generally
+ // safe since the zero page is unmapped.
+ firstGrowth := s.start == 0
+ start, end := chunkIndex(base), chunkIndex(limit)
+ if firstGrowth || start < s.start {
+ s.start = start
+ }
+ if end > s.end {
+ s.end = end
+ }
+ // Note that [base, limit) will never overlap with any existing
+ // range inUse because grow only ever adds never-used memory
+ // regions to the page allocator.
+ s.inUse.add(addrRange{base, limit})
+
+ // A grow operation is a lot like a free operation, so if our
+ // chunk ends up below the (linearized) s.searchAddr, update
+ // s.searchAddr to the new address, just like in free.
+ if s.compareSearchAddrTo(base) < 0 {
+ s.searchAddr = base
+ }
+
+ // Add entries into chunks, which is sparse, if needed. Then,
+ // initialize the bitmap.
+ //
+ // Newly-grown memory is always considered scavenged.
+ // Set all the bits in the scavenged bitmaps high.
+ for c := chunkIndex(base); c < chunkIndex(limit); c++ {
+ if s.chunks[c.l1()] == nil {
+ // Create the necessary l2 entry.
+ //
+ // Store it atomically to avoid races with readers which
+ // don't acquire the heap lock.
+ r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat)
+ atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r)
+ }
+ s.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
+ }
+
+ // Update summaries accordingly. The grow acts like a free, so
+ // we need to ensure this newly-free memory is visible in the
+ // summaries.
+ s.update(base, size/pageSize, true, false)
+}
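+
+// For example (illustrative only): growing the heap by a single
+// pallocChunkBytes-aligned chunk allocates that chunk's L2 bitmap block if it
+// does not exist yet, marks every page in the chunk as scavenged, and then
+// publishes the chunk as entirely free through update.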
+
+// update updates heap metadata. It must be called each time the bitmap
+// is updated.
+//
+// If contig is true, update does some optimizations assuming that there was
+// a contiguous allocation or free between addr and addr+npages. alloc indicates
+// whether the operation performed was an allocation or a free.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
+ // base, limit, sc, and ec are inclusive.
+ limit := base + npages*pageSize - 1
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+
+ // Handle updating the lowest level first.
+ if sc == ec {
+ // Fast path: the allocation doesn't span more than one chunk,
+ // so update this one and if the summary didn't change, return.
+ x := s.summary[len(s.summary)-1][sc]
+ y := s.chunkOf(sc).summarize()
+ if x == y {
+ return
+ }
+ s.summary[len(s.summary)-1][sc] = y
+ } else if contig {
+ // Slow contiguous path: the allocation spans more than one chunk
+ // and at least one summary is guaranteed to change.
+ summary := s.summary[len(s.summary)-1]
+
+ // Update the summary for chunk sc.
+ summary[sc] = s.chunkOf(sc).summarize()
+
+ // Update the summaries for chunks in between, which are
+ // either totally allocated or freed.
+ whole := s.summary[len(s.summary)-1][sc+1 : ec]
+ if alloc {
+ // Should optimize into a memclr.
+ for i := range whole {
+ whole[i] = 0
+ }
+ } else {
+ for i := range whole {
+ whole[i] = freeChunkSum
+ }
+ }
+
+ // Update the summary for chunk ec.
+ summary[ec] = s.chunkOf(ec).summarize()
+ } else {
+ // Slow general path: the allocation spans more than one chunk
+ // and at least one summary is guaranteed to change.
+ //
+ // We can't assume a contiguous allocation happened, so walk over
+ // every chunk in the range and manually recompute the summary.
+ summary := s.summary[len(s.summary)-1]
+ for c := sc; c <= ec; c++ {
+ summary[c] = s.chunkOf(c).summarize()
+ }
+ }
+
+ // Walk up the radix tree and update the summaries appropriately.
+ changed := true
+ for l := len(s.summary) - 2; l >= 0 && changed; l-- {
+ // Update summaries at level l from summaries at level l+1.
+ changed = false
+
+ // "Constants" for the previous level which we
+ // need to compute the summary from that level.
+ logEntriesPerBlock := levelBits[l+1]
+ logMaxPages := levelLogPages[l+1]
+
+ // lo and hi describe all the parts of the level we need to look at.
+ lo, hi := addrsToSummaryRange(l, base, limit+1)
+
+ // Iterate over each block, updating the corresponding summary in the less-granular level.
+ for i := lo; i < hi; i++ {
+ children := s.summary[l+1][i<<logEntriesPerBlock : (i+1)<<logEntriesPerBlock]
+ sum := mergeSummaries(children, logMaxPages)
+ old := s.summary[l][i]
+ if old != sum {
+ changed = true
+ s.summary[l][i] = sum
+ }
+ }
+ }
+}
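+
+// For example (illustrative only): a contiguous free spanning chunks sc and ec
+// recomputes the leaf summaries of sc and ec from their bitmaps, sets every
+// chunk strictly in between to freeChunkSum directly, and then re-merges only
+// the ancestor blocks covering [base, limit], stopping at the first level
+// whose summaries did not change.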
+
+// allocRange marks the range of memory [base, base+npages*pageSize) as
+// allocated. It also updates the summaries to reflect the newly-updated
+// bitmap.
+//
+// Returns the amount of scavenged memory in bytes present in the
+// allocated range.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) allocRange(base, npages uintptr) uintptr {
+ limit := base + npages*pageSize - 1
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit)
+
+ scav := uint(0)
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ chunk := s.chunkOf(sc)
+ scav += chunk.scavenged.popcntRange(si, ei+1-si)
+ chunk.allocRange(si, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ chunk := s.chunkOf(sc)
+ scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si)
+ chunk.allocRange(si, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ chunk := s.chunkOf(c)
+ scav += chunk.scavenged.popcntRange(0, pallocChunkPages)
+ chunk.allocAll()
+ }
+ chunk = s.chunkOf(ec)
+ scav += chunk.scavenged.popcntRange(0, ei+1)
+ chunk.allocRange(0, ei+1)
+ }
+ s.update(base, npages, true, true)
+ return uintptr(scav) * pageSize
+}
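+
+// For example (illustrative only): allocating 3 pages starting at the final
+// page of chunk c marks page pallocChunkPages-1 of c and pages 0 and 1 of
+// c+1 as allocated, and returns pageSize times the number of those three
+// pages whose scavenged bits were set.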
+
+// find searches for the first (address-ordered) contiguous free region of
+// npages in size and returns a base address for that region.
+//
+// It uses s.searchAddr to prune its search and assumes that no palloc chunks
+// below chunkIndex(s.searchAddr) contain any free memory at all.
+//
+// find also computes and returns a candidate s.searchAddr, which may or
+// may not prune more of the address space than s.searchAddr already does.
+//
+// find represents the slow path and the full radix tree search.
+//
+// Returns a base address of 0 on failure, in which case the candidate
+// searchAddr returned is invalid and must be ignored.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) find(npages uintptr) (uintptr, uintptr) {
+ // Search algorithm.
+ //
+ // This algorithm walks each level l of the radix tree from the root level
+ // to the leaf level. It iterates over at most 1 << levelBits[l] entries
+ // in a given level in the radix tree, and uses the summary information to
+ // find either:
+ // 1) That a given subtree contains a large enough contiguous region, at
+ // which point it continues iterating on the next level, or
+ // 2) That there are enough contiguous boundary-crossing bits to satisfy
+ // the allocation, at which point it knows exactly where to start
+ // allocating from.
+ //
+ // i tracks the index into the current level l's structure for the
+ // contiguous 1 << levelBits[l] entries we're actually interested in.
+ //
+ // NOTE: Technically this search could allocate a region which crosses
+ // the arenaBaseOffset boundary, which, when arenaBaseOffset != 0, is
+ // a discontinuity. However, the only way this could happen is if the
+ // page at the zero address is mapped, and this is impossible on
+ // every system we support where arenaBaseOffset != 0. So, the
+ // discontinuity is already encoded in the fact that the OS will never
+ // map the zero page for us, and this function doesn't try to handle
+ // this case in any way.
+
+ // i is the beginning of the block of entries we're searching at the
+ // current level.
+ i := 0
+
+ // firstFree is the region of address space in which we are certain to
+ // find the first free page in the heap. base and bound are the inclusive
+ // bounds of this window, and both are addresses in the linearized, contiguous
+ // view of the address space (with arenaBaseOffset pre-added). At each level,
+ // this window is narrowed as we find the memory region containing the
+ // first free page of memory. To begin with, the range reflects the
+ // full process address space.
+ //
+ // firstFree is updated by calling foundFree each time free space in the
+ // heap is discovered.
+ //
+ // At the end of the search, base-arenaBaseOffset is the best new
+ // searchAddr we could deduce in this search.
+ firstFree := struct {
+ base, bound uintptr
+ }{
+ base: 0,
+ bound: (1<<heapAddrBits - 1),
+ }
+ // foundFree takes the given address range [addr, addr+size) and
+ // updates firstFree if it is a narrower range. The input range must
+ // either be fully contained within firstFree or not overlap with it
+ // at all.
+ //
+ // This way, we'll record the first summary we find with any free
+ // pages on the root level and narrow that down if we descend into
+ // that summary. But as soon as we need to iterate beyond that summary
+ // in a level to find a large enough range, we'll stop narrowing.
+ foundFree := func(addr, size uintptr) {
+ if firstFree.base <= addr && addr+size-1 <= firstFree.bound {
+ // This range fits within the current firstFree window, so narrow
+ // down the firstFree window to the base and bound of this range.
+ firstFree.base = addr
+ firstFree.bound = addr + size - 1
+ } else if !(addr+size-1 < firstFree.base || addr > firstFree.bound) {
+ // This range only partially overlaps with the firstFree range,
+ // so throw.
+ print("runtime: addr = ", hex(addr), ", size = ", size, "\n")
+ print("runtime: base = ", hex(firstFree.base), ", bound = ", hex(firstFree.bound), "\n")
+ throw("range partially overlaps")
+ }
+ }
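+
+ // For example (illustrative only): the first non-zero root-level summary
+ // narrows the window to that summary's address range; a later discovery
+ // nested inside the window narrows it further; a discovery entirely
+ // outside the window is ignored; and a partial overlap indicates broken
+ // bookkeeping, so foundFree throws.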
+
+ // lastSum is the summary which we saw on the previous level that made us
+ // move on to the next level. Used to print additional information in the
+ // case of a catastrophic failure.
+ // lastSumIdx is that summary's index in the previous level.
+ lastSum := packPallocSum(0, 0, 0)
+ lastSumIdx := -1
+
+nextLevel:
+ for l := 0; l < len(s.summary); l++ {
+ // For the root level, entriesPerBlock is the whole level.
+ entriesPerBlock := 1 << levelBits[l]
+ logMaxPages := levelLogPages[l]
+
+ // We've moved into a new level, so let's update i to our new
+ // starting index. This is a no-op for level 0.
+ i <<= levelBits[l]
+
+ // Slice out the block of entries we care about.
+ entries := s.summary[l][i : i+entriesPerBlock]
+
+ // Determine j0, the first index we should start iterating from.
+ // The searchAddr may help us eliminate iterations if we followed the
+ // searchAddr on the previous level or we're on the root level, in which
+ // case the searchAddr should be the same as i after levelShift.
+ j0 := 0
+ if searchIdx := int((s.searchAddr + arenaBaseOffset) >> levelShift[l]); searchIdx&^(entriesPerBlock-1) == i {
+ j0 = searchIdx & (entriesPerBlock - 1)
+ }
+
+ // Run over the level entries looking for
+ // a contiguous run of at least npages either
+ // within an entry or across entries.
+ //
+ // base contains the page index (relative to
+ // the first entry's first page) of the currently
+ // considered run of consecutive pages.
+ //
+ // size contains the size of the currently considered
+ // run of consecutive pages.
+ var base, size uint
+ for j := j0; j < len(entries); j++ {
+ sum := entries[j]
+ if sum == 0 {
+ // A full entry means we broke any streak and
+ // that we should skip it altogether.
+ size = 0
+ continue
+ }
+
+ // We've encountered a non-zero summary which means
+ // free memory, so update firstFree.
+ foundFree(uintptr((i+j)<<levelShift[l]), (uintptr(1)<<logMaxPages)*pageSize)
+
+ s := sum.start()
+ if size+s >= uint(npages) {
+ // If size == 0 we don't have a run yet,
+ // which means base isn't valid. So, set
+ // base to the first page in this block.
+ if size == 0 {
+ base = uint(j) << logMaxPages
+ }
+ // We hit npages; we're done!
+ size += s
+ break
+ }
+ if sum.max() >= uint(npages) {
+ // The entry itself contains npages contiguous
+ // free pages, so continue on the next level
+ // to find that run.
+ i += j
+ lastSumIdx = i
+ lastSum = sum
+ continue nextLevel
+ }
+ if size == 0 || s < 1<<logMaxPages {
+ // We either don't have a current run started, or this entry
+ // isn't totally free (meaning we can't continue the current
+ // one), so try to begin a new run by setting size and base
+ // based on sum.end.
+ size = sum.end()
+ base = uint(j+1)<<logMaxPages - size
+ continue
+ }
+ // The entry is completely free, so continue the run.
+ size += 1 << logMaxPages
+ }
+ if size >= uint(npages) {
+ // We found a sufficiently large run of free pages straddling
+ // some boundary, so compute the address and return it.
+ addr := uintptr(i<<levelShift[l]) - arenaBaseOffset + uintptr(base)*pageSize
+ return addr, firstFree.base - arenaBaseOffset
+ }
+ if l == 0 {
+ // We're at level zero, so that means we've exhausted our search.
+ return 0, maxSearchAddr
+ }
+
+ // We're not at level zero, and we exhausted the level we were looking in.
+ // This means that either our calculations were wrong or the level above
+ // lied to us. In either case, dump some useful state and throw.
+ print("runtime: summary[", l-1, "][", lastSumIdx, "] = ", lastSum.start(), ", ", lastSum.max(), ", ", lastSum.end(), "\n")
+ print("runtime: level = ", l, ", npages = ", npages, ", j0 = ", j0, "\n")
+ print("runtime: s.searchAddr = ", hex(s.searchAddr), ", i = ", i, "\n")
+ print("runtime: levelShift[level] = ", levelShift[l], ", levelBits[level] = ", levelBits[l], "\n")
+ for j := 0; j < len(entries); j++ {
+ sum := entries[j]
+ print("runtime: summary[", l, "][", i+j, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
+ }
+ throw("bad summary data")
+ }
+
+ // Since we've gotten to this point, that means we haven't found a
+ // sufficiently-sized free region straddling some boundary (chunk or larger).
+ // This means the last summary we inspected must have had a large enough "max"
+ // value, so look inside the chunk to find a suitable run.
+ //
+ // After iterating over all levels, i must contain a chunk index which
+ // is what the final level represents.
+ ci := chunkIdx(i)
+ j, searchIdx := s.chunkOf(ci).find(npages, 0)
+ if j < 0 {
+ // We couldn't find any space in this chunk despite the summaries telling
+ // us it should be there. There's likely a bug, so dump some state and throw.
+ sum := s.summary[len(s.summary)-1][i]
+ print("runtime: summary[", len(s.summary)-1, "][", i, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
+ print("runtime: npages = ", npages, "\n")
+ throw("bad summary data")
+ }
+
+ // Compute the address at which the free space starts.
+ addr := chunkBase(ci) + uintptr(j)*pageSize
+
+ // Since we actually searched the chunk, we may have
+ // found an even narrower free window.
+ searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
+ foundFree(searchAddr+arenaBaseOffset, chunkBase(ci+1)-searchAddr)
+ return addr, firstFree.base - arenaBaseOffset
+}
+
+// alloc allocates npages worth of memory from the page heap, returning the base
+// address for the allocation and the amount of scavenged memory in bytes
+// contained in the region [base address, base address + npages*pageSize).
+//
+// Returns a 0 base address on failure, in which case other returned values
+// should be ignored.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
+ // If the searchAddr refers to a region which has a higher address than
+ // any known chunk, then we know we're out of memory.
+ if chunkIndex(s.searchAddr) >= s.end {
+ return 0, 0
+ }
+
+ // If npages has a chance of fitting in the chunk where the searchAddr is,
+ // search it directly.
+ searchAddr := uintptr(0)
+ if pallocChunkPages-chunkPageIndex(s.searchAddr) >= uint(npages) {
+ // npages is guaranteed to be no greater than pallocChunkPages here.
+ i := chunkIndex(s.searchAddr)
+ if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) {
+ j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr))
+ if j < 0 {
+ print("runtime: max = ", max, ", npages = ", npages, "\n")
+ print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr), ", s.searchAddr = ", hex(s.searchAddr), "\n")
+ throw("bad summary data")
+ }
+ addr = chunkBase(i) + uintptr(j)*pageSize
+ searchAddr = chunkBase(i) + uintptr(searchIdx)*pageSize
+ goto Found
+ }
+ }
+ // We failed to use a searchAddr for one reason or another, so try
+ // the slow path.
+ addr, searchAddr = s.find(npages)
+ if addr == 0 {
+ if npages == 1 {
+ // We failed to find a single free page, the smallest unit
+ // of allocation. This means we know the heap is completely
+ // exhausted. Otherwise, the heap still might have free
+ // space in it, just not enough contiguous space to
+ // accommodate npages.
+ s.searchAddr = maxSearchAddr
+ }
+ return 0, 0
+ }
+Found:
+ // Go ahead and actually mark the bits now that we have an address.
+ scav = s.allocRange(addr, npages)
+
+ // If we found a higher (linearized) searchAddr, we know that all the
+ // heap memory before that searchAddr in a linear address space is
+ // allocated, so bump s.searchAddr up to the new one.
+ if s.compareSearchAddrTo(searchAddr) > 0 {
+ s.searchAddr = searchAddr
+ }
+ return addr, scav
+}
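+
+// For example (illustrative only): if npages fits between s.searchAddr's page
+// offset and the end of its chunk, and that chunk's leaf summary reports
+// max >= npages, alloc searches only that one chunk and never walks the radix
+// tree; otherwise it falls back to find, and a failed 1-page allocation marks
+// the heap exhausted by setting s.searchAddr to maxSearchAddr.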
+
+// free returns npages worth of memory starting at base back to the page heap.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) free(base, npages uintptr) {
+ // If we're freeing pages below the (linearized) s.searchAddr, update searchAddr.
+ if s.compareSearchAddrTo(base) < 0 {
+ s.searchAddr = base
+ }
+ if npages == 1 {
+ // Fast path: we're clearing a single bit, and we know exactly
+ // where it is, so mark it directly.
+ i := chunkIndex(base)
+ s.chunkOf(i).free1(chunkPageIndex(base))
+ } else {
+ // Slow path: we're clearing more bits so we may need to iterate.
+ limit := base + npages*pageSize - 1
+ sc, ec := chunkIndex(base), chunkIndex(limit)
+ si, ei := chunkPageIndex(base), chunkPageIndex(limit)
+
+ if sc == ec {
+ // The range doesn't cross any chunk boundaries.
+ s.chunkOf(sc).free(si, ei+1-si)
+ } else {
+ // The range crosses at least one chunk boundary.
+ s.chunkOf(sc).free(si, pallocChunkPages-si)
+ for c := sc + 1; c < ec; c++ {
+ s.chunkOf(c).freeAll()
+ }
+ s.chunkOf(ec).free(0, ei+1)
+ }
+ }
+ s.update(base, npages, true, false)
+}
+
+const (
+ pallocSumBytes = unsafe.Sizeof(pallocSum(0))
+
+ // maxPackedValue is the maximum value that any of the three fields in
+ // the pallocSum may take on.
+ maxPackedValue = 1 << logMaxPackedValue
+ logMaxPackedValue = logPallocChunkPages + (summaryLevels-1)*summaryLevelBits
+
+ freeChunkSum = pallocSum(uint64(pallocChunkPages) |
+ uint64(pallocChunkPages<<logMaxPackedValue) |
+ uint64(pallocChunkPages<<(2*logMaxPackedValue)))
+)
+
+// pallocSum is a packed summary type which packs three numbers: start, max,
+// and end into a single 8-byte value. Each of these values is a summary of
+// a bitmap and is thus a count; each may have a maximum value of
+// 2^21 - 1, or all three may be equal to 2^21. The latter case is represented
+// by just setting the 64th bit.
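+//
+// Restating the layout produced by packPallocSum below, when no value
+// saturates:
+//
+// bits [0, logMaxPackedValue): start
+// bits [logMaxPackedValue, 2*logMaxPackedValue): max
+// bits [2*logMaxPackedValue, 3*logMaxPackedValue): end
+// bit 63: set only when all three values equal maxPackedValue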
+type pallocSum uint64
+
+// packPallocSum takes a start, max, and end value and produces a pallocSum.
+func packPallocSum(start, max, end uint) pallocSum {
+ if max == maxPackedValue {
+ return pallocSum(uint64(1 << 63))
+ }
+ return pallocSum((uint64(start) & (maxPackedValue - 1)) |
+ ((uint64(max) & (maxPackedValue - 1)) << logMaxPackedValue) |
+ ((uint64(end) & (maxPackedValue - 1)) << (2 * logMaxPackedValue)))
+}
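+
+// For example (illustrative only):
+//
+// s := packPallocSum(1, 2, 3)
+// // s.start() == 1, s.max() == 2, s.end() == 3
+//
+// s = packPallocSum(maxPackedValue, maxPackedValue, maxPackedValue)
+// // Only bit 63 is set; s.unpack() reports maxPackedValue for all three.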
+
+// start extracts the start value from a packed sum.
+func (p pallocSum) start() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint(uint64(p) & (maxPackedValue - 1))
+}
+
+// max extracts the max value from a packed sum.
+func (p pallocSum) max() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1))
+}
+
+// end extracts the end value from a packed sum.
+func (p pallocSum) end() uint {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue
+ }
+ return uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1))
+}
+
+// unpack unpacks all three values from the summary.
+func (p pallocSum) unpack() (uint, uint, uint) {
+ if uint64(p)&uint64(1<<63) != 0 {
+ return maxPackedValue, maxPackedValue, maxPackedValue
+ }
+ return uint(uint64(p) & (maxPackedValue - 1)),
+ uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1)),
+ uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1))
+}
+
+// mergeSummaries merges consecutive summaries, each of which may represent at
+// most 1 << logMaxPagesPerSum pages, into a single summary.
+func mergeSummaries(sums []pallocSum, logMaxPagesPerSum uint) pallocSum {
+ // Merge the summaries in sums into one.
+ //
+ // We do this by keeping a running summary representing the merged
+ // summaries of sums[:i] in start, max, and end.
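+ //
+ // For example (illustrative only), with logMaxPagesPerSum == L (L >= 2),
+ // merging a fully-free summary (1<<L, 1<<L, 1<<L) with (3, 5, 2) yields
+ // (1<<L + 3, 1<<L + 3, 2): the free tail of the first summary joins the
+ // free head of the second to form the new start and max, while end is
+ // taken from the second summary because it is not fully free.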
+ start, max, end := sums[0].unpack()
+ for i := 1; i < len(sums); i++ {
+ // Merge in sums[i].
+ si, mi, ei := sums[i].unpack()
+
+ // Merge in sums[i].start only if the running summary is
+ // completely free, otherwise this summary's start
+ // plays no role in the combined sum.
+ if start == uint(i)<<logMaxPagesPerSum {
+ start += si
+ }
+
+ // Recompute the max value of the running sum by looking
+ // across the boundary between the running sum and sums[i]
+ // and at the max of sums[i], taking the greatest of those two
+ // and the max of the running sum.
+ if end+si > max {
+ max = end + si
+ }
+ if mi > max {
+ max = mi
+ }
+
+ // Merge in end by checking if this new summary is totally
+ // free. If it is, then we want to extend the running sum's
+ // end by the new summary. If not, then we have some alloc'd
+ // pages in there and we just want to take the end value in
+ // sums[i].
+ if ei == 1<<logMaxPagesPerSum {
+ end += 1 << logMaxPagesPerSum
+ } else {
+ end = ei
+ }
+ }
+ return packPallocSum(start, max, end)
+}
diff --git a/libgo/go/runtime/mpagealloc_32bit.go b/libgo/go/runtime/mpagealloc_32bit.go
new file mode 100644
index 0000000..d18970c
--- /dev/null
+++ b/libgo/go/runtime/mpagealloc_32bit.go
@@ -0,0 +1,116 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 arm mips mipsle wasm darwin,arm64 amd64p32 armbe m68k mips64p32 mips64p32le nios2 ppc s390 sh shbe sparc
+
+// wasm is treated as a 32-bit architecture for the purposes of the page
+// allocator, even though it has 64-bit pointers. This is because any wasm
+// pointer always has its top 32 bits as zero, so the effective heap address
+// space is only 2^32 bytes in size (see heapAddrBits).
+
+// darwin/arm64 is treated as a 32-bit architecture for the purposes of the
+// page allocator, even though it has 64-bit pointers and a 33-bit address
+// space (see heapAddrBits). The 33-bit address space cannot be rounded up
+// to 64 bits because there are too many summary levels to fit in just 33
+// bits.
+
+package runtime
+
+import "unsafe"
+
+const (
+ // The number of levels in the radix tree.
+ summaryLevels = 4
+
+ // Constants for testing.
+ pageAlloc32Bit = 1
+ pageAlloc64Bit = 0
+
+ // Number of bits needed to represent all indices into the L1 of the
+ // chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this number change.
+ pallocChunksL1Bits = 0
+)
+
+// See comment in mpagealloc_64bit.go.
+var levelBits = [summaryLevels]uint{
+ summaryL0Bits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+}
+
+// See comment in mpagealloc_64bit.go.
+var levelShift = [summaryLevels]uint{
+ heapAddrBits - summaryL0Bits,
+ heapAddrBits - summaryL0Bits - 1*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 2*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 3*summaryLevelBits,
+}
+
+// See comment in mpagealloc_64bit.go.
+var levelLogPages = [summaryLevels]uint{
+ logPallocChunkPages + 3*summaryLevelBits,
+ logPallocChunkPages + 2*summaryLevelBits,
+ logPallocChunkPages + 1*summaryLevelBits,
+ logPallocChunkPages,
+}
+
+// See mpagealloc_64bit.go for details.
+func (s *pageAlloc) sysInit() {
+ // Calculate how much memory all our entries will take up.
+ //
+ // This should be around 12 KiB or less.
+ totalSize := uintptr(0)
+ for l := 0; l < summaryLevels; l++ {
+ totalSize += (uintptr(1) << (heapAddrBits - levelShift[l])) * pallocSumBytes
+ }
+ totalSize = alignUp(totalSize, physPageSize)
+
+ // Reserve memory for all levels in one go. There shouldn't be much for 32-bit.
+ reservation := sysReserve(nil, totalSize)
+ if reservation == nil {
+ throw("failed to reserve page summary memory")
+ }
+ // There isn't much. Just map it and mark it as used immediately.
+ sysMap(reservation, totalSize, s.sysStat)
+ sysUsed(reservation, totalSize)
+
+ // Iterate over the reservation and cut it up into slices.
+ //
+ // Maintain i as the byte offset from reservation where
+ // the new slice should start.
+ for l, shift := range levelShift {
+ entries := 1 << (heapAddrBits - shift)
+
+ // Put this reservation into a slice.
+ sl := notInHeapSlice{(*notInHeap)(reservation), 0, entries}
+ s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+
+ reservation = add(reservation, uintptr(entries)*pallocSumBytes)
+ }
+}
+
+// See mpagealloc_64bit.go for details.
+func (s *pageAlloc) sysGrow(base, limit uintptr) {
+ if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+ print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+ throw("sysGrow bounds not aligned to pallocChunkBytes")
+ }
+
+ // Walk up the tree and update the summary slices.
+ for l := len(s.summary) - 1; l >= 0; l-- {
+ // Figure out what part of the summary array this new address space needs.
+ // Note that we need to align the ranges to the block width (1<<levelBits[l])
+ // at this level because the full block is needed to compute the summary for
+ // the next level.
+ lo, hi := addrsToSummaryRange(l, base, limit)
+ _, hi = blockAlignSummaryRange(l, lo, hi)
+ if hi > len(s.summary[l]) {
+ s.summary[l] = s.summary[l][:hi]
+ }
+ }
+}
diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go
new file mode 100644
index 0000000..dd44da1
--- /dev/null
+++ b/libgo/go/runtime/mpagealloc_64bit.go
@@ -0,0 +1,180 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le s390x arm64be alpha sparc64 ia64 riscv64
+
+// See mpagealloc_32bit.go for why darwin/arm64 is excluded here.
+
+package runtime
+
+import "unsafe"
+
+const (
+ // The number of levels in the radix tree.
+ summaryLevels = 5
+
+ // Constants for testing.
+ pageAlloc32Bit = 0
+ pageAlloc64Bit = 1
+
+ // Number of bits needed to represent all indices into the L1 of the
+ // chunks map.
+ //
+ // See (*pageAlloc).chunks for more details. Update the documentation
+ // there should this number change.
+ pallocChunksL1Bits = 13
+)
+
+// levelBits is the number of bits in the radix for a given level in the super summary
+// structure.
+//
+// The sum of all the entries of levelBits should equal heapAddrBits.
+var levelBits = [summaryLevels]uint{
+ summaryL0Bits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+ summaryLevelBits,
+}
+
+// levelShift is the number of bits to shift to acquire the radix for a given level
+// in the super summary structure.
+//
+// With levelShift, one can compute the index of the summary at level l related to a
+// pointer p by doing:
+// p >> levelShift[l]
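+//
+// For example (illustrative, assuming heapAddrBits == 48 and summaryL0Bits == 14),
+// levelShift[0] == 34, so the root-level summary index for a pointer p is p >> 34.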
+var levelShift = [summaryLevels]uint{
+ heapAddrBits - summaryL0Bits,
+ heapAddrBits - summaryL0Bits - 1*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 2*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 3*summaryLevelBits,
+ heapAddrBits - summaryL0Bits - 4*summaryLevelBits,
+}
+
+// levelLogPages is the log2 of the maximum number of runtime pages in the address space
+// a summary in the given level represents.
+//
+// The leaf level always represents exactly log2 of 1 chunk's worth of pages.
+var levelLogPages = [summaryLevels]uint{
+ logPallocChunkPages + 4*summaryLevelBits,
+ logPallocChunkPages + 3*summaryLevelBits,
+ logPallocChunkPages + 2*summaryLevelBits,
+ logPallocChunkPages + 1*summaryLevelBits,
+ logPallocChunkPages,
+}
+
+// sysInit performs architecture-dependent initialization of fields
+// in pageAlloc. pageAlloc should be uninitialized except for sysStat
+// if any runtime statistic should be updated.
+func (s *pageAlloc) sysInit() {
+ // Reserve memory for each level. This will get mapped in
+ // as R/W by setArenas.
+ for l, shift := range levelShift {
+ entries := 1 << (heapAddrBits - shift)
+
+ // Reserve b bytes of memory anywhere in the address space.
+ b := alignUp(uintptr(entries)*pallocSumBytes, physPageSize)
+ r := sysReserve(nil, b)
+ if r == nil {
+ throw("failed to reserve page summary memory")
+ }
+
+ // Put this reservation into a slice.
+ sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
+ s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+ }
+}
+
+// sysGrow performs architecture-dependent operations on heap
+// growth for the page allocator, such as mapping in new memory
+// for summaries. It also updates the length of the slices in
+// s.summary.
+//
+// base is the base of the newly-added heap memory and limit is
+// the first address past the end of the newly-added heap memory.
+// Both must be aligned to pallocChunkBytes.
+//
+// The caller must update s.start and s.end after calling sysGrow.
+func (s *pageAlloc) sysGrow(base, limit uintptr) {
+ if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+ print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+ throw("sysGrow bounds not aligned to pallocChunkBytes")
+ }
+
+ // addrRangeToSummaryRange converts a range of addresses into a range
+ // of summary indices which must be mapped to support those addresses
+ // in the summary range.
+ addrRangeToSummaryRange := func(level int, r addrRange) (int, int) {
+ sumIdxBase, sumIdxLimit := addrsToSummaryRange(level, r.base, r.limit)
+ return blockAlignSummaryRange(level, sumIdxBase, sumIdxLimit)
+ }
+
+ // summaryRangeToSumAddrRange converts a range of indices in any
+ // level of s.summary into page-aligned addresses which cover that
+ // range of indices.
+ summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange {
+ baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize)
+ limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize)
+ base := unsafe.Pointer(&s.summary[level][0])
+ return addrRange{
+ uintptr(add(base, baseOffset)),
+ uintptr(add(base, limitOffset)),
+ }
+ }
+
+ // addrRangeToSumAddrRange is a convenience function that converts
+ // an address range r to the address range of the given summary level
+ // that stores the summaries for r.
+ addrRangeToSumAddrRange := func(level int, r addrRange) addrRange {
+ sumIdxBase, sumIdxLimit := addrRangeToSummaryRange(level, r)
+ return summaryRangeToSumAddrRange(level, sumIdxBase, sumIdxLimit)
+ }
+
+ // Find the first inUse index which is strictly greater than base.
+ //
+ // Because this function will never be asked to remap the same memory
+ // twice, this index is effectively the index at which we would insert
+ // this new growth, and base will never overlap/be contained within
+ // any existing range.
+ //
+ // This will be used to look at what memory in the summary array is already
+ // mapped before and after this new range.
+ inUseIndex := s.inUse.findSucc(base)
+
+ // Walk up the radix tree and map summaries in as needed.
+ for l := range s.summary {
+ // Figure out what part of the summary array this new address space needs.
+ needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, addrRange{base, limit})
+
+ // Update the summary slices with a new upper-bound. This ensures
+ // we get tight bounds checks on at least the top bound.
+ //
+ // We must do this regardless of whether we map new memory.
+ if needIdxLimit > len(s.summary[l]) {
+ s.summary[l] = s.summary[l][:needIdxLimit]
+ }
+
+ // Compute the needed address range in the summary array for level l.
+ need := summaryRangeToSumAddrRange(l, needIdxBase, needIdxLimit)
+
+ // Prune need down to what needs to be newly mapped. Some parts of it may
+ // already be mapped by what inUse describes due to page alignment requirements
+ // for mapping. The invariants of this pruning hold because this
+ // function will never be asked to remap the same memory twice.
+ if inUseIndex > 0 {
+ need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1]))
+ }
+ if inUseIndex < len(s.inUse.ranges) {
+ need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex]))
+ }
+ // It's possible that after our pruning above, there's nothing new to map.
+ if need.size() == 0 {
+ continue
+ }
+
+ // Map and commit need.
+ sysMap(unsafe.Pointer(need.base), need.size(), s.sysStat)
+ sysUsed(unsafe.Pointer(need.base), need.size())
+ }
+}
diff --git a/libgo/go/runtime/mpagealloc_test.go b/libgo/go/runtime/mpagealloc_test.go
new file mode 100644
index 0000000..6c48296
--- /dev/null
+++ b/libgo/go/runtime/mpagealloc_test.go
@@ -0,0 +1,921 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ . "runtime"
+ "testing"
+)
+
+func checkPageAlloc(t *testing.T, want, got *PageAlloc) {
+ // Ensure start and end are correct.
+ wantStart, wantEnd := want.Bounds()
+ gotStart, gotEnd := got.Bounds()
+ if gotStart != wantStart {
+ t.Fatalf("start values not equal: got %d, want %d", gotStart, wantStart)
+ }
+ if gotEnd != wantEnd {
+ t.Fatalf("end values not equal: got %d, want %d", gotEnd, wantEnd)
+ }
+
+ for i := gotStart; i < gotEnd; i++ {
+ // Check the bitmaps. Note that we may have nil data.
+ gb, wb := got.PallocData(i), want.PallocData(i)
+ if gb == nil && wb == nil {
+ continue
+ }
+ if (gb == nil && wb != nil) || (gb != nil && wb == nil) {
+ t.Errorf("chunk %d nilness mismatch", i)
+ }
+ if !checkPallocBits(t, gb.PallocBits(), wb.PallocBits()) {
+ t.Logf("in chunk %d (mallocBits)", i)
+ }
+ if !checkPallocBits(t, gb.Scavenged(), wb.Scavenged()) {
+ t.Logf("in chunk %d (scavenged)", i)
+ }
+ }
+ // TODO(mknyszek): Verify summaries too?
+}
+
+func TestPageAllocGrow(t *testing.T) {
+ type test struct {
+ chunks []ChunkIdx
+ inUse []AddrRange
+ }
+ tests := map[string]test{
+ "One": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ },
+ },
+ "Contiguous2": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
+ },
+ },
+ "Contiguous5": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 3,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "Discontiguous": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "Mixed": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 2,
+ BaseChunkIdx + 4,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ },
+ },
+ "WildlyDiscontiguous": {
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 1,
+ BaseChunkIdx + 0x10,
+ BaseChunkIdx + 0x21,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)},
+ {PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)},
+ {PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)},
+ },
+ },
+ "ManyDiscontiguous": {
+ // The initial cap is 16. Test 33 ranges, to exercise the growth path (twice).
+ chunks: []ChunkIdx{
+ BaseChunkIdx, BaseChunkIdx + 2, BaseChunkIdx + 4, BaseChunkIdx + 6,
+ BaseChunkIdx + 8, BaseChunkIdx + 10, BaseChunkIdx + 12, BaseChunkIdx + 14,
+ BaseChunkIdx + 16, BaseChunkIdx + 18, BaseChunkIdx + 20, BaseChunkIdx + 22,
+ BaseChunkIdx + 24, BaseChunkIdx + 26, BaseChunkIdx + 28, BaseChunkIdx + 30,
+ BaseChunkIdx + 32, BaseChunkIdx + 34, BaseChunkIdx + 36, BaseChunkIdx + 38,
+ BaseChunkIdx + 40, BaseChunkIdx + 42, BaseChunkIdx + 44, BaseChunkIdx + 46,
+ BaseChunkIdx + 48, BaseChunkIdx + 50, BaseChunkIdx + 52, BaseChunkIdx + 54,
+ BaseChunkIdx + 56, BaseChunkIdx + 58, BaseChunkIdx + 60, BaseChunkIdx + 62,
+ BaseChunkIdx + 64,
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)},
+ {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)},
+ {PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)},
+ {PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)},
+ {PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)},
+ {PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)},
+ {PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)},
+ {PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)},
+ {PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)},
+ {PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)},
+ {PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)},
+ {PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)},
+ {PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)},
+ {PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)},
+ {PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)},
+ {PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)},
+ {PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)},
+ {PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)},
+ {PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)},
+ {PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)},
+ {PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)},
+ {PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)},
+ {PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)},
+ {PageBase(BaseChunkIdx+48, 0), PageBase(BaseChunkIdx+49, 0)},
+ {PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)},
+ {PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)},
+ {PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)},
+ {PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)},
+ {PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)},
+ {PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)},
+ {PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)},
+ {PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)},
+ },
+ },
+ }
+ if PageAlloc64Bit != 0 {
+ tests["ExtremelyDiscontiguous"] = test{
+ chunks: []ChunkIdx{
+ BaseChunkIdx,
+ BaseChunkIdx + 0x100000, // constant translates to O(TiB)
+ },
+ inUse: []AddrRange{
+ {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)},
+ {PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)},
+ },
+ }
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ // By creating a new pageAlloc, we will
+ // grow it for each chunk defined in x.
+ x := make(map[ChunkIdx][]BitRange)
+ for _, c := range v.chunks {
+ x[c] = []BitRange{}
+ }
+ b := NewPageAlloc(x, nil)
+ defer FreePageAlloc(b)
+
+ got := b.InUse()
+ want := v.inUse
+
+ // Check for mismatches.
+ if len(got) != len(want) {
+ t.Fail()
+ } else {
+ for i := range want {
+ if want[i] != got[i] {
+ t.Fail()
+ break
+ }
+ }
+ }
+ if t.Failed() {
+ t.Logf("found inUse mismatch")
+ t.Logf("got:")
+ for i, r := range got {
+ t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
+ }
+ t.Logf("want:")
+ for i, r := range want {
+ t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit)
+ }
+ }
+ })
+ }
+}
+
+func TestPageAllocAlloc(t *testing.T) {
+ type hit struct {
+ npages, base, scav uintptr
+ }
+ tests := map[string]struct {
+ scav map[ChunkIdx][]BitRange
+ before map[ChunkIdx][]BitRange
+ after map[ChunkIdx][]BitRange
+ hits []hit
+ }{
+ "AllFree1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {2, 2}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx, 0), PageSize},
+ {1, PageBase(BaseChunkIdx, 1), 0},
+ {1, PageBase(BaseChunkIdx, 2), PageSize},
+ {1, PageBase(BaseChunkIdx, 3), PageSize},
+ {1, PageBase(BaseChunkIdx, 4), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 5}},
+ },
+ },
+ "ManyArena1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 1}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx+2, PallocChunkPages-1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguous1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {1, PageBase(BaseChunkIdx+0xff, 0), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 1}},
+ },
+ },
+ "AllFree2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 3}, {7, 1}},
+ },
+ hits: []hit{
+ {2, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {2, PageBase(BaseChunkIdx, 2), PageSize},
+ {2, PageBase(BaseChunkIdx, 4), 0},
+ {2, PageBase(BaseChunkIdx, 6), PageSize},
+ {2, PageBase(BaseChunkIdx, 8), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 10}},
+ },
+ },
+ "Straddle2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages - 1}},
+ BaseChunkIdx + 1: {{1, PallocChunkPages - 1}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages - 1, 1}},
+ BaseChunkIdx + 1: {},
+ },
+ hits: []hit{
+ {2, PageBase(BaseChunkIdx, PallocChunkPages-1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFree5": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 8}, {9, 1}, {17, 5}},
+ },
+ hits: []hit{
+ {5, PageBase(BaseChunkIdx, 0), 5 * PageSize},
+ {5, PageBase(BaseChunkIdx, 5), 4 * PageSize},
+ {5, PageBase(BaseChunkIdx, 10), 0},
+ {5, PageBase(BaseChunkIdx, 15), 3 * PageSize},
+ {5, PageBase(BaseChunkIdx, 20), 2 * PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 25}},
+ },
+ },
+ "AllFree64": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{21, 1}, {63, 65}},
+ },
+ hits: []hit{
+ {64, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {64, PageBase(BaseChunkIdx, 64), 64 * PageSize},
+ {64, PageBase(BaseChunkIdx, 128), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 192}},
+ },
+ },
+ "AllFree65": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{129, 1}},
+ },
+ hits: []hit{
+ {65, PageBase(BaseChunkIdx, 0), 0},
+ {65, PageBase(BaseChunkIdx, 65), PageSize},
+ {65, PageBase(BaseChunkIdx, 130), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 195}},
+ },
+ },
+ // TODO(mknyszek): Add tests close to the chunk size.
+ "ExhaustPallocChunkPages-3": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{10, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages - 3, PageBase(BaseChunkIdx, 0), PageSize},
+ {PallocChunkPages - 3, 0, 0},
+ {1, PageBase(BaseChunkIdx, PallocChunkPages-3), 0},
+ {2, PageBase(BaseChunkIdx, PallocChunkPages-2), 0},
+ {1, 0, 0},
+ {PallocChunkPages - 3, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFreePallocChunkPages": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 1}, {PallocChunkPages - 1, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages, PageBase(BaseChunkIdx, 0), 2 * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {{3, 100}},
+ },
+ hits: []hit{
+ {PallocChunkPages, PageBase(BaseChunkIdx, PallocChunkPages/2), 100 * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages+1": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ hits: []hit{
+ {PallocChunkPages + 1, PageBase(BaseChunkIdx, PallocChunkPages/2), (PallocChunkPages + 1) * PageSize},
+ {PallocChunkPages, 0, 0},
+ {1, PageBase(BaseChunkIdx+1, PallocChunkPages/2+1), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages/2 + 2}},
+ },
+ },
+ "AllFreePallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx, 0), 0},
+ {PallocChunkPages * 2, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguousPallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 0x40: {},
+ BaseChunkIdx + 0x41: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0x40: {},
+ BaseChunkIdx + 0x41: {},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx+0x40, 0), 0},
+ {21, PageBase(BaseChunkIdx, 0), 21 * PageSize},
+ {1, PageBase(BaseChunkIdx, 21), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 22}},
+ BaseChunkIdx + 0x40: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0x41: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages*2": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 7}},
+ BaseChunkIdx + 1: {{3, 5}, {121, 10}},
+ BaseChunkIdx + 2: {{PallocChunkPages/2 + 12, 2}},
+ },
+ hits: []hit{
+ {PallocChunkPages * 2, PageBase(BaseChunkIdx, PallocChunkPages/2), 15 * PageSize},
+ {PallocChunkPages * 2, 0, 0},
+ {1, 0, 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "StraddlePallocChunkPages*5/4": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages * 3 / 4}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages * 3 / 4}},
+ BaseChunkIdx + 3: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{PallocChunkPages / 2, PallocChunkPages/4 + 1}},
+ BaseChunkIdx + 2: {{PallocChunkPages / 3, 1}},
+ BaseChunkIdx + 3: {{PallocChunkPages * 2 / 3, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages * 5 / 4, PageBase(BaseChunkIdx+2, PallocChunkPages*3/4), PageSize},
+ {PallocChunkPages * 5 / 4, 0, 0},
+ {1, PageBase(BaseChunkIdx+1, PallocChunkPages*3/4), PageSize},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages*3/4 + 1}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ },
+ },
+ "AllFreePallocChunkPages*7+5": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{50, 1}},
+ BaseChunkIdx + 1: {{31, 1}},
+ BaseChunkIdx + 2: {{7, 1}},
+ BaseChunkIdx + 3: {{200, 1}},
+ BaseChunkIdx + 4: {{3, 1}},
+ BaseChunkIdx + 5: {{51, 1}},
+ BaseChunkIdx + 6: {{20, 1}},
+ BaseChunkIdx + 7: {{1, 1}},
+ },
+ hits: []hit{
+ {PallocChunkPages*7 + 5, PageBase(BaseChunkIdx, 0), 8 * PageSize},
+ {PallocChunkPages*7 + 5, 0, 0},
+ {1, PageBase(BaseChunkIdx+7, 5), 0},
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ BaseChunkIdx + 4: {{0, PallocChunkPages}},
+ BaseChunkIdx + 5: {{0, PallocChunkPages}},
+ BaseChunkIdx + 6: {{0, PallocChunkPages}},
+ BaseChunkIdx + 7: {{0, 6}},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, v.scav)
+ defer FreePageAlloc(b)
+
+ for iter, i := range v.hits {
+ a, s := b.Alloc(i.npages)
+ if a != i.base {
+ t.Fatalf("bad alloc #%d: want base 0x%x, got 0x%x", iter+1, i.base, a)
+ }
+ if s != i.scav {
+ t.Fatalf("bad alloc #%d: want scav %d, got %d", iter+1, i.scav, s)
+ }
+ }
+ want := NewPageAlloc(v.after, v.scav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocExhaust(t *testing.T) {
+ for _, npages := range []uintptr{1, 2, 3, 4, 5, 8, 16, 64, 1024, 1025, 2048, 2049} {
+ npages := npages
+ t.Run(fmt.Sprintf("%d", npages), func(t *testing.T) {
+ // Construct b.
+ bDesc := make(map[ChunkIdx][]BitRange)
+ for i := ChunkIdx(0); i < 4; i++ {
+ bDesc[BaseChunkIdx+i] = []BitRange{}
+ }
+ b := NewPageAlloc(bDesc, nil)
+ defer FreePageAlloc(b)
+
+ // Allocate into b with npages until we've exhausted the heap.
+ nAlloc := (PallocChunkPages * 4) / int(npages)
+ for i := 0; i < nAlloc; i++ {
+ addr := PageBase(BaseChunkIdx, uint(i)*uint(npages))
+ if a, _ := b.Alloc(npages); a != addr {
+ t.Fatalf("bad alloc #%d: want 0x%x, got 0x%x", i+1, addr, a)
+ }
+ }
+
+ // Check to make sure the next allocation fails.
+ if a, _ := b.Alloc(npages); a != 0 {
+ t.Fatalf("bad alloc #%d: want 0, got 0x%x", nAlloc, a)
+ }
+
+ // Construct what we want the heap to look like now.
+ allocPages := nAlloc * int(npages)
+ wantDesc := make(map[ChunkIdx][]BitRange)
+ for i := ChunkIdx(0); i < 4; i++ {
+ if allocPages >= PallocChunkPages {
+ wantDesc[BaseChunkIdx+i] = []BitRange{{0, PallocChunkPages}}
+ allocPages -= PallocChunkPages
+ } else if allocPages > 0 {
+ wantDesc[BaseChunkIdx+i] = []BitRange{{0, uint(allocPages)}}
+ allocPages = 0
+ } else {
+ wantDesc[BaseChunkIdx+i] = []BitRange{}
+ }
+ }
+ want := NewPageAlloc(wantDesc, nil)
+ defer FreePageAlloc(want)
+
+ // Check to make sure the heap b matches what we want.
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocFree(t *testing.T) {
+ tests := map[string]struct {
+ before map[ChunkIdx][]BitRange
+ after map[ChunkIdx][]BitRange
+ npages uintptr
+ frees []uintptr
+ }{
+ "Free1": {
+ npages: 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 1),
+ PageBase(BaseChunkIdx, 2),
+ PageBase(BaseChunkIdx, 3),
+ PageBase(BaseChunkIdx, 4),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{5, PallocChunkPages - 5}},
+ },
+ },
+ "ManyArena1": {
+ npages: 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ PageBase(BaseChunkIdx+1, 0),
+ PageBase(BaseChunkIdx+2, PallocChunkPages-1),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}, {PallocChunkPages/2 + 1, PallocChunkPages/2 - 1}},
+ BaseChunkIdx + 1: {{1, PallocChunkPages - 1}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 1}},
+ },
+ },
+ "Free2": {
+ npages: 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 2),
+ PageBase(BaseChunkIdx, 4),
+ PageBase(BaseChunkIdx, 6),
+ PageBase(BaseChunkIdx, 8),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{10, PallocChunkPages - 10}},
+ },
+ },
+ "Straddle2": {
+ npages: 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages - 1, 1}},
+ BaseChunkIdx + 1: {{0, 1}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages-1),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "Free5": {
+ npages: 5,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 5),
+ PageBase(BaseChunkIdx, 10),
+ PageBase(BaseChunkIdx, 15),
+ PageBase(BaseChunkIdx, 20),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{25, PallocChunkPages - 25}},
+ },
+ },
+ "Free64": {
+ npages: 64,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 64),
+ PageBase(BaseChunkIdx, 128),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{192, PallocChunkPages - 192}},
+ },
+ },
+ "Free65": {
+ npages: 65,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ PageBase(BaseChunkIdx, 65),
+ PageBase(BaseChunkIdx, 130),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{195, PallocChunkPages - 195}},
+ },
+ },
+ "FreePallocChunkPages": {
+ npages: PallocChunkPages,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ },
+ "StraddlePallocChunkPages": {
+ npages: PallocChunkPages,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages / 2}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "StraddlePallocChunkPages+1": {
+ npages: PallocChunkPages + 1,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {{PallocChunkPages/2 + 1, PallocChunkPages/2 - 1}},
+ },
+ },
+ "FreePallocChunkPages*2": {
+ npages: PallocChunkPages * 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ },
+ },
+ "StraddlePallocChunkPages*2": {
+ npages: PallocChunkPages * 2,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, PallocChunkPages/2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages / 2}},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {{PallocChunkPages / 2, PallocChunkPages / 2}},
+ },
+ },
+ "AllFreePallocChunkPages*7+5": {
+ npages: PallocChunkPages*7 + 5,
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ BaseChunkIdx + 3: {{0, PallocChunkPages}},
+ BaseChunkIdx + 4: {{0, PallocChunkPages}},
+ BaseChunkIdx + 5: {{0, PallocChunkPages}},
+ BaseChunkIdx + 6: {{0, PallocChunkPages}},
+ BaseChunkIdx + 7: {{0, PallocChunkPages}},
+ },
+ frees: []uintptr{
+ PageBase(BaseChunkIdx, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {{5, PallocChunkPages - 5}},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, nil)
+ defer FreePageAlloc(b)
+
+ for _, addr := range v.frees {
+ b.Free(addr, v.npages)
+ }
+ want := NewPageAlloc(v.after, nil)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
+
+func TestPageAllocAllocAndFree(t *testing.T) {
+ type hit struct {
+ alloc bool
+ npages uintptr
+ base uintptr
+ }
+ tests := map[string]struct {
+ init map[ChunkIdx][]BitRange
+ hits []hit
+ }{
+ // TODO(mknyszek): Write more tests here.
+ "Chunks8": {
+ init: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ BaseChunkIdx + 1: {},
+ BaseChunkIdx + 2: {},
+ BaseChunkIdx + 3: {},
+ BaseChunkIdx + 4: {},
+ BaseChunkIdx + 5: {},
+ BaseChunkIdx + 6: {},
+ BaseChunkIdx + 7: {},
+ },
+ hits: []hit{
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {false, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ {true, 1, PageBase(BaseChunkIdx, 0)},
+ {false, 1, PageBase(BaseChunkIdx, 0)},
+ {true, PallocChunkPages * 8, PageBase(BaseChunkIdx, 0)},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.init, nil)
+ defer FreePageAlloc(b)
+
+ for iter, i := range v.hits {
+ if i.alloc {
+ if a, _ := b.Alloc(i.npages); a != i.base {
+ t.Fatalf("bad alloc #%d: want 0x%x, got 0x%x", iter+1, i.base, a)
+ }
+ } else {
+ b.Free(i.base, i.npages)
+ }
+ }
+ })
+ }
+}
diff --git a/libgo/go/runtime/mpagecache.go b/libgo/go/runtime/mpagecache.go
new file mode 100644
index 0000000..9fc338b
--- /dev/null
+++ b/libgo/go/runtime/mpagecache.go
@@ -0,0 +1,156 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+const pageCachePages = 8 * unsafe.Sizeof(pageCache{}.cache)
+
+// pageCache represents a per-p cache of pages the allocator can
+// allocate from without a lock. More specifically, it represents
+// a pageCachePages*pageSize chunk of memory with 0 or more free
+// pages in it.
+type pageCache struct {
+ base uintptr // base address of the chunk
+ cache uint64 // 64-bit bitmap representing free pages (1 means free)
+ scav uint64 // 64-bit bitmap representing scavenged pages (1 means scavenged)
+}
+
+// empty returns true if the pageCache has no free pages, and false
+// otherwise.
+func (c *pageCache) empty() bool {
+ return c.cache == 0
+}
+
+// alloc allocates npages from the page cache and is the main entry
+// point for allocation.
+//
+// Returns a base address and the amount of scavenged memory in the
+// allocated region in bytes.
+//
+// Returns a base address of zero on failure, in which case the
+// amount of scavenged memory should be ignored.
+func (c *pageCache) alloc(npages uintptr) (uintptr, uintptr) {
+ if c.cache == 0 {
+ return 0, 0
+ }
+ if npages == 1 {
+ i := uintptr(sys.TrailingZeros64(c.cache))
+ scav := (c.scav >> i) & 1
+ c.cache &^= 1 << i // set bit to mark in-use
+ c.scav &^= 1 << i // clear bit to mark unscavenged
+ return c.base + i*pageSize, uintptr(scav) * pageSize
+ }
+ return c.allocN(npages)
+}
+
+// allocN is a helper which attempts to allocate npages worth of pages
+// from the cache. It represents the general case for allocating from
+// the page cache.
+//
+// Returns a base address and the amount of scavenged memory in the
+// allocated region in bytes.
+func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) {
+ i := findBitRange64(c.cache, uint(npages))
+ if i >= 64 {
+ return 0, 0
+ }
+ mask := ((uint64(1) << npages) - 1) << i
+ scav := sys.OnesCount64(c.scav & mask)
+ c.cache &^= mask // mark in-use bits
+ c.scav &^= mask // clear scavenged bits
+ return c.base + uintptr(i*pageSize), uintptr(scav) * pageSize
+}
+
+// flush empties out unallocated free pages in the given cache
+// into s. Then, it clears the cache, such that empty returns
+// true.
+//
+// s.mheapLock must be held or the world must be stopped.
+func (c *pageCache) flush(s *pageAlloc) {
+ if c.empty() {
+ return
+ }
+ ci := chunkIndex(c.base)
+ pi := chunkPageIndex(c.base)
+
+ // This method is called very infrequently, so just do the
+ // slower, safer thing by iterating over each bit individually.
+ for i := uint(0); i < 64; i++ {
+ if c.cache&(1<<i) != 0 {
+ s.chunkOf(ci).free1(pi + i)
+ }
+ if c.scav&(1<<i) != 0 {
+ s.chunkOf(ci).scavenged.setRange(pi+i, 1)
+ }
+ }
+ // Since this is a lot like a free, we need to make sure
+ // we update the searchAddr just like free does.
+ if s.compareSearchAddrTo(c.base) < 0 {
+ s.searchAddr = c.base
+ }
+ s.update(c.base, pageCachePages, false, false)
+ *c = pageCache{}
+}
+
+// allocToCache acquires a pageCachePages-aligned chunk of free pages which
+// may not be contiguous, and returns a pageCache structure which owns the
+// chunk.
+//
+// s.mheapLock must be held.
+func (s *pageAlloc) allocToCache() pageCache {
+ // If the searchAddr refers to a region which has a higher address than
+ // any known chunk, then we know we're out of memory.
+ if chunkIndex(s.searchAddr) >= s.end {
+ return pageCache{}
+ }
+ c := pageCache{}
+ ci := chunkIndex(s.searchAddr) // chunk index
+ if s.summary[len(s.summary)-1][ci] != 0 {
+ // Fast path: there are free pages at or near searchAddr.
+ chunk := s.chunkOf(ci)
+ j, _ := chunk.find(1, chunkPageIndex(s.searchAddr))
+ if j < 0 {
+ throw("bad summary data")
+ }
+ c = pageCache{
+ base: chunkBase(ci) + alignDown(uintptr(j), 64)*pageSize,
+ cache: ^chunk.pages64(j),
+ scav: chunk.scavenged.block64(j),
+ }
+ } else {
+ // Slow path: there was nothing free at searchAddr, so go find
+ // the first free page the slow way.
+ addr, _ := s.find(1)
+ if addr == 0 {
+ // We failed to find adequate free space, so mark the searchAddr as OoM
+ // and return an empty pageCache.
+ s.searchAddr = maxSearchAddr
+ return pageCache{}
+ }
+ ci := chunkIndex(addr)
+ chunk := s.chunkOf(ci)
+ c = pageCache{
+ base: alignDown(addr, 64*pageSize),
+ cache: ^chunk.pages64(chunkPageIndex(addr)),
+ scav: chunk.scavenged.block64(chunkPageIndex(addr)),
+ }
+ }
+
+ // Set the bits as allocated and clear the scavenged bits.
+ s.allocRange(c.base, pageCachePages)
+
+ // Update as an allocation, but note that it's not contiguous.
+ s.update(c.base, pageCachePages, false, true)
+
+ // We're always searching for the first free page, and the pageCachePages
+ // pages covered by the cache have just been marked allocated, so we can
+ // always move the searchAddr past the cache.
+ s.searchAddr = c.base + pageSize*pageCachePages
+ return c
+}
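For illustration, the 64-bit cache/scav bitmaps above do all the real work in pageCache.alloc. The following standalone sketch (not part of the patch; the pageSize constant and the base address are placeholders) mirrors the npages == 1 fast path: find the lowest set (free) bit, clear it, and translate the bit index into an address.

package main

import (
	"fmt"
	"math/bits"
)

const pageSize = 8192 // placeholder page size for this sketch

// allocOne mirrors the single-page fast path of pageCache.alloc: pick the
// lowest set (free) bit, clear it, and turn the bit index into an address
// relative to the cache's base.
func allocOne(base uintptr, cache *uint64) (uintptr, bool) {
	if *cache == 0 {
		return 0, false // cache is empty
	}
	i := uintptr(bits.TrailingZeros64(*cache))
	*cache &^= 1 << i // mark the page in-use
	return base + i*pageSize, true
}

func main() {
	cache := uint64(0b1010) // pages 1 and 3 are free
	addr, ok := allocOne(0x100000, &cache)
	fmt.Printf("ok=%v addr=%#x cache=%#b\n", ok, addr, cache) // ok=true addr=0x102000 cache=0b1000
}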
diff --git a/libgo/go/runtime/mpagecache_test.go b/libgo/go/runtime/mpagecache_test.go
new file mode 100644
index 0000000..6fdaa04
--- /dev/null
+++ b/libgo/go/runtime/mpagecache_test.go
@@ -0,0 +1,364 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+func checkPageCache(t *testing.T, got, want PageCache) {
+ if got.Base() != want.Base() {
+ t.Errorf("bad pageCache base: got 0x%x, want 0x%x", got.Base(), want.Base())
+ }
+ if got.Cache() != want.Cache() {
+ t.Errorf("bad pageCache bits: got %016x, want %016x", got.Base(), want.Base())
+ }
+ if got.Scav() != want.Scav() {
+ t.Errorf("bad pageCache scav: got %016x, want %016x", got.Scav(), want.Scav())
+ }
+}
+
+func TestPageCacheAlloc(t *testing.T) {
+ base := PageBase(BaseChunkIdx, 0)
+ type hit struct {
+ npages uintptr
+ base uintptr
+ scav uintptr
+ }
+ tests := map[string]struct {
+ cache PageCache
+ hits []hit
+ }{
+ "Empty": {
+ cache: NewPageCache(base, 0, 0),
+ hits: []hit{
+ {1, 0, 0},
+ {2, 0, 0},
+ {3, 0, 0},
+ {4, 0, 0},
+ {5, 0, 0},
+ {11, 0, 0},
+ {12, 0, 0},
+ {16, 0, 0},
+ {27, 0, 0},
+ {32, 0, 0},
+ {43, 0, 0},
+ {57, 0, 0},
+ {64, 0, 0},
+ {121, 0, 0},
+ },
+ },
+ "Lo1": {
+ cache: NewPageCache(base, 0x1, 0x1),
+ hits: []hit{
+ {1, base, PageSize},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Hi1": {
+ cache: NewPageCache(base, 0x1<<63, 0x1),
+ hits: []hit{
+ {1, base + 63*PageSize, 0},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Swiss1": {
+ cache: NewPageCache(base, 0x20005555, 0x5505),
+ hits: []hit{
+ {2, 0, 0},
+ {1, base, PageSize},
+ {1, base + 2*PageSize, PageSize},
+ {1, base + 4*PageSize, 0},
+ {1, base + 6*PageSize, 0},
+ {1, base + 8*PageSize, PageSize},
+ {1, base + 10*PageSize, PageSize},
+ {1, base + 12*PageSize, PageSize},
+ {1, base + 14*PageSize, PageSize},
+ {1, base + 29*PageSize, 0},
+ {1, 0, 0},
+ {10, 0, 0},
+ },
+ },
+ "Lo2": {
+ cache: NewPageCache(base, 0x3, 0x2<<62),
+ hits: []hit{
+ {2, base, 0},
+ {2, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Hi2": {
+ cache: NewPageCache(base, 0x3<<62, 0x3<<62),
+ hits: []hit{
+ {2, base + 62*PageSize, 2 * PageSize},
+ {2, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Swiss2": {
+ cache: NewPageCache(base, 0x3333<<31, 0x3030<<31),
+ hits: []hit{
+ {2, base + 31*PageSize, 0},
+ {2, base + 35*PageSize, 2 * PageSize},
+ {2, base + 39*PageSize, 0},
+ {2, base + 43*PageSize, 2 * PageSize},
+ {2, 0, 0},
+ },
+ },
+ "Hi53": {
+ cache: NewPageCache(base, ((uint64(1)<<53)-1)<<10, ((uint64(1)<<16)-1)<<10),
+ hits: []hit{
+ {53, base + 10*PageSize, 16 * PageSize},
+ {53, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "Full53": {
+ cache: NewPageCache(base, ^uint64(0), ((uint64(1)<<16)-1)<<10),
+ hits: []hit{
+ {53, base, 16 * PageSize},
+ {53, 0, 0},
+ {1, base + 53*PageSize, 0},
+ },
+ },
+ "Full64": {
+ cache: NewPageCache(base, ^uint64(0), ^uint64(0)),
+ hits: []hit{
+ {64, base, 64 * PageSize},
+ {64, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ "FullMixed": {
+ cache: NewPageCache(base, ^uint64(0), ^uint64(0)),
+ hits: []hit{
+ {5, base, 5 * PageSize},
+ {7, base + 5*PageSize, 7 * PageSize},
+ {1, base + 12*PageSize, 1 * PageSize},
+ {23, base + 13*PageSize, 23 * PageSize},
+ {63, 0, 0},
+ {3, base + 36*PageSize, 3 * PageSize},
+ {3, base + 39*PageSize, 3 * PageSize},
+ {3, base + 42*PageSize, 3 * PageSize},
+ {12, base + 45*PageSize, 12 * PageSize},
+ {11, 0, 0},
+ {4, base + 57*PageSize, 4 * PageSize},
+ {4, 0, 0},
+ {6, 0, 0},
+ {36, 0, 0},
+ {2, base + 61*PageSize, 2 * PageSize},
+ {3, 0, 0},
+ {1, base + 63*PageSize, 1 * PageSize},
+ {4, 0, 0},
+ {2, 0, 0},
+ {62, 0, 0},
+ {1, 0, 0},
+ },
+ },
+ }
+ for name, test := range tests {
+ test := test
+ t.Run(name, func(t *testing.T) {
+ c := test.cache
+ for i, h := range test.hits {
+ b, s := c.Alloc(h.npages)
+ if b != h.base {
+ t.Fatalf("bad alloc base #%d: got 0x%x, want 0x%x", i, b, h.base)
+ }
+ if s != h.scav {
+ t.Fatalf("bad alloc scav #%d: got %d, want %d", i, s, h.scav)
+ }
+ }
+ })
+ }
+}
+
+func TestPageCacheFlush(t *testing.T) {
+ bits64ToBitRanges := func(bits uint64, base uint) []BitRange {
+ var ranges []BitRange
+ start, size := uint(0), uint(0)
+ for i := 0; i < 64; i++ {
+ if bits&(1<<i) != 0 {
+ if size == 0 {
+ start = uint(i) + base
+ }
+ size++
+ } else {
+ if size != 0 {
+ ranges = append(ranges, BitRange{start, size})
+ size = 0
+ }
+ }
+ }
+ if size != 0 {
+ ranges = append(ranges, BitRange{start, size})
+ }
+ return ranges
+ }
+ runTest := func(t *testing.T, base uint, cache, scav uint64) {
+ // Set up the before state.
+ beforeAlloc := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{base, 64}},
+ }
+ beforeScav := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ }
+ b := NewPageAlloc(beforeAlloc, beforeScav)
+ defer FreePageAlloc(b)
+
+ // Create and flush the cache.
+ c := NewPageCache(PageBase(BaseChunkIdx, base), cache, scav)
+ c.Flush(b)
+ if !c.Empty() {
+ t.Errorf("pageCache flush did not clear cache")
+ }
+
+ // Set up the expected after state.
+ afterAlloc := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: bits64ToBitRanges(^cache, base),
+ }
+ afterScav := map[ChunkIdx][]BitRange{
+ BaseChunkIdx: bits64ToBitRanges(scav, base),
+ }
+ want := NewPageAlloc(afterAlloc, afterScav)
+ defer FreePageAlloc(want)
+
+ // Check to see if it worked.
+ checkPageAlloc(t, want, b)
+ }
+
+ // Empty.
+ runTest(t, 0, 0, 0)
+
+ // Full.
+ runTest(t, 0, ^uint64(0), ^uint64(0))
+
+ // Random.
+ for i := 0; i < 100; i++ {
+ // Generate random valid base within a chunk.
+ base := uint(rand.Intn(PallocChunkPages/64)) * 64
+
+ // Generate random cache.
+ cache := rand.Uint64()
+ scav := rand.Uint64() & cache
+
+ // Run the test.
+ runTest(t, base, cache, scav)
+ }
+}
+
+func TestPageAllocAllocToCache(t *testing.T) {
+ tests := map[string]struct {
+ before map[ChunkIdx][]BitRange
+ scav map[ChunkIdx][]BitRange
+ hits []PageCache // expected base addresses and patterns
+ after map[ChunkIdx][]BitRange
+ }{
+ "AllFree": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{1, 1}, {64, 64}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx, 0), ^uint64(0), 0x2),
+ NewPageCache(PageBase(BaseChunkIdx, 64), ^uint64(0), ^uint64(0)),
+ NewPageCache(PageBase(BaseChunkIdx, 128), ^uint64(0), 0),
+ NewPageCache(PageBase(BaseChunkIdx, 192), ^uint64(0), 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 256}},
+ },
+ },
+ "ManyArena": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages - 64}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx+2, PallocChunkPages-64), ^uint64(0), 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 1: {{0, PallocChunkPages}},
+ BaseChunkIdx + 2: {{0, PallocChunkPages}},
+ },
+ },
+ "NotContiguous": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 0}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{31, 67}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx+0xff, 0), ^uint64(0), ((uint64(1)<<33)-1)<<31),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ BaseChunkIdx + 0xff: {{0, 64}},
+ },
+ },
+ "First": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 32}, {33, 31}, {96, 32}},
+ },
+ scav: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{1, 4}, {31, 5}, {66, 2}},
+ },
+ hits: []PageCache{
+ NewPageCache(PageBase(BaseChunkIdx, 0), 1<<32, 1<<32),
+ NewPageCache(PageBase(BaseChunkIdx, 64), (uint64(1)<<32)-1, 0x3<<2),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, 128}},
+ },
+ },
+ "Fail": {
+ before: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ hits: []PageCache{
+ NewPageCache(0, 0, 0),
+ NewPageCache(0, 0, 0),
+ NewPageCache(0, 0, 0),
+ },
+ after: map[ChunkIdx][]BitRange{
+ BaseChunkIdx: {{0, PallocChunkPages}},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := NewPageAlloc(v.before, v.scav)
+ defer FreePageAlloc(b)
+
+ for _, expect := range v.hits {
+ checkPageCache(t, b.AllocToCache(), expect)
+ if t.Failed() {
+ return
+ }
+ }
+ want := NewPageAlloc(v.after, v.scav)
+ defer FreePageAlloc(want)
+
+ checkPageAlloc(t, want, b)
+ })
+ }
+}
diff --git a/libgo/go/runtime/mpallocbits.go b/libgo/go/runtime/mpallocbits.go
new file mode 100644
index 0000000..9d01ff8
--- /dev/null
+++ b/libgo/go/runtime/mpallocbits.go
@@ -0,0 +1,394 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+)
+
+// pageBits is a bitmap representing one bit per page in a palloc chunk.
+type pageBits [pallocChunkPages / 64]uint64
+
+// get returns the value of the i'th bit in the bitmap.
+func (b *pageBits) get(i uint) uint {
+ return uint((b[i/64] >> (i % 64)) & 1)
+}
+
+// block64 returns the 64-bit aligned block of bits containing the i'th bit.
+func (b *pageBits) block64(i uint) uint64 {
+ return b[i/64]
+}
+
+// set sets bit i of pageBits.
+func (b *pageBits) set(i uint) {
+ b[i/64] |= 1 << (i % 64)
+}
+
+// setRange sets bits in the range [i, i+n).
+func (b *pageBits) setRange(i, n uint) {
+ _ = b[i/64]
+ if n == 1 {
+ // Fast path for the n == 1 case.
+ b.set(i)
+ return
+ }
+ // Set bits [i, j].
+ j := i + n - 1
+ if i/64 == j/64 {
+ b[i/64] |= ((uint64(1) << n) - 1) << (i % 64)
+ return
+ }
+ _ = b[j/64]
+ // Set leading bits.
+ b[i/64] |= ^uint64(0) << (i % 64)
+ for k := i/64 + 1; k < j/64; k++ {
+ b[k] = ^uint64(0)
+ }
+ // Set trailing bits.
+ b[j/64] |= (uint64(1) << (j%64 + 1)) - 1
+}
+
+// setAll sets all the bits of b.
+func (b *pageBits) setAll() {
+ for i := range b {
+ b[i] = ^uint64(0)
+ }
+}
+
+// clear clears bit i of pageBits.
+func (b *pageBits) clear(i uint) {
+ b[i/64] &^= 1 << (i % 64)
+}
+
+// clearRange clears bits in the range [i, i+n).
+func (b *pageBits) clearRange(i, n uint) {
+ _ = b[i/64]
+ if n == 1 {
+ // Fast path for the n == 1 case.
+ b.clear(i)
+ return
+ }
+ // Clear bits [i, j].
+ j := i + n - 1
+ if i/64 == j/64 {
+ b[i/64] &^= ((uint64(1) << n) - 1) << (i % 64)
+ return
+ }
+ _ = b[j/64]
+ // Clear leading bits.
+ b[i/64] &^= ^uint64(0) << (i % 64)
+ for k := i/64 + 1; k < j/64; k++ {
+ b[k] = 0
+ }
+ // Clear trailing bits.
+ b[j/64] &^= (uint64(1) << (j%64 + 1)) - 1
+}
+
+// clearAll frees all the bits of b.
+func (b *pageBits) clearAll() {
+ for i := range b {
+ b[i] = 0
+ }
+}
+
+// popcntRange counts the number of set bits in the
+// range [i, i+n).
+func (b *pageBits) popcntRange(i, n uint) (s uint) {
+ if n == 1 {
+ return uint((b[i/64] >> (i % 64)) & 1)
+ }
+ _ = b[i/64]
+ j := i + n - 1
+ if i/64 == j/64 {
+ return uint(sys.OnesCount64((b[i/64] >> (i % 64)) & ((1 << n) - 1)))
+ }
+ _ = b[j/64]
+ s += uint(sys.OnesCount64(b[i/64] >> (i % 64)))
+ for k := i/64 + 1; k < j/64; k++ {
+ s += uint(sys.OnesCount64(b[k]))
+ }
+ s += uint(sys.OnesCount64(b[j/64] & ((1 << (j%64 + 1)) - 1)))
+ return
+}
+
+// pallocBits is a bitmap that tracks page allocations for at most one
+// palloc chunk.
+//
+// The precise representation is an implementation detail, but for the
+// sake of documentation, 0s are free pages and 1s are allocated pages.
+type pallocBits pageBits
+
+// consec8tab is a table containing the number of consecutive
+// zero bits for any uint8 value.
+//
+// The table is generated by calling consec8(i) for each
+// possible uint8 value, which is defined as:
+//
+// // consec8 counts the maximum number of consecutive 0 bits
+// // in a uint8.
+// func consec8(n uint8) int {
+// n = ^n
+// i := 0
+// for n != 0 {
+// n &= (n << 1)
+// i++
+// }
+// return i
+// }
+var consec8tab = [256]uint{
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1,
+ 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1,
+ 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0,
+}
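The table above can be regenerated mechanically. A throwaway program along the lines of the sketch below, using the consec8 function quoted in the comment, prints the 256 entries 16 per row to match the literal.

package main

import "fmt"

// consec8 counts the maximum number of consecutive 0 bits in a uint8,
// as quoted in the comment above consec8tab.
func consec8(n uint8) int {
	n = ^n
	i := 0
	for n != 0 {
		n &= n << 1
		i++
	}
	return i
}

func main() {
	for i := 0; i < 256; i++ {
		fmt.Printf("%d, ", consec8(uint8(i)))
		if i%16 == 15 {
			fmt.Println()
		}
	}
}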
+
+// summarize returns a packed summary of the bitmap in pallocBits.
+func (b *pallocBits) summarize() pallocSum {
+ // TODO(mknyszek): There may be something more clever to be done
+ // here to make the summarize operation more efficient. For example,
+ // we can compute start and end with 64-bit wide operations easily,
+ // but max is a bit more complex. Perhaps there exists some way to
+ // leverage the 64-bit start and end to our advantage?
+ var start, max, end uint
+ for i := 0; i < len(b); i++ {
+ a := b[i]
+ for j := 0; j < 64; j += 8 {
+ k := uint8(a >> j)
+
+ // Compute start.
+ si := uint(sys.TrailingZeros8(k))
+ if start == uint(i*64+j) {
+ start += si
+ }
+
+ // Compute max.
+ if end+si > max {
+ max = end + si
+ }
+ if mi := consec8tab[k]; mi > max {
+ max = mi
+ }
+
+ // Compute end.
+ if k == 0 {
+ end += 8
+ } else {
+ end = uint(sys.LeadingZeros8(k))
+ }
+ }
+ }
+ return packPallocSum(start, max, end)
+}
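To make the packed (start, max, end) triple concrete: start is the leading run of free (zero) pages, max is the longest run of free pages anywhere in the chunk, and end is the trailing run. The sketch below is a naive standalone reference for that computation, in the spirit of the SummarizeSlow reference the randomized test uses; it is illustrative only, not runtime code.

package main

import "fmt"

// summarizeSlow is a standalone reference for what summarize packs into a
// pallocSum: the leading, maximum, and trailing runs of free (zero) pages.
func summarizeSlow(chunk []uint64) (start, max, end uint) {
	var run uint
	for i := uint(0); i < uint(len(chunk))*64; i++ {
		if chunk[i/64]&(1<<(i%64)) != 0 {
			run = 0 // an allocated page breaks the current run
			continue
		}
		run++
		if run == i+1 {
			start = run // still inside the leading run of free pages
		}
		if run > max {
			max = run
		}
	}
	end = run // the free run ending at the last page, if any
	return
}

func main() {
	// Two 64-page words: pages 0-2 free, page 3 allocated, the rest free.
	fmt.Println(summarizeSlow([]uint64{1 << 3, 0})) // 3 124 124
}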
+
+// find searches for npages contiguous free pages in pallocBits and returns
+// the index where that run starts, as well as the index of the first free page
+// it found in the search. searchIdx represents the first known free page and
+// where to begin the search from.
+//
+// If find fails to find any free space, it returns an index of ^uint(0) and
+// the new searchIdx should be ignored.
+//
+// The returned searchIdx is always the index of the first free page found
+// in this bitmap during the search, except if npages == 1, in which
+// case it will be the index just after the first free page, because the
+// index returned as the first result is assumed to be allocated and so
+// represents a minor optimization for that case.
+func (b *pallocBits) find(npages uintptr, searchIdx uint) (uint, uint) {
+ if npages == 1 {
+ addr := b.find1(searchIdx)
+ // Return a searchIdx of addr + 1 since we assume addr will be
+ // allocated.
+ return addr, addr + 1
+ } else if npages <= 64 {
+ return b.findSmallN(npages, searchIdx)
+ }
+ return b.findLargeN(npages, searchIdx)
+}
+
+// find1 is a helper for find which searches for a single free page
+// in the pallocBits and returns the index.
+//
+// See find for an explanation of the searchIdx parameter.
+func (b *pallocBits) find1(searchIdx uint) uint {
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ x := b[i]
+ if x == ^uint64(0) {
+ continue
+ }
+ return i*64 + uint(sys.TrailingZeros64(^x))
+ }
+ return ^uint(0)
+}
+
+// findSmallN is a helper for find which searches for npages contiguous free pages
+// in this pallocBits and returns the index where that run of contiguous pages
+// starts as well as the index of the first free page it finds in its search.
+//
+// See find for an explanation of the searchIdx parameter.
+//
+// Returns a ^uint(0) index on failure and the new searchIdx should be ignored.
+//
+// findSmallN assumes npages <= 64, where any such contiguous run of pages
+// crosses at most one aligned 64-bit boundary in the bits.
+func (b *pallocBits) findSmallN(npages uintptr, searchIdx uint) (uint, uint) {
+ end, newSearchIdx := uint(0), ^uint(0)
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ bi := b[i]
+ if bi == ^uint64(0) {
+ end = 0
+ continue
+ }
+ // First see if we can pack our allocation in the trailing
+ // zeros plus the end of the last 64 bits.
+ start := uint(sys.TrailingZeros64(bi))
+ if newSearchIdx == ^uint(0) {
+ // The new searchIdx is going to be at these 64 bits after any
+ // 1s we find, so count trailing 1s.
+ newSearchIdx = i*64 + uint(sys.TrailingZeros64(^bi))
+ }
+ if end+start >= uint(npages) {
+ return i*64 - end, newSearchIdx
+ }
+ // Next, check the interior of the 64-bit chunk.
+ j := findBitRange64(^bi, uint(npages))
+ if j < 64 {
+ return i*64 + j, newSearchIdx
+ }
+ end = uint(sys.LeadingZeros64(bi))
+ }
+ return ^uint(0), newSearchIdx
+}
+
+// findLargeN is a helper for find which searches for npages contiguous free pages
+// in this pallocBits and returns the index where that run starts, as well as the
+// index of the first free page it found in its search.
+//
+// See find for an explanation of the searchIdx parameter.
+//
+// Returns a ^uint(0) index on failure and the new searchIdx should be ignored.
+//
+// findLargeN assumes npages > 64, where any such run of free pages
+// crosses at least one aligned 64-bit boundary in the bits.
+func (b *pallocBits) findLargeN(npages uintptr, searchIdx uint) (uint, uint) {
+ start, size, newSearchIdx := ^uint(0), uint(0), ^uint(0)
+ for i := searchIdx / 64; i < uint(len(b)); i++ {
+ x := b[i]
+ if x == ^uint64(0) {
+ size = 0
+ continue
+ }
+ if newSearchIdx == ^uint(0) {
+ // The new searchIdx is going to be at these 64 bits after any
+ // 1s we find, so count trailing 1s.
+ newSearchIdx = i*64 + uint(sys.TrailingZeros64(^x))
+ }
+ if size == 0 {
+ size = uint(sys.LeadingZeros64(x))
+ start = i*64 + 64 - size
+ continue
+ }
+ s := uint(sys.TrailingZeros64(x))
+ if s+size >= uint(npages) {
+ size += s
+ return start, newSearchIdx
+ }
+ if s < 64 {
+ size = uint(sys.LeadingZeros64(x))
+ start = i*64 + 64 - size
+ continue
+ }
+ size += 64
+ }
+ if size < uint(npages) {
+ return ^uint(0), newSearchIdx
+ }
+ return start, newSearchIdx
+}
+
+// allocRange allocates the range [i, i+n).
+func (b *pallocBits) allocRange(i, n uint) {
+ (*pageBits)(b).setRange(i, n)
+}
+
+// allocAll allocates all the bits of b.
+func (b *pallocBits) allocAll() {
+ (*pageBits)(b).setAll()
+}
+
+// free1 frees a single page in the pallocBits at i.
+func (b *pallocBits) free1(i uint) {
+ (*pageBits)(b).clear(i)
+}
+
+// free frees the range [i, i+n) of pages in the pallocBits.
+func (b *pallocBits) free(i, n uint) {
+ (*pageBits)(b).clearRange(i, n)
+}
+
+// freeAll frees all the bits of b.
+func (b *pallocBits) freeAll() {
+ (*pageBits)(b).clearAll()
+}
+
+// pages64 returns a 64-bit bitmap representing a block of 64 pages aligned
+// to 64 pages. The returned block of pages is the one containing the i'th
+// page in this pallocBits. Each bit represents whether the page is in-use.
+func (b *pallocBits) pages64(i uint) uint64 {
+ return (*pageBits)(b).block64(i)
+}
+
+// findBitRange64 returns the bit index of the first set of
+// n consecutive 1 bits. If no consecutive set of 1 bits of
+// size n may be found in c, then it returns an integer >= 64.
+func findBitRange64(c uint64, n uint) uint {
+ i := uint(0)
+ cont := uint(sys.TrailingZeros64(^c))
+ for cont < n && i < 64 {
+ i += cont
+ i += uint(sys.TrailingZeros64(c >> i))
+ cont = uint(sys.TrailingZeros64(^(c >> i)))
+ }
+ return i
+}
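As a sanity check on the contract described above (the first index of n consecutive 1 bits, or a value >= 64 on failure), the throwaway sketch below compares a few cases taken from TestFindBitRange64 further down against a brute-force scan; it is not the runtime's implementation.

package main

import "fmt"

// naiveFindBitRange is a brute-force stand-in for findBitRange64: it
// returns the lowest i such that bits [i, i+n) of c are all 1, or 64 if
// no such run exists.
func naiveFindBitRange(c uint64, n uint) uint {
	if n == 0 {
		return 0
	}
	mask := uint64(1)<<n - 1
	for i := uint(0); i+n <= 64; i++ {
		if (c>>i)&mask == mask {
			return i
		}
	}
	return 64
}

func main() {
	// Cases drawn from TestFindBitRange64 below.
	fmt.Println(naiveFindBitRange(0x8000000000000000, 1))  // 63
	fmt.Println(naiveFindBitRange(0xffff03ff01070000, 16)) // 48
	fmt.Println(naiveFindBitRange(0x0fff03ff01079fff, 16)) // 64 (failure)
}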
+
+// pallocData encapsulates pallocBits and a bitmap for
+// whether or not a given page is scavenged in a single
+// structure. It's effectively a pallocBits with
+// additional functionality.
+//
+// Update the comment on (*pageAlloc).chunks should this
+// structure change.
+type pallocData struct {
+ pallocBits
+ scavenged pageBits
+}
+
+// allocRange sets bits [i, i+n) in the bitmap to 1 and
+// updates the scavenged bits appropriately.
+func (m *pallocData) allocRange(i, n uint) {
+ // Clear the scavenged bits when we alloc the range.
+ m.pallocBits.allocRange(i, n)
+ m.scavenged.clearRange(i, n)
+}
+
+// allocAll sets every bit in the bitmap to 1 and updates
+// the scavenged bits appropriately.
+func (m *pallocData) allocAll() {
+ // Clear the scavenged bits when we alloc the range.
+ m.pallocBits.allocAll()
+ m.scavenged.clearAll()
+}
diff --git a/libgo/go/runtime/mpallocbits_test.go b/libgo/go/runtime/mpallocbits_test.go
new file mode 100644
index 0000000..71a29f3
--- /dev/null
+++ b/libgo/go/runtime/mpallocbits_test.go
@@ -0,0 +1,510 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "math/rand"
+ . "runtime"
+ "testing"
+)
+
+// Ensures that got and want are the same, and if not, reports
+// detailed diff information.
+func checkPallocBits(t *testing.T, got, want *PallocBits) bool {
+ d := DiffPallocBits(got, want)
+ if len(d) != 0 {
+ t.Errorf("%d range(s) different", len(d))
+ for _, bits := range d {
+ t.Logf("\t@ bit index %d", bits.I)
+ t.Logf("\t| got: %s", StringifyPallocBits(got, bits))
+ t.Logf("\t| want: %s", StringifyPallocBits(want, bits))
+ }
+ return false
+ }
+ return true
+}
+
+// makePallocBits produces an initialized PallocBits by setting
+// the ranges in s to 1 and the rest to zero.
+func makePallocBits(s []BitRange) *PallocBits {
+ b := new(PallocBits)
+ for _, v := range s {
+ b.AllocRange(v.I, v.N)
+ }
+ return b
+}
+
+// Ensures that PallocBits.AllocRange works, which is a fundamental
+// method used for testing and initialization since it's used by
+// makePallocBits.
+func TestPallocBitsAllocRange(t *testing.T) {
+ test := func(t *testing.T, i, n uint, want *PallocBits) {
+ checkPallocBits(t, makePallocBits([]BitRange{{i, n}}), want)
+ }
+ t.Run("OneLow", func(t *testing.T) {
+ want := new(PallocBits)
+ want[0] = 0x1
+ test(t, 0, 1, want)
+ })
+ t.Run("OneHigh", func(t *testing.T) {
+ want := new(PallocBits)
+ want[PallocChunkPages/64-1] = 1 << 63
+ test(t, PallocChunkPages-1, 1, want)
+ })
+ t.Run("Inner", func(t *testing.T) {
+ want := new(PallocBits)
+ want[2] = 0x3e
+ test(t, 129, 5, want)
+ })
+ t.Run("Aligned", func(t *testing.T) {
+ want := new(PallocBits)
+ want[2] = ^uint64(0)
+ want[3] = ^uint64(0)
+ test(t, 128, 128, want)
+ })
+ t.Run("Begin", func(t *testing.T) {
+ want := new(PallocBits)
+ want[0] = ^uint64(0)
+ want[1] = ^uint64(0)
+ want[2] = ^uint64(0)
+ want[3] = ^uint64(0)
+ want[4] = ^uint64(0)
+ want[5] = 0x1
+ test(t, 0, 321, want)
+ })
+ t.Run("End", func(t *testing.T) {
+ want := new(PallocBits)
+ want[PallocChunkPages/64-1] = ^uint64(0)
+ want[PallocChunkPages/64-2] = ^uint64(0)
+ want[PallocChunkPages/64-3] = ^uint64(0)
+ want[PallocChunkPages/64-4] = 1 << 63
+ test(t, PallocChunkPages-(64*3+1), 64*3+1, want)
+ })
+ t.Run("All", func(t *testing.T) {
+ want := new(PallocBits)
+ for i := range want {
+ want[i] = ^uint64(0)
+ }
+ test(t, 0, PallocChunkPages, want)
+ })
+}
+
+// Inverts every bit in the PallocBits.
+func invertPallocBits(b *PallocBits) {
+ for i := range b {
+ b[i] = ^b[i]
+ }
+}
+
+// Ensures two packed summaries are identical, and reports a detailed description
+// of the difference if they're not.
+func checkPallocSum(t *testing.T, got, want PallocSum) {
+ if got.Start() != want.Start() {
+ t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start())
+ }
+ if got.Max() != want.Max() {
+ t.Errorf("inconsistent max: got %d, want %d", got.Max(), want.Max())
+ }
+ if got.End() != want.End() {
+ t.Errorf("inconsistent end: got %d, want %d", got.End(), want.End())
+ }
+}
+
+func TestMallocBitsPopcntRange(t *testing.T) {
+ type test struct {
+ i, n uint // bit range to popcnt over.
+ want uint // expected popcnt result on that range.
+ }
+ tests := map[string]struct {
+ init []BitRange // bit ranges to set to 1 in the bitmap.
+ tests []test // a set of popcnt tests to run over the bitmap.
+ }{
+ "None": {
+ tests: []test{
+ {0, 1, 0},
+ {5, 3, 0},
+ {2, 11, 0},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, 0},
+ {0, PallocChunkPages, 0},
+ },
+ },
+ "All": {
+ init: []BitRange{{0, PallocChunkPages}},
+ tests: []test{
+ {0, 1, 1},
+ {5, 3, 3},
+ {2, 11, 11},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, PallocChunkPages / 2},
+ {0, PallocChunkPages, PallocChunkPages},
+ },
+ },
+ "Half": {
+ init: []BitRange{{PallocChunkPages / 2, PallocChunkPages / 2}},
+ tests: []test{
+ {0, 1, 0},
+ {5, 3, 0},
+ {2, 11, 0},
+ {PallocChunkPages/2 - 1, 1, 0},
+ {PallocChunkPages / 2, 1, 1},
+ {PallocChunkPages/2 + 10, 1, 1},
+ {PallocChunkPages/2 - 1, 2, 1},
+ {PallocChunkPages / 4, PallocChunkPages / 4, 0},
+ {PallocChunkPages / 4, PallocChunkPages/4 + 1, 1},
+ {PallocChunkPages/4 + 1, PallocChunkPages / 2, PallocChunkPages/4 + 1},
+ {0, PallocChunkPages, PallocChunkPages / 2},
+ },
+ },
+ "OddBound": {
+ init: []BitRange{{0, 111}},
+ tests: []test{
+ {0, 1, 1},
+ {5, 3, 3},
+ {2, 11, 11},
+ {110, 2, 1},
+ {99, 50, 12},
+ {110, 1, 1},
+ {111, 1, 0},
+ {99, 1, 1},
+ {120, 1, 0},
+ {PallocChunkPages / 2, PallocChunkPages / 2, 0},
+ {0, PallocChunkPages, 111},
+ },
+ },
+ "Scattered": {
+ init: []BitRange{
+ {1, 3}, {5, 1}, {7, 1}, {10, 2}, {13, 1}, {15, 4},
+ {21, 1}, {23, 1}, {26, 2}, {30, 5}, {36, 2}, {40, 3},
+ {44, 6}, {51, 1}, {53, 2}, {58, 3}, {63, 1}, {67, 2},
+ {71, 10}, {84, 1}, {89, 7}, {99, 2}, {103, 1}, {107, 2},
+ {111, 1}, {113, 1}, {115, 1}, {118, 1}, {120, 2}, {125, 5},
+ },
+ tests: []test{
+ {0, 11, 6},
+ {0, 64, 39},
+ {13, 64, 40},
+ {64, 64, 34},
+ {0, 128, 73},
+ {1, 128, 74},
+ {0, PallocChunkPages, 75},
+ },
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.init)
+ for _, h := range v.tests {
+ if got := b.PopcntRange(h.i, h.n); got != h.want {
+ t.Errorf("bad popcnt (i=%d, n=%d): got %d, want %d", h.i, h.n, got, h.want)
+ }
+ }
+ })
+ }
+}
+
+// Ensures computing bit summaries works as expected by generating random
+// bitmaps and checking against a reference implementation.
+func TestPallocBitsSummarizeRandom(t *testing.T) {
+ b := new(PallocBits)
+ for i := 0; i < 1000; i++ {
+ // Randomize bitmap.
+ for i := range b {
+ b[i] = rand.Uint64()
+ }
+ // Check summary against reference implementation.
+ checkPallocSum(t, b.Summarize(), SummarizeSlow(b))
+ }
+}
+
+// Ensures computing bit summaries works as expected.
+func TestPallocBitsSummarize(t *testing.T) {
+ var emptySum = PackPallocSum(PallocChunkPages, PallocChunkPages, PallocChunkPages)
+ type test struct {
+ free []BitRange // Ranges of free (zero) bits.
+ hits []PallocSum
+ }
+ tests := make(map[string]test)
+ tests["NoneFree"] = test{
+ free: []BitRange{},
+ hits: []PallocSum{
+ PackPallocSum(0, 0, 0),
+ },
+ }
+ tests["OnlyStart"] = test{
+ free: []BitRange{{0, 10}},
+ hits: []PallocSum{
+ PackPallocSum(10, 10, 0),
+ },
+ }
+ tests["OnlyEnd"] = test{
+ free: []BitRange{{PallocChunkPages - 40, 40}},
+ hits: []PallocSum{
+ PackPallocSum(0, 40, 40),
+ },
+ }
+ tests["StartAndEnd"] = test{
+ free: []BitRange{{0, 11}, {PallocChunkPages - 23, 23}},
+ hits: []PallocSum{
+ PackPallocSum(11, 23, 23),
+ },
+ }
+ tests["StartMaxEnd"] = test{
+ free: []BitRange{{0, 4}, {50, 100}, {PallocChunkPages - 4, 4}},
+ hits: []PallocSum{
+ PackPallocSum(4, 100, 4),
+ },
+ }
+ tests["OnlyMax"] = test{
+ free: []BitRange{{1, 20}, {35, 241}, {PallocChunkPages - 50, 30}},
+ hits: []PallocSum{
+ PackPallocSum(0, 241, 0),
+ },
+ }
+ tests["MultiMax"] = test{
+ free: []BitRange{{35, 2}, {40, 5}, {100, 5}},
+ hits: []PallocSum{
+ PackPallocSum(0, 5, 0),
+ },
+ }
+ tests["One"] = test{
+ free: []BitRange{{2, 1}},
+ hits: []PallocSum{
+ PackPallocSum(0, 1, 0),
+ },
+ }
+ tests["AllFree"] = test{
+ free: []BitRange{{0, PallocChunkPages}},
+ hits: []PallocSum{
+ emptySum,
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.free)
+ // In the PallocBits we create here, 1s represent free spots, but in the
+ // actual PallocBits 1 means allocated (not free), so invert.
+ invertPallocBits(b)
+ for _, h := range v.hits {
+ checkPallocSum(t, b.Summarize(), h)
+ }
+ })
+ }
+}
+
+// Benchmarks how quickly we can summarize a PallocBits.
+func BenchmarkPallocBitsSummarize(b *testing.B) {
+ buf0 := new(PallocBits)
+ buf1 := new(PallocBits)
+ for i := 0; i < len(buf1); i++ {
+ buf1[i] = ^uint64(0)
+ }
+ bufa := new(PallocBits)
+ for i := 0; i < len(bufa); i++ {
+ bufa[i] = 0xaa
+ }
+ for _, buf := range []*PallocBits{buf0, buf1, bufa} {
+ b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ buf.Summarize()
+ }
+ })
+ }
+}
+
+// Ensures page allocation works.
+func TestPallocBitsAlloc(t *testing.T) {
+ tests := map[string]struct {
+ before []BitRange
+ after []BitRange
+ npages uintptr
+ hits []uint
+ }{
+ "AllFree1": {
+ npages: 1,
+ hits: []uint{0, 1, 2, 3, 4, 5},
+ after: []BitRange{{0, 6}},
+ },
+ "AllFree2": {
+ npages: 2,
+ hits: []uint{0, 2, 4, 6, 8, 10},
+ after: []BitRange{{0, 12}},
+ },
+ "AllFree5": {
+ npages: 5,
+ hits: []uint{0, 5, 10, 15, 20},
+ after: []BitRange{{0, 25}},
+ },
+ "AllFree64": {
+ npages: 64,
+ hits: []uint{0, 64, 128},
+ after: []BitRange{{0, 192}},
+ },
+ "AllFree65": {
+ npages: 65,
+ hits: []uint{0, 65, 130},
+ after: []BitRange{{0, 195}},
+ },
+ "SomeFree64": {
+ before: []BitRange{{0, 32}, {64, 32}, {100, PallocChunkPages - 100}},
+ npages: 64,
+ hits: []uint{^uint(0)},
+ after: []BitRange{{0, 32}, {64, 32}, {100, PallocChunkPages - 100}},
+ },
+ "NoneFree1": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 1,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree2": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 2,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree5": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 5,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "NoneFree65": {
+ before: []BitRange{{0, PallocChunkPages}},
+ npages: 65,
+ hits: []uint{^uint(0), ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit1": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 - 2, PallocChunkPages/2 + 2}},
+ npages: 1,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit2": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 - 1, PallocChunkPages/2 + 1}},
+ npages: 2,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit5": {
+ before: []BitRange{{0, PallocChunkPages/2 - 3}, {PallocChunkPages/2 + 2, PallocChunkPages/2 - 2}},
+ npages: 5,
+ hits: []uint{PallocChunkPages/2 - 3, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "ExactFit65": {
+ before: []BitRange{{0, PallocChunkPages/2 - 31}, {PallocChunkPages/2 + 34, PallocChunkPages/2 - 34}},
+ npages: 65,
+ hits: []uint{PallocChunkPages/2 - 31, ^uint(0)},
+ after: []BitRange{{0, PallocChunkPages}},
+ },
+ "SomeFree161": {
+ before: []BitRange{{0, 185}, {331, 1}},
+ npages: 161,
+ hits: []uint{332},
+ after: []BitRange{{0, 185}, {331, 162}},
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.before)
+ for iter, i := range v.hits {
+ a, _ := b.Find(v.npages, 0)
+ if i != a {
+ t.Fatalf("find #%d picked wrong index: want %d, got %d", iter+1, i, a)
+ }
+ if i != ^uint(0) {
+ b.AllocRange(a, uint(v.npages))
+ }
+ }
+ want := makePallocBits(v.after)
+ checkPallocBits(t, b, want)
+ })
+ }
+}
+
+// Ensures page freeing works.
+func TestPallocBitsFree(t *testing.T) {
+ tests := map[string]struct {
+ beforeInv []BitRange
+ afterInv []BitRange
+ frees []uint
+ npages uintptr
+ }{
+ "SomeFree": {
+ npages: 1,
+ beforeInv: []BitRange{{0, 32}, {64, 32}, {100, 1}},
+ frees: []uint{32},
+ afterInv: []BitRange{{0, 33}, {64, 32}, {100, 1}},
+ },
+ "NoneFree1": {
+ npages: 1,
+ frees: []uint{0, 1, 2, 3, 4, 5},
+ afterInv: []BitRange{{0, 6}},
+ },
+ "NoneFree2": {
+ npages: 2,
+ frees: []uint{0, 2, 4, 6, 8, 10},
+ afterInv: []BitRange{{0, 12}},
+ },
+ "NoneFree5": {
+ npages: 5,
+ frees: []uint{0, 5, 10, 15, 20},
+ afterInv: []BitRange{{0, 25}},
+ },
+ "NoneFree64": {
+ npages: 64,
+ frees: []uint{0, 64, 128},
+ afterInv: []BitRange{{0, 192}},
+ },
+ "NoneFree65": {
+ npages: 65,
+ frees: []uint{0, 65, 130},
+ afterInv: []BitRange{{0, 195}},
+ },
+ }
+ for name, v := range tests {
+ v := v
+ t.Run(name, func(t *testing.T) {
+ b := makePallocBits(v.beforeInv)
+ invertPallocBits(b)
+ for _, i := range v.frees {
+ b.Free(i, uint(v.npages))
+ }
+ want := makePallocBits(v.afterInv)
+ invertPallocBits(want)
+ checkPallocBits(t, b, want)
+ })
+ }
+}
+
+func TestFindBitRange64(t *testing.T) {
+ check := func(x uint64, n uint, result uint) {
+ i := FindBitRange64(x, n)
+ if result == ^uint(0) && i < 64 {
+ t.Errorf("case (%016x, %d): got %d, want failure", x, n, i)
+ } else if result != ^uint(0) && i != result {
+ t.Errorf("case (%016x, %d): got %d, want %d", x, n, i, result)
+ }
+ }
+ for i := uint(0); i <= 64; i++ {
+ check(^uint64(0), i, 0)
+ }
+ check(0, 0, 0)
+ for i := uint(1); i <= 64; i++ {
+ check(0, i, ^uint(0))
+ }
+ check(0x8000000000000000, 1, 63)
+ check(0xc000010001010000, 2, 62)
+ check(0xc000010001030000, 2, 16)
+ check(0xe000030001030000, 3, 61)
+ check(0xe000030001070000, 3, 16)
+ check(0xffff03ff01070000, 16, 48)
+ check(0xffff03ff0107ffff, 16, 0)
+ check(0x0fff03ff01079fff, 16, ^uint(0))
+}
diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go
index 132c2ff..dd257d1 100644
--- a/libgo/go/runtime/mprof.go
+++ b/libgo/go/runtime/mprof.go
@@ -614,10 +614,20 @@ func fixupStack(stk []uintptr, skip int, canonStack *[maxStack]uintptr, size uin
// Increase the skip count to take into account the frames corresponding
// to runtime.callersRaw and to the C routine that it invokes.
skip += 2
+ sawSigtramp := false
for _, pc := range stk {
// Subtract 1 from PC to undo the 1 we added in callback in
// go-callers.c.
- function, file, _, frames := funcfileline(pc-1, -1)
+ function, file, _, frames := funcfileline(pc-1, -1, false)
+
+ // Skip an unnamed function above sigtramp, as it is
+ // likely the signal handler.
+ if sawSigtramp {
+ sawSigtramp = false
+ if function == "" {
+ continue
+ }
+ }
// Skip split-stack functions (match by function name)
skipFrame := false
@@ -630,11 +640,18 @@ func fixupStack(stk []uintptr, skip int, canonStack *[maxStack]uintptr, size uin
skipFrame = true
}
- // Skip thunks and recover functions. There is no equivalent to
- // these functions in the gc toolchain.
+ // Skip thunks, recover functions, and other gccgo-specific
+ // functions that do not appear in the gc toolchain.
fcn := function
if hasSuffix(fcn, "..r") {
skipFrame = true
+ } else if function == "runtime.deferreturn" || function == "runtime.sighandler" {
+ skipFrame = true
+ } else if function == "runtime.sigtramp" || function == "runtime.sigtrampgo" {
+ skipFrame = true
+ // Also skip subsequent unnamed functions,
+ // which will be the signal handler itself.
+ sawSigtramp = true
} else {
for fcn != "" && (fcn[len(fcn)-1] >= '0' && fcn[len(fcn)-1] <= '9') {
fcn = fcn[:len(fcn)-1]
diff --git a/libgo/go/runtime/mranges.go b/libgo/go/runtime/mranges.go
new file mode 100644
index 0000000..c14e5c7
--- /dev/null
+++ b/libgo/go/runtime/mranges.go
@@ -0,0 +1,147 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Address range data structure.
+//
+// This file contains an implementation of a data structure which
+// manages ordered address ranges.
+
+package runtime
+
+import (
+ "runtime/internal/sys"
+ "unsafe"
+)
+
+// addrRange represents a region of address space.
+type addrRange struct {
+ // base and limit together represent the region of address space
+ // [base, limit). That is, base is inclusive, limit is exclusive.
+ base, limit uintptr
+}
+
+// size returns the size of the range represented in bytes.
+func (a addrRange) size() uintptr {
+ if a.limit <= a.base {
+ return 0
+ }
+ return a.limit - a.base
+}
+
+// subtract removes from a any part that overlaps with b and returns
+// the resulting range. subtract assumes that a and b
+// either don't overlap at all, only overlap on one side, or are equal.
+// If b is strictly contained in a, thus forcing a split, it will throw.
+func (a addrRange) subtract(b addrRange) addrRange {
+ if a.base >= b.base && a.limit <= b.limit {
+ return addrRange{}
+ } else if a.base < b.base && a.limit > b.limit {
+ throw("bad prune")
+ } else if a.limit > b.limit && a.base < b.limit {
+ a.base = b.limit
+ } else if a.base < b.base && a.limit > b.base {
+ a.limit = b.base
+ }
+ return a
+}
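The overlap cases handled by subtract are easier to see with concrete numbers. The sketch below copies the method into a standalone function (throw becomes panic) and prints a few arbitrary half-open ranges; it is illustrative only.

package main

import "fmt"

type addrRange struct{ base, limit uintptr }

// subtract is a standalone copy of the method above, kept verbatim except
// that throw is replaced by panic, so the case analysis can be checked.
func subtract(a, b addrRange) addrRange {
	if a.base >= b.base && a.limit <= b.limit {
		return addrRange{}
	} else if a.base < b.base && a.limit > b.limit {
		panic("bad prune")
	} else if a.limit > b.limit && a.base < b.limit {
		a.base = b.limit
	} else if a.base < b.base && a.limit > b.base {
		a.limit = b.base
	}
	return a
}

func main() {
	a := addrRange{10, 20}
	fmt.Println(subtract(a, addrRange{15, 25})) // {10 15}: b overlaps a's high end
	fmt.Println(subtract(a, addrRange{5, 15}))  // {15 20}: b overlaps a's low end
	fmt.Println(subtract(a, addrRange{10, 20})) // {0 0}:   a is contained in b
}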
+
+// addrRanges is a data structure holding a collection of ranges of
+// address space.
+//
+// The ranges are coalesced eagerly to reduce the
+// number of ranges it holds.
+//
+// The slice backing store for this field is persistentalloc'd
+// and thus there is no way to free it.
+//
+// addrRanges is not thread-safe.
+type addrRanges struct {
+ // ranges is a slice of ranges sorted by base.
+ ranges []addrRange
+
+ // sysStat is the stat to track allocations by this type
+ sysStat *uint64
+}
+
+func (a *addrRanges) init(sysStat *uint64) {
+ ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
+ ranges.len = 0
+ ranges.cap = 16
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, sysStat))
+ a.sysStat = sysStat
+}
+
+// findSucc returns the first index in a such that base is
+// less than the base of the addrRange at that index.
+func (a *addrRanges) findSucc(base uintptr) int {
+ // TODO(mknyszek): Consider a binary search for large arrays.
+ // While iterating over these ranges is potentially expensive,
+ // the expected number of ranges is small, ideally just 1,
+ // since Go heaps are usually mostly contiguous.
+ for i := range a.ranges {
+ if base < a.ranges[i].base {
+ return i
+ }
+ }
+ return len(a.ranges)
+}
+
+// add inserts a new address range to a.
+//
+// r must not overlap with any address range in a.
+func (a *addrRanges) add(r addrRange) {
+ // The copies in this function are potentially expensive, but this data
+ // structure is meant to represent the Go heap. At worst, copying this
+ // would take ~160µs assuming a conservative copying rate of 25 GiB/s (the
+ // copy will almost never trigger a page fault) for a 1 TiB heap with 4 MiB
+ // arenas which is completely discontiguous. ~160µs is still a lot, but in
+ // practice most platforms have 64 MiB arenas (which cuts this by a factor
+ // of 16) and Go heaps are usually mostly contiguous, so the chance that
+ // an addrRanges even grows to that size is extremely low.
+
+ // Because we assume r is not currently represented in a,
+ // findSucc gives us our insertion index.
+ i := a.findSucc(r.base)
+ coalescesDown := i > 0 && a.ranges[i-1].limit == r.base
+ coalescesUp := i < len(a.ranges) && r.limit == a.ranges[i].base
+ if coalescesUp && coalescesDown {
+ // We have neighbors and they both border us.
+ // Merge a.ranges[i-1], r, and a.ranges[i] together into a.ranges[i-1].
+ a.ranges[i-1].limit = a.ranges[i].limit
+
+ // Delete a.ranges[i].
+ copy(a.ranges[i:], a.ranges[i+1:])
+ a.ranges = a.ranges[:len(a.ranges)-1]
+ } else if coalescesDown {
+ // We have a neighbor at a lower address only and it borders us.
+ // Merge the new space into a.ranges[i-1].
+ a.ranges[i-1].limit = r.limit
+ } else if coalescesUp {
+ // We have a neighbor at a higher address only and it borders us.
+ // Merge the new space into a.ranges[i].
+ a.ranges[i].base = r.base
+ } else {
+ // We may or may not have neighbors which don't border us.
+ // Add the new range.
+ if len(a.ranges)+1 > cap(a.ranges) {
+ // Grow the array. Note that this leaks the old array, but since
+ // we're doubling we have at most 2x waste. For a 1 TiB heap and
+ // 4 MiB arenas which are all discontiguous (both very conservative
+ // assumptions), this would waste at most 4 MiB of memory.
+ oldRanges := a.ranges
+ ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
+ ranges.len = len(oldRanges) + 1
+ ranges.cap = cap(oldRanges) * 2
+ ranges.array = (*notInHeap)(persistentalloc(unsafe.Sizeof(addrRange{})*uintptr(ranges.cap), sys.PtrSize, a.sysStat))
+
+ // Copy in the old array, but make space for the new range.
+ copy(a.ranges[:i], oldRanges[:i])
+ copy(a.ranges[i+1:], oldRanges[i:])
+ } else {
+ a.ranges = a.ranges[:len(a.ranges)+1]
+ copy(a.ranges[i+1:], a.ranges[i:])
+ }
+ a.ranges[i] = r
+ }
+}
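The coalescing logic in add is the interesting part: an inserted range may merge with its lower neighbor, its upper neighbor, or both. The sketch below reproduces that logic on an ordinary Go slice (no persistentalloc, no notInHeapSlice), which is enough to see the three cases; it is not how the runtime stores the ranges.

package main

import "fmt"

type addrRange struct{ base, limit uintptr }

// ranges is kept sorted by base and coalesced eagerly, mirroring
// addrRanges.add above but backed by a normal slice.
type ranges []addrRange

func (rs *ranges) add(r addrRange) {
	// Find the insertion index: the first range whose base is above r's.
	i := len(*rs)
	for j, x := range *rs {
		if r.base < x.base {
			i = j
			break
		}
	}
	down := i > 0 && (*rs)[i-1].limit == r.base
	up := i < len(*rs) && r.limit == (*rs)[i].base
	switch {
	case down && up: // merge lower neighbor, r, and upper neighbor
		(*rs)[i-1].limit = (*rs)[i].limit
		*rs = append((*rs)[:i], (*rs)[i+1:]...)
	case down: // extend the lower neighbor upward
		(*rs)[i-1].limit = r.limit
	case up: // extend the upper neighbor downward
		(*rs)[i].base = r.base
	default: // no bordering neighbor: insert at i
		*rs = append(*rs, addrRange{})
		copy((*rs)[i+1:], (*rs)[i:])
		(*rs)[i] = r
	}
}

func main() {
	var rs ranges
	rs.add(addrRange{0, 4})
	rs.add(addrRange{8, 12})
	fmt.Println(rs) // [{0 4} {8 12}]
	rs.add(addrRange{4, 8})
	fmt.Println(rs) // [{0 12}]: the middle range bridges both neighbors
}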
diff --git a/libgo/go/runtime/msize.go b/libgo/go/runtime/msize.go
index 0accb83..11d06ce 100644
--- a/libgo/go/runtime/msize.go
+++ b/libgo/go/runtime/msize.go
@@ -21,5 +21,5 @@ func roundupsize(size uintptr) uintptr {
if size+_PageSize < size {
return size
}
- return round(size, _PageSize)
+ return alignUp(size, _PageSize)
}
diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go
index cdab2ca..2d4cdbe 100644
--- a/libgo/go/runtime/mstats.go
+++ b/libgo/go/runtime/mstats.go
@@ -31,7 +31,7 @@ type mstats struct {
nfree uint64 // number of frees
// Statistics about malloc heap.
- // Protected by mheap.lock
+ // Updated atomically, or with the world stopped.
//
// Like MemStats, heap_sys and heap_inuse do not count memory
// in manually-managed spans.
@@ -40,19 +40,22 @@ type mstats struct {
heap_idle uint64 // bytes in idle spans
heap_inuse uint64 // bytes in mSpanInUse spans
heap_released uint64 // bytes released to the os
- heap_objects uint64 // total number of allocated objects
+
+ // heap_objects is not used by the runtime directly and instead
+ // computed on the fly by updatememstats.
+ heap_objects uint64 // total number of allocated objects
// Statistics about allocation of low-level fixed-size structures.
// Protected by FixAlloc locks.
- stacks_inuse uint64 // bytes in manually-managed stack spans
+ stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW
stacks_sys uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
mspan_inuse uint64 // mspan structures
mspan_sys uint64
mcache_inuse uint64 // mcache structures
mcache_sys uint64
buckhash_sys uint64 // profiling bucket hash table
- gc_sys uint64
- other_sys uint64
+ gc_sys uint64 // updated atomically or during STW
+ other_sys uint64 // updated atomically or during STW
// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
@@ -79,6 +82,8 @@ type mstats struct {
last_gc_nanotime uint64 // last gc (monotonic time)
tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+ last_next_gc uint64 // next_gc for the previous GC
+ last_heap_inuse uint64 // heap_inuse at mark termination of the previous GC
// triggerRatio is the heap growth ratio that triggers marking.
//
diff --git a/libgo/go/runtime/nbpipe_pipe.go b/libgo/go/runtime/nbpipe_pipe.go
new file mode 100644
index 0000000..822b294
--- /dev/null
+++ b/libgo/go/runtime/nbpipe_pipe.go
@@ -0,0 +1,19 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly
+
+package runtime
+
+func nonblockingPipe() (r, w int32, errno int32) {
+ r, w, errno = pipe()
+ if errno != 0 {
+ return -1, -1, errno
+ }
+ closeonexec(r)
+ setNonblock(r)
+ closeonexec(w)
+ setNonblock(w)
+ return r, w, errno
+}
diff --git a/libgo/go/runtime/nbpipe_pipe2.go b/libgo/go/runtime/nbpipe_pipe2.go
new file mode 100644
index 0000000..e3639d9
--- /dev/null
+++ b/libgo/go/runtime/nbpipe_pipe2.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build freebsd linux netbsd openbsd solaris
+
+package runtime
+
+func nonblockingPipe() (r, w int32, errno int32) {
+ r, w, errno = pipe2(_O_NONBLOCK | _O_CLOEXEC)
+ if errno == -_ENOSYS {
+ r, w, errno = pipe()
+ if errno != 0 {
+ return -1, -1, errno
+ }
+ closeonexec(r)
+ setNonblock(r)
+ closeonexec(w)
+ setNonblock(w)
+ }
+ return r, w, errno
+}
diff --git a/libgo/go/runtime/nbpipe_test.go b/libgo/go/runtime/nbpipe_test.go
new file mode 100644
index 0000000..981143e
--- /dev/null
+++ b/libgo/go/runtime/nbpipe_test.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package runtime_test
+
+import (
+ "runtime"
+ "syscall"
+ "testing"
+ "unsafe"
+)
+
+func TestNonblockingPipe(t *testing.T) {
+ t.Parallel()
+
+ // NonblockingPipe is the exported-for-testing name of nonblockingPipe.
+ r, w, errno := runtime.NonblockingPipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+
+ checkIsPipe(t, r, w)
+ checkNonblocking(t, r, "reader")
+ checkCloseonexec(t, r, "reader")
+ checkNonblocking(t, w, "writer")
+ checkCloseonexec(t, w, "writer")
+}
+
+func checkIsPipe(t *testing.T, r, w int32) {
+ bw := byte(42)
+ if n := runtime.Write(uintptr(w), unsafe.Pointer(&bw), 1); n != 1 {
+ t.Fatalf("Write(w, &b, 1) == %d, expected 1", n)
+ }
+ var br byte
+ if n := runtime.Read(r, unsafe.Pointer(&br), 1); n != 1 {
+ t.Fatalf("Read(r, &b, 1) == %d, expected 1", n)
+ }
+ if br != bw {
+ t.Errorf("pipe read %d, expected %d", br, bw)
+ }
+}
+
+func checkNonblocking(t *testing.T, fd int32, name string) {
+ t.Helper()
+ flags, errno := fcntl(uintptr(fd), syscall.F_GETFL, 0)
+ if errno != 0 {
+ t.Errorf("fcntl(%s, F_GETFL) failed: %v", name, syscall.Errno(errno))
+ } else if flags&syscall.O_NONBLOCK == 0 {
+ t.Errorf("O_NONBLOCK not set in %s flags %#x", name, flags)
+ }
+}
+
+func checkCloseonexec(t *testing.T, fd int32, name string) {
+ t.Helper()
+ flags, errno := fcntl(uintptr(fd), syscall.F_GETFD, 0)
+ if errno != 0 {
+ t.Errorf("fcntl(%s, F_GETFD) failed: %v", name, syscall.Errno(errno))
+ } else if flags&syscall.FD_CLOEXEC == 0 {
+ t.Errorf("FD_CLOEXEC not set in %s flags %#x", name, flags)
+ }
+}
+
+func TestSetNonblock(t *testing.T) {
+ t.Parallel()
+
+ r, w, errno := runtime.Pipe()
+ if errno != 0 {
+ t.Fatal(syscall.Errno(errno))
+ }
+ defer func() {
+ runtime.Close(r)
+ runtime.Close(w)
+ }()
+
+ checkIsPipe(t, r, w)
+
+ runtime.SetNonblock(r)
+ runtime.SetNonblock(w)
+ checkNonblocking(t, r, "reader")
+ checkNonblocking(t, w, "writer")
+
+ runtime.Closeonexec(r)
+ runtime.Closeonexec(w)
+ checkCloseonexec(t, r, "reader")
+ checkCloseonexec(t, w, "writer")
+}
+
+//extern __go_fcntl_uintptr
+func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
+
+// Call fcntl libc function rather than calling syscall.
+func fcntl(fd uintptr, cmd int, arg uintptr) (uintptr, syscall.Errno) {
+ res, errno := fcntlUintptr(fd, uintptr(cmd), arg)
+ return res, syscall.Errno(errno)
+}
diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go
index 1ce9808..d2fb775 100644
--- a/libgo/go/runtime/netpoll.go
+++ b/libgo/go/runtime/netpoll.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd hurd js,wasm linux nacl netbsd openbsd solaris windows
+// +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris windows
package runtime
@@ -15,12 +15,26 @@ import (
//go:linkname netpoll
// Integrated network poller (platform-independent part).
-// A particular implementation (epoll/kqueue) must define the following functions:
-// func netpollinit() // to initialize the poller
-// func netpollopen(fd uintptr, pd *pollDesc) int32 // to arm edge-triggered notifications
-// and associate fd with pd.
-// An implementation must call the following function to denote that the pd is ready.
-// func netpollready(gpp **g, pd *pollDesc, mode int32)
+// A particular implementation (epoll/kqueue/port/AIX/Windows)
+// must define the following functions:
+//
+// func netpollinit()
+// Initialize the poller. Only called once.
+//
+// func netpollopen(fd uintptr, pd *pollDesc) int32
+// Arm edge-triggered notifications for fd. The pd argument is passed
+// back to netpollready when fd is ready. Return an errno value.
+//
+// func netpoll(delta int64) gList
+// Poll the network. If delta < 0, block indefinitely. If delta == 0,
+// poll without blocking. If delta > 0, block for up to delta nanoseconds.
+// Return a list of goroutines built by calling netpollready.
+//
+// func netpollBreak()
+// Wake up the network poller, assumed to be blocked in netpoll.
+//
+// func netpollIsPollDescriptor(fd uintptr) bool
+// Reports whether fd is a file descriptor used by the poller.
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
@@ -82,15 +96,27 @@ type pollCache struct {
}
var (
- netpollInited uint32
+ netpollInitLock mutex
+ netpollInited uint32
+
pollcache pollCache
netpollWaiters uint32
)
//go:linkname poll_runtime_pollServerInit internal..z2fpoll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
- netpollinit()
- atomic.Store(&netpollInited, 1)
+ netpollGenericInit()
+}
+
+func netpollGenericInit() {
+ if atomic.Load(&netpollInited) == 0 {
+ lock(&netpollInitLock)
+ if netpollInited == 0 {
+ netpollinit()
+ atomic.Store(&netpollInited, 1)
+ }
+ unlock(&netpollInitLock)
+ }
}
func netpollinited() bool {
@@ -102,14 +128,7 @@ func netpollinited() bool {
// poll_runtime_isPollServerDescriptor reports whether fd is a
// descriptor being used by netpoll.
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
- fds := netpolldescriptor()
- if GOOS != "aix" && GOOS != "hurd" {
- return fd == fds
- } else {
- // AIX have a pipe in its netpoll implementation.
- // Therefore, two fd are returned by netpolldescriptor using a mask.
- return fd == fds&0xFFFF || fd == (fds>>16)&0xFFFF
- }
+ return netpollIsPollDescriptor(fd)
}
//go:linkname poll_runtime_pollOpen internal..z2fpoll.runtime_pollOpen
@@ -240,13 +259,12 @@ func poll_runtime_pollSetDeadline(ctx uintptr, d int64, mode int) {
if pd.rt.f == nil {
if pd.rd > 0 {
pd.rt.f = rtf
- pd.rt.when = pd.rd
// Copy current seq into the timer arg.
// Timer func will check the seq against current descriptor seq,
// if they differ the descriptor was reused or timers were reset.
pd.rt.arg = pd
pd.rt.seq = pd.rseq
- addtimer(&pd.rt)
+ resettimer(&pd.rt, pd.rd)
}
} else if pd.rd != rd0 || combo != combo0 {
pd.rseq++ // invalidate current timers
@@ -260,10 +278,9 @@ func poll_runtime_pollSetDeadline(ctx uintptr, d int64, mode int) {
if pd.wt.f == nil {
if pd.wd > 0 && !combo {
pd.wt.f = netpollWriteDeadline
- pd.wt.when = pd.wd
pd.wt.arg = pd
pd.wt.seq = pd.wseq
- addtimer(&pd.wt)
+ resettimer(&pd.wt, pd.wd)
}
} else if pd.wd != wd0 || combo != combo0 {
pd.wseq++ // invalidate current timers
@@ -325,8 +342,13 @@ func poll_runtime_pollUnblock(ctx uintptr) {
}
}
-// make pd ready, newly runnable goroutines (if any) are added to toRun.
-// May run during STW, so write barriers are not allowed.
+// netpollready is called by the platform-specific netpoll function.
+// It declares that the fd associated with pd is ready for I/O.
+// The toRun argument is used to build a list of goroutines to return
+// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
+// whether the fd is ready for reading or writing or both.
+//
+// This may run while the world is stopped, so write barriers are not allowed.
//go:nowritebarrier
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
var rg, wg *g
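
The netpollGenericInit added above is a double-checked initialization: a cheap atomic load on the fast path, then a lock and a second check so netpollinit runs exactly once even when several threads race to start the poller. A minimal user-space sketch of the same shape, assuming only sync and sync/atomic (the runtime spells it out with its own mutex because it cannot depend on package sync):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var (
	pollerInited uint32
	pollerInitMu sync.Mutex
)

// genericInit runs init exactly once: fast atomic check, then lock and
// re-check, mirroring netpollGenericInit.
func genericInit(init func()) {
	if atomic.LoadUint32(&pollerInited) == 0 {
		pollerInitMu.Lock()
		if pollerInited == 0 {
			init()
			atomic.StoreUint32(&pollerInited, 1)
		}
		pollerInitMu.Unlock()
	}
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			genericInit(func() { fmt.Println("initialized once") })
		}()
	}
	wg.Wait()
}

sync.Once provides the same guarantee in ordinary code; the explicit flag is kept in the runtime because netpollinited() also needs to read it.
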
diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go
index 39e36c7..a00742e 100644
--- a/libgo/go/runtime/netpoll_aix.go
+++ b/libgo/go/runtime/netpoll_aix.go
@@ -14,18 +14,6 @@ import "unsafe"
//extern poll
func libc_poll(pfds *pollfd, npfds uintptr, timeout uintptr) int32
-//go:noescape
-//extern pipe
-func libc_pipe(fd *int32) int32
-
-//extern __go_fcntl_uintptr
-func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
-
-func fcntl(fd, cmd int32, arg uintptr) int32 {
- r, _ := fcntlUintptr(uintptr(fd), uintptr(cmd), arg)
- return int32(r)
-}
-
// pollfd represents the poll structure for AIX operating system.
type pollfd struct {
fd int32
@@ -49,22 +37,13 @@ var (
)
func netpollinit() {
- var p [2]int32
-
// Create the pipe we use to wakeup poll.
- if err := libc_pipe(&p[0]); err < 0 {
+ r, w, errno := nonblockingPipe()
+ if errno != 0 {
throw("netpollinit: failed to create pipe")
}
- rdwake = p[0]
- wrwake = p[1]
-
- fl := uintptr(fcntl(rdwake, _F_GETFL, 0))
- fcntl(rdwake, _F_SETFL, fl|_O_NONBLOCK)
- fcntl(rdwake, _F_SETFD, _FD_CLOEXEC)
-
- fl = uintptr(fcntl(wrwake, _F_GETFL, 0))
- fcntl(wrwake, _F_SETFL, fl|_O_NONBLOCK)
- fcntl(wrwake, _F_SETFD, _FD_CLOEXEC)
+ rdwake = r
+ wrwake = w
// Pre-allocate array of pollfd structures for poll.
pfds = make([]pollfd, 1, 128)
@@ -77,12 +56,8 @@ func netpollinit() {
pds[0] = nil
}
-func netpolldescriptor() uintptr {
- // Both fd must be returned
- if rdwake > 0xFFFF || wrwake > 0xFFFF {
- throw("netpolldescriptor: invalid fd number")
- }
- return uintptr(rdwake<<16 | wrwake)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(rdwake) || fd == uintptr(wrwake)
}
// netpollwakeup writes on wrwake to wakeup poll before any changes.
@@ -146,12 +121,32 @@ func netpollarm(pd *pollDesc, mode int) {
unlock(&mtxset)
}
+// netpollBreak interrupts a poll.
+func netpollBreak() {
+ netpollwakeup()
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
//go:nowritebarrierrec
-func netpoll(block bool) gList {
- timeout := ^uintptr(0)
- if !block {
- timeout = 0
+func netpoll(delay int64) gList {
+ var timeout uintptr
+ if delay < 0 {
+ timeout = ^uintptr(0)
+ } else if delay == 0 {
+ // TODO: call poll with timeout == 0
return gList{}
+ } else if delay < 1e6 {
+ timeout = 1
+ } else if delay < 1e15 {
+ timeout = uintptr(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ timeout = 1e9
}
retry:
lock(&mtxpoll)
@@ -167,20 +162,29 @@ retry:
throw("poll failed")
}
unlock(&mtxset)
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if timeout > 0 {
+ return gList{}
+ }
goto retry
}
// Check if some descriptors need to be changed
if n != 0 && pfds[0].revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 {
- var b [1]byte
- for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 {
+ if delay != 0 {
+ // A netpollwakeup could be picked up by a
+ // non-blocking poll. Only clear the wakeup
+ // if blocking.
+ var b [1]byte
+ for read(rdwake, unsafe.Pointer(&b[0]), 1) == 1 {
+ }
}
- // Do not look at the other fds in this case as the mode may have changed
- // XXX only additions of flags are made, so maybe it is ok
- unlock(&mtxset)
- goto retry
+ // Still look at the other fds even if the mode may have
+ // changed, as netpollBreak might have been called.
+ n--
}
var toRun gList
- for i := 0; i < len(pfds) && n > 0; i++ {
+ for i := 1; i < len(pfds) && n > 0; i++ {
pfd := &pfds[i]
var mode int32
@@ -202,8 +206,5 @@ retry:
}
}
unlock(&mtxset)
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
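
The new netpoll(delay) signature drives the timed sleep: a nanosecond delay becomes a poll(2) millisecond timeout. A standalone sketch of that conversion, with the same special cases and cap as above (illustrative function name, not runtime code):

package main

import "fmt"

// pollTimeoutMS converts a nanosecond delay to a poll timeout in
// milliseconds: negative means block forever, zero means do not block,
// sub-millisecond delays still get a 1ms timeout, and very long delays are
// capped at 1e9 ms (about 11.5 days).
func pollTimeoutMS(delay int64) uintptr {
	switch {
	case delay < 0:
		return ^uintptr(0)
	case delay == 0:
		return 0
	case delay < 1e6:
		return 1
	case delay < 1e15:
		return uintptr(delay / 1e6)
	default:
		return 1e9
	}
}

func main() {
	for _, d := range []int64{-1, 0, 500, 2_500_000, 3e15} {
		fmt.Printf("delay=%d ns -> timeout=%d ms\n", d, pollTimeoutMS(d))
	}
}
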
diff --git a/libgo/go/runtime/netpoll_epoll.go b/libgo/go/runtime/netpoll_epoll.go
index 885ac1f..7b215f3 100644
--- a/libgo/go/runtime/netpoll_epoll.go
+++ b/libgo/go/runtime/netpoll_epoll.go
@@ -22,33 +22,44 @@ func epollctl(epfd, op, fd int32, ev *epollevent) int32
//extern epoll_wait
func epollwait(epfd int32, ev *epollevent, nev, timeout int32) int32
-//extern __go_fcntl_uintptr
-func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
-
-func closeonexec(fd int32) {
- fcntlUintptr(uintptr(fd), _F_SETFD, _FD_CLOEXEC)
-}
-
var (
epfd int32 = -1 // epoll descriptor
+
+ netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
)
func netpollinit() {
epfd = epollcreate1(_EPOLL_CLOEXEC)
- if epfd >= 0 {
- return
- }
- epfd = epollcreate(1024)
- if epfd >= 0 {
+ if epfd < 0 {
+ epfd = epollcreate(1024)
+ if epfd < 0 {
+ println("runtime: epollcreate failed with", -epfd)
+ throw("runtime: netpollinit failed")
+ }
closeonexec(epfd)
- return
}
- println("netpollinit: failed to create epoll descriptor", errno())
- throw("netpollinit: failed to create descriptor")
+ r, w, cerrno := nonblockingPipe()
+ if cerrno != 0 {
+ println("runtime: pipe failed with", cerrno)
+ throw("runtime: pipe failed")
+ }
+ ev := epollevent{
+ events: _EPOLLIN,
+ }
+ *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
+ if epollctl(epfd, _EPOLL_CTL_ADD, r, &ev) < 0 {
+ cerrno = int32(errno())
+ }
+ if cerrno != 0 {
+ println("runtime: epollctl failed with", cerrno)
+ throw("runtime: epollctl failed")
+ }
+ netpollBreakRd = uintptr(r)
+ netpollBreakWr = uintptr(w)
}
-func netpolldescriptor() uintptr {
- return uintptr(epfd)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(epfd) || fd == netpollBreakRd || fd == netpollBreakWr
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -73,15 +84,47 @@ func netpollarm(pd *pollDesc, mode int) {
throw("runtime: unused")
}
-// polls for ready network connections
-// returns list of goroutines that become runnable
-func netpoll(block bool) gList {
+// netpollBreak interrupts an epollwait.
+func netpollBreak() {
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 {
+ break
+ }
+ if n == -_EINTR {
+ continue
+ }
+ if n == -_EAGAIN {
+ return
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
+ }
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if epfd == -1 {
return gList{}
}
- waitms := int32(-1)
- if !block {
+ var waitms int32
+ if delay < 0 {
+ waitms = -1
+ } else if delay == 0 {
waitms = 0
+ } else if delay < 1e6 {
+ waitms = 1
+ } else if delay < 1e15 {
+ waitms = int32(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ waitms = 1e9
}
var events [128]epollevent
retry:
@@ -92,6 +135,11 @@ retry:
println("runtime: epollwait on fd", epfd, "failed with", e)
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if waitms > 0 {
+ return gList{}
+ }
goto retry
}
var toRun gList
@@ -100,6 +148,22 @@ retry:
if ev.events == 0 {
continue
}
+
+ if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
+ if ev.events != _EPOLLIN {
+ println("runtime: netpoll: break fd ready for", ev.events)
+ throw("runtime: netpoll: break fd ready for something unexpected")
+ }
+ if delay != 0 {
+ // netpollBreak could be picked up by a
+ // nonblocking poll. Only read the byte
+ // if blocking.
+ var tmp [16]byte
+ read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
+ }
+ continue
+ }
+
var mode int32
if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
mode += 'r'
@@ -116,8 +180,5 @@ retry:
netpollready(&toRun, pd, mode)
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
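
netpollBreak above is the classic self-pipe trick: one byte written to the nonblocking write end makes the read end (registered with epoll) ready and wakes epollwait; the blocking poller then drains the pipe. A user-space sketch of the same write loop and drain, assuming Linux and the standard syscall package:

package main

import (
	"fmt"
	"syscall"
)

// wakePoller writes one byte to the pipe's write end. EAGAIN means a wakeup
// is already pending (the pipe is nonblocking), EINTR is retried, and
// anything else is a real error.
func wakePoller(wr int) error {
	b := []byte{0}
	for {
		_, err := syscall.Write(wr, b)
		switch err {
		case nil, syscall.EAGAIN:
			return nil
		case syscall.EINTR:
			continue
		default:
			return err
		}
	}
}

func main() {
	var p [2]int
	if err := syscall.Pipe2(p[:], syscall.O_NONBLOCK|syscall.O_CLOEXEC); err != nil {
		panic(err)
	}
	if err := wakePoller(p[1]); err != nil {
		panic(err)
	}
	// The blocking poller drains the read end so later wakeups are seen again.
	buf := make([]byte, 16)
	n, _ := syscall.Read(p[0], buf)
	fmt.Println("drained", n, "byte(s)")
}
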
diff --git a/libgo/go/runtime/netpoll_fake.go b/libgo/go/runtime/netpoll_fake.go
index 5b1a63a..b2af3b8 100644
--- a/libgo/go/runtime/netpoll_fake.go
+++ b/libgo/go/runtime/netpoll_fake.go
@@ -2,18 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Fake network poller for NaCl and wasm/js.
-// Should never be used, because NaCl and wasm/js network connections do not honor "SetNonblock".
+// Fake network poller for wasm/js.
+// Should never be used, because wasm/js network connections do not honor "SetNonblock".
-// +build nacl js,wasm
+// +build js,wasm
package runtime
func netpollinit() {
}
-func netpolldescriptor() uintptr {
- return ^uintptr(0)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return false
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -27,6 +27,9 @@ func netpollclose(fd uintptr) int32 {
func netpollarm(pd *pollDesc, mode int) {
}
-func netpoll(block bool) gList {
+func netpollBreak() {
+}
+
+func netpoll(delay int64) gList {
return gList{}
}
diff --git a/libgo/go/runtime/netpoll_kqueue.go b/libgo/go/runtime/netpoll_kqueue.go
index ce1acdf..9450461 100644
--- a/libgo/go/runtime/netpoll_kqueue.go
+++ b/libgo/go/runtime/netpoll_kqueue.go
@@ -17,16 +17,10 @@ func kqueue() int32
//extern kevent
func kevent(kq int32, ch *keventt, nch uintptr, ev *keventt, nev uintptr, ts *timespec) int32
-//extern __go_fcntl_uintptr
-func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
-
-//go:nosplit
-func closeonexec(fd int32) {
- fcntlUintptr(uintptr(fd), _F_SETFD, _FD_CLOEXEC)
-}
-
var (
kq int32 = -1
+
+ netpollBreakRd, netpollBreakWr uintptr // for netpollBreak
)
func netpollinit() {
@@ -36,10 +30,27 @@ func netpollinit() {
throw("runtime: netpollinit failed")
}
closeonexec(kq)
+ r, w, errno := nonblockingPipe()
+ if errno != 0 {
+ println("runtime: pipe failed with", -errno)
+ throw("runtime: pipe failed")
+ }
+ ev := keventt{
+ filter: _EVFILT_READ,
+ flags: _EV_ADD,
+ }
+ *(*uintptr)(unsafe.Pointer(&ev.ident)) = uintptr(r)
+ n := kevent(kq, &ev, 1, nil, 0, nil)
+ if n < 0 {
+ println("runtime: kevent failed with", -n)
+ throw("runtime: kevent failed")
+ }
+ netpollBreakRd = uintptr(r)
+ netpollBreakWr = uintptr(w)
}
-func netpolldescriptor() uintptr {
- return uintptr(kq)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(kq) || fd == netpollBreakRd || fd == netpollBreakWr
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -72,15 +83,43 @@ func netpollarm(pd *pollDesc, mode int) {
throw("runtime: unused")
}
-// Polls for ready network connections.
+// netpollBreak interrupts a kevent.
+func netpollBreak() {
+ for {
+ var b byte
+ n := write(netpollBreakWr, unsafe.Pointer(&b), 1)
+ if n == 1 || n == -_EAGAIN {
+ break
+ }
+ if n == -_EINTR {
+ continue
+ }
+ println("runtime: netpollBreak write failed with", -n)
+ throw("runtime: netpollBreak write failed")
+ }
+}
+
+// netpoll checks for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if kq == -1 {
return gList{}
}
var tp *timespec
var ts timespec
- if !block {
+ if delay < 0 {
+ tp = nil
+ } else if delay == 0 {
+ tp = &ts
+ } else {
+ ts.setNsec(delay)
+ if ts.tv_sec > 1e6 {
+ // Darwin returns EINVAL if the sleep time is too long.
+ ts.tv_sec = 1e6
+ }
tp = &ts
}
var events [64]keventt
@@ -92,11 +131,32 @@ retry:
println("runtime: kevent on fd", kq, "failed with", e)
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted, just return to
+ // recalculate how long we should sleep now.
+ if delay > 0 {
+ return gList{}
+ }
goto retry
}
var toRun gList
for i := 0; i < int(n); i++ {
ev := &events[i]
+
+ if uintptr(ev.ident) == netpollBreakRd {
+ if ev.filter != _EVFILT_READ {
+ println("runtime: netpoll: break fd ready for", ev.filter)
+ throw("runtime: netpoll: break fd ready for something unexpected")
+ }
+ if delay != 0 {
+ // netpollBreak could be picked up by a
+ // nonblocking poll. Only read the byte
+ // if blocking.
+ var tmp [16]byte
+ read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
+ }
+ continue
+ }
+
var mode int32
switch ev.filter {
case _EVFILT_READ:
@@ -126,8 +186,5 @@ retry:
netpollready(&toRun, pd, mode)
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
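
On kqueue the delay becomes a timespec rather than milliseconds, and the seconds are capped because Darwin returns EINVAL for very long sleeps. A standalone sketch of that split and cap (illustrative names, not runtime code):

package main

import "fmt"

// delayToTimespec splits a nanosecond delay into seconds and leftover
// nanoseconds, capping the seconds at 1e6 (about 11.5 days). A negative
// delay means pass a nil timespec and block indefinitely.
func delayToTimespec(delay int64) (sec, nsec int64, block bool) {
	if delay < 0 {
		return 0, 0, true
	}
	sec = delay / 1e9
	nsec = delay % 1e9
	if sec > 1e6 {
		sec = 1e6
	}
	return sec, nsec, false
}

func main() {
	for _, d := range []int64{-1, 0, 1_500_000_000, 3e15} {
		sec, nsec, block := delayToTimespec(d)
		fmt.Printf("delay=%d ns -> sec=%d nsec=%d block=%v\n", d, sec, nsec, block)
	}
}
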
diff --git a/libgo/go/runtime/netpoll_solaris.go b/libgo/go/runtime/netpoll_solaris.go
index 222af29..acb8bab 100644
--- a/libgo/go/runtime/netpoll_solaris.go
+++ b/libgo/go/runtime/netpoll_solaris.go
@@ -67,14 +67,6 @@ import "unsafe"
// again we know for sure we are always talking about the same file
// descriptor and can safely access the data we want (the event set).
-//extern __go_fcntl_uintptr
-func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
-
-func fcntl(fd, cmd int32, arg uintptr) int32 {
- r, _ := fcntlUintptr(uintptr(fd), uintptr(cmd), arg)
- return int32(r)
-}
-
//extern port_create
func port_create() int32
@@ -88,12 +80,15 @@ func port_dissociate(port, source int32, object uintptr) int32
//extern port_getn
func port_getn(port int32, evs *portevent, max uint32, nget *uint32, timeout *timespec) int32
+//extern port_alert
+func port_alert(port int32, flags, events uint32, user uintptr) int32
+
var portfd int32 = -1
func netpollinit() {
portfd = port_create()
if portfd >= 0 {
- fcntl(portfd, _F_SETFD, _FD_CLOEXEC)
+ closeonexec(portfd)
return
}
@@ -101,8 +96,8 @@ func netpollinit() {
throw("runtime: netpollinit failed")
}
-func netpolldescriptor() uintptr {
- return uintptr(portfd)
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == uintptr(portfd)
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -164,27 +159,68 @@ func netpollarm(pd *pollDesc, mode int) {
unlock(&pd.lock)
}
-// polls for ready network connections
-// returns list of goroutines that become runnable
-func netpoll(block bool) gList {
+// netpollBreak interrupts a port_getn wait.
+func netpollBreak() {
+ // Use port_alert to put portfd into alert mode.
+ // This will wake up all threads sleeping in port_getn on portfd,
+ // and cause their calls to port_getn to return immediately.
+ // Further, until portfd is taken out of alert mode,
+ // all calls to port_getn will return immediately.
+ if port_alert(portfd, _PORT_ALERT_UPDATE, _POLLHUP, uintptr(unsafe.Pointer(&portfd))) < 0 {
+ if e := errno(); e != _EBUSY {
+ println("runtime: port_alert failed with", e)
+ throw("runtime: netpoll: port_alert failed")
+ }
+ }
+}
+
+// netpoll checks for ready network connections.
+// Returns list of goroutines that become runnable.
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
if portfd == -1 {
return gList{}
}
var wait *timespec
- var zero timespec
- if !block {
- wait = &zero
+ var ts timespec
+ if delay < 0 {
+ wait = nil
+ } else if delay == 0 {
+ wait = &ts
+ } else {
+ ts.setNsec(delay)
+ if ts.tv_sec > 1e6 {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e6 s == ~11.5 days.
+ ts.tv_sec = 1e6
+ }
+ wait = &ts
}
var events [128]portevent
retry:
var n uint32 = 1
- if port_getn(portfd, &events[0], uint32(len(events)), &n, wait) < 0 {
- if e := errno(); e != _EINTR {
+ r := port_getn(portfd, &events[0], uint32(len(events)), &n, wait)
+ e := errno()
+ if r < 0 && e == _ETIME && n > 0 {
+ // As per port_getn(3C), an ETIME failure does not preclude the
+ // delivery of some number of events. Treat a timeout failure
+ // with delivered events as a success.
+ r = 0
+ }
+ if r < 0 {
+ if e != _EINTR && e != _ETIME {
print("runtime: port_getn on fd ", portfd, " failed (errno=", e, ")\n")
throw("runtime: netpoll failed")
}
+ // If a timed sleep was interrupted and there are no events,
+ // just return to recalculate how long we should sleep now.
+ if delay > 0 {
+ return gList{}
+ }
goto retry
}
@@ -192,6 +228,24 @@ retry:
for i := 0; i < int(n); i++ {
ev := &events[i]
+ if ev.portev_source == _PORT_SOURCE_ALERT {
+ if ev.portev_events != _POLLHUP || unsafe.Pointer(ev.portev_user) != unsafe.Pointer(&portfd) {
+ throw("runtime: netpoll: bad port_alert wakeup")
+ }
+ if delay != 0 {
+ // Now that a blocking call to netpoll
+ // has seen the alert, take portfd
+ // back out of alert mode.
+ // See the comment in netpollBreak.
+ if port_alert(portfd, 0, 0, 0) < 0 {
+ e := errno()
+ println("runtime: port_alert failed with", e)
+ throw("runtime: netpoll: port_alert failed")
+ }
+ }
+ continue
+ }
+
if ev.portev_events == 0 {
continue
}
@@ -228,8 +282,5 @@ retry:
}
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
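
The subtle part of the port_getn handling above is that an ETIME return may still deliver events, so it only counts as a timeout when nothing came back. A small sketch of that decision logic, using errno names from the standard syscall package purely as an illustration:

package main

import (
	"fmt"
	"syscall"
)

// classifyPortGetn mimics the retry logic: ETIME with delivered events is a
// success; EINTR/ETIME with nothing delivered either returns to the caller
// (timed wait) or retries (untimed wait); any other error is fatal.
func classifyPortGetn(r int, errno syscall.Errno, nevents uint32, timed bool) string {
	if r < 0 && errno == syscall.ETIME && nevents > 0 {
		r = 0 // timeout, but events were delivered: treat as success
	}
	if r < 0 {
		if errno != syscall.EINTR && errno != syscall.ETIME {
			return "fatal"
		}
		if timed {
			return "return and recompute the sleep"
		}
		return "retry"
	}
	return fmt.Sprintf("process %d event(s)", nevents)
}

func main() {
	fmt.Println(classifyPortGetn(-1, syscall.ETIME, 3, true))
	fmt.Println(classifyPortGetn(-1, syscall.EINTR, 0, false))
	fmt.Println(classifyPortGetn(0, 0, 5, true))
}
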
diff --git a/libgo/go/runtime/netpoll_stub.go b/libgo/go/runtime/netpoll_stub.go
index f585333..fe45cfb 100644
--- a/libgo/go/runtime/netpoll_stub.go
+++ b/libgo/go/runtime/netpoll_stub.go
@@ -6,16 +6,42 @@
package runtime
+import "runtime/internal/atomic"
+
+var netpollInited uint32
var netpollWaiters uint32
+var netpollStubLock mutex
+var netpollNote note
+var netpollBroken uint32
+
+func netpollGenericInit() {
+ atomic.Store(&netpollInited, 1)
+}
+
+func netpollBreak() {
+ if atomic.Cas(&netpollBroken, 0, 1) {
+ notewakeup(&netpollNote)
+ }
+}
+
// Polls for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+func netpoll(delay int64) gList {
// Implementation for platforms that do not support
// integrated network poller.
+ if delay != 0 {
+ // This lock ensures that only one goroutine tries to use
+ // the note. It should normally be completely uncontended.
+ lock(&netpollStubLock)
+ noteclear(&netpollNote)
+ atomic.Store(&netpollBroken, 0)
+ notetsleep(&netpollNote, delay)
+ unlock(&netpollStubLock)
+ }
return gList{}
}
func netpollinited() bool {
- return false
+ return atomic.Load(&netpollInited) != 0
}
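
The stub poller now supports timed sleeps plus a wakeup: netpollBreak uses a compare-and-swap so only the first break per sleep touches the note. A rough user-space analogue, with a buffered channel standing in for the note (the runtime cannot use channels at this level):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

var (
	broken uint32
	wakeCh = make(chan struct{}, 1) // stands in for the runtime note
)

// breakSleep wakes the sleeper at most once per sleep, like netpollBreak.
func breakSleep() {
	if atomic.CompareAndSwapUint32(&broken, 0, 1) {
		wakeCh <- struct{}{}
	}
}

// sleepOrBreak waits up to delay, returning early if breakSleep runs.
func sleepOrBreak(delay time.Duration) {
	atomic.StoreUint32(&broken, 0)
	select {
	case <-wakeCh:
		fmt.Println("woken early")
	case <-time.After(delay):
		fmt.Println("timed out")
	}
}

func main() {
	go func() {
		time.Sleep(10 * time.Millisecond)
		breakSleep()
		breakSleep() // no-op: the CAS already fired for this sleep
	}()
	sleepOrBreak(time.Second)
}
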
diff --git a/libgo/go/runtime/netpoll_windows.go b/libgo/go/runtime/netpoll_windows.go
index 07ef15c..ced52cb 100644
--- a/libgo/go/runtime/netpoll_windows.go
+++ b/libgo/go/runtime/netpoll_windows.go
@@ -41,8 +41,8 @@ func netpollinit() {
}
}
-func netpolldescriptor() uintptr {
- return iocphandle
+func netpollIsPollDescriptor(fd uintptr) bool {
+ return fd == iocphandle
}
func netpollopen(fd uintptr, pd *pollDesc) int32 {
@@ -61,9 +61,19 @@ func netpollarm(pd *pollDesc, mode int) {
throw("runtime: unused")
}
-// Polls for completed network IO.
+func netpollBreak() {
+ if stdcall4(_PostQueuedCompletionStatus, iocphandle, 0, 0, 0) == 0 {
+ println("runtime: netpoll: PostQueuedCompletionStatus failed (errno=", getlasterror(), ")")
+ throw("runtime: netpoll: PostQueuedCompletionStatus failed")
+ }
+}
+
+// netpoll checks for ready network connections.
// Returns list of goroutines that become runnable.
-func netpoll(block bool) gList {
+// delay < 0: blocks indefinitely
+// delay == 0: does not block, just polls
+// delay > 0: block for up to that many nanoseconds
+func netpoll(delay int64) gList {
var entries [64]overlappedEntry
var wait, qty, key, flags, n, i uint32
var errno int32
@@ -75,23 +85,32 @@ func netpoll(block bool) gList {
if iocphandle == _INVALID_HANDLE_VALUE {
return gList{}
}
- wait = 0
- if block {
+ if delay < 0 {
wait = _INFINITE
+ } else if delay == 0 {
+ wait = 0
+ } else if delay < 1e6 {
+ wait = 1
+ } else if delay < 1e15 {
+ wait = uint32(delay / 1e6)
+ } else {
+ // An arbitrary cap on how long to wait for a timer.
+ // 1e9 ms == ~11.5 days.
+ wait = 1e9
}
-retry:
+
if _GetQueuedCompletionStatusEx != nil {
n = uint32(len(entries) / int(gomaxprocs))
if n < 8 {
n = 8
}
- if block {
+ if delay != 0 {
mp.blocked = true
}
if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
mp.blocked = false
errno = int32(getlasterror())
- if !block && errno == _WAIT_TIMEOUT {
+ if errno == _WAIT_TIMEOUT {
return gList{}
}
println("runtime: GetQueuedCompletionStatusEx failed (errno=", errno, ")")
@@ -100,24 +119,32 @@ retry:
mp.blocked = false
for i = 0; i < n; i++ {
op = entries[i].op
- errno = 0
- qty = 0
- if stdcall5(_WSAGetOverlappedResult, op.pd.fd, uintptr(unsafe.Pointer(op)), uintptr(unsafe.Pointer(&qty)), 0, uintptr(unsafe.Pointer(&flags))) == 0 {
- errno = int32(getlasterror())
+ if op != nil {
+ errno = 0
+ qty = 0
+ if stdcall5(_WSAGetOverlappedResult, op.pd.fd, uintptr(unsafe.Pointer(op)), uintptr(unsafe.Pointer(&qty)), 0, uintptr(unsafe.Pointer(&flags))) == 0 {
+ errno = int32(getlasterror())
+ }
+ handlecompletion(&toRun, op, errno, qty)
+ } else {
+ if delay == 0 {
+ // Forward the notification to the
+ // blocked poller.
+ netpollBreak()
+ }
}
- handlecompletion(&toRun, op, errno, qty)
}
} else {
op = nil
errno = 0
qty = 0
- if block {
+ if delay != 0 {
mp.blocked = true
}
if stdcall5(_GetQueuedCompletionStatus, iocphandle, uintptr(unsafe.Pointer(&qty)), uintptr(unsafe.Pointer(&key)), uintptr(unsafe.Pointer(&op)), uintptr(wait)) == 0 {
mp.blocked = false
errno = int32(getlasterror())
- if !block && errno == _WAIT_TIMEOUT {
+ if errno == _WAIT_TIMEOUT {
return gList{}
}
if op == nil {
@@ -127,11 +154,16 @@ retry:
// dequeued failed IO packet, so report that
}
mp.blocked = false
+ if op == nil {
+ if delay == 0 {
+ // Forward the notification to the
+ // blocked poller.
+ netpollBreak()
+ }
+ return gList{}
+ }
handlecompletion(&toRun, op, errno, qty)
}
- if block && toRun.empty() {
- goto retry
- }
return toRun
}
diff --git a/libgo/go/runtime/os3_solaris.go b/libgo/go/runtime/os3_solaris.go
index d5fbccd..001feed 100644
--- a/libgo/go/runtime/os3_solaris.go
+++ b/libgo/go/runtime/os3_solaris.go
@@ -25,13 +25,6 @@ func getncpu() int32 {
return n
}
-func osinit() {
- ncpu = getncpu()
- if physPageSize == 0 {
- physPageSize = uintptr(getPageSize())
- }
-}
-
func sysargs(argc int32, argv **byte) {
executablePath = gostringnocopy(getexecname())
}
diff --git a/libgo/go/runtime/os_darwin.go b/libgo/go/runtime/os_darwin.go
index 498bd43..58e0412 100644
--- a/libgo/go/runtime/os_darwin.go
+++ b/libgo/go/runtime/os_darwin.go
@@ -6,24 +6,6 @@ package runtime
import "unsafe"
-//extern pipe
-func libcPipe([2]int32) int32
-
-func pipe() (r, w int32, e int32) {
- var p [2]int32
- r := libcPipe(noescape(unsafe.Pointer(&p)))
- if r < 0 {
- e = int32(errno())
- }
- return p[0], p[1], e
-}
-
-//go:nosplit
-func setNonblock(fd int32) {
- flags := fcntlUintptr(uintptr(fd), _F_GETFL, 0)
- fcntlUintptr(uintptr(fd), _F_SETFL, flags|_O_NONBLOCK)
-}
-
type mOS struct {
initialized bool
mutex pthreadmutex
diff --git a/libgo/go/runtime/os_freebsd_arm64.go b/libgo/go/runtime/os_freebsd_arm64.go
new file mode 100644
index 0000000..51ebf9d
--- /dev/null
+++ b/libgo/go/runtime/os_freebsd_arm64.go
@@ -0,0 +1,155 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "internal/cpu"
+
+const (
+ hwcap_FP = 1 << 0
+ hwcap_ASIMD = 1 << 1
+ hwcap_EVTSTRM = 1 << 2
+ hwcap_AES = 1 << 3
+ hwcap_PMULL = 1 << 4
+ hwcap_SHA1 = 1 << 5
+ hwcap_SHA2 = 1 << 6
+ hwcap_CRC32 = 1 << 7
+ hwcap_ATOMICS = 1 << 8
+ hwcap_FPHP = 1 << 9
+ hwcap_ASIMDHP = 1 << 10
+ hwcap_CPUID = 1 << 11
+ hwcap_ASIMDRDM = 1 << 12
+ hwcap_JSCVT = 1 << 13
+ hwcap_FCMA = 1 << 14
+ hwcap_LRCPC = 1 << 15
+ hwcap_DCPOP = 1 << 16
+ hwcap_SHA3 = 1 << 17
+ hwcap_SM3 = 1 << 18
+ hwcap_SM4 = 1 << 19
+ hwcap_ASIMDDP = 1 << 20
+ hwcap_SHA512 = 1 << 21
+ hwcap_SVE = 1 << 22
+ hwcap_ASIMDFHM = 1 << 23
+)
+
+func getisar0() uint64
+func getisar1() uint64
+func getpfr0() uint64
+
+// There is no hwcap support on FreeBSD aarch64; we need to retrieve the info
+// from ID_AA64ISAR0_EL1, ID_AA64ISAR1_EL1 and ID_AA64PFR0_EL1.
+func archauxv(tag, val uintptr) {
+ var isar0, isar1, pfr0 uint64
+
+ isar0 = getisar0()
+ isar1 = getisar1()
+ pfr0 = getpfr0()
+
+ // ID_AA64ISAR0_EL1
+ switch extractBits(isar0, 4, 7) {
+ case 1:
+ cpu.HWCap |= hwcap_AES
+ case 2:
+ cpu.HWCap |= hwcap_PMULL | hwcap_AES
+ }
+
+ switch extractBits(isar0, 8, 11) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA1
+ }
+
+ switch extractBits(isar0, 12, 15) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA2
+ case 2:
+ cpu.HWCap |= hwcap_SHA2 | hwcap_SHA512
+ }
+
+ switch extractBits(isar0, 16, 19) {
+ case 1:
+ cpu.HWCap |= hwcap_CRC32
+ }
+
+ switch extractBits(isar0, 20, 23) {
+ case 2:
+ cpu.HWCap |= hwcap_ATOMICS
+ }
+
+ switch extractBits(isar0, 28, 31) {
+ case 1:
+ cpu.HWCap |= hwcap_ASIMDRDM
+ }
+
+ switch extractBits(isar0, 32, 35) {
+ case 1:
+ cpu.HWCap |= hwcap_SHA3
+ }
+
+ switch extractBits(isar0, 36, 39) {
+ case 1:
+ cpu.HWCap |= hwcap_SM3
+ }
+
+ switch extractBits(isar0, 40, 43) {
+ case 1:
+ cpu.HWCap |= hwcap_SM4
+ }
+
+ switch extractBits(isar0, 44, 47) {
+ case 1:
+ cpu.HWCap |= hwcap_ASIMDDP
+ }
+
+ // ID_AA64ISAR1_EL1
+ switch extractBits(isar1, 0, 3) {
+ case 1:
+ cpu.HWCap |= hwcap_DCPOP
+ }
+
+ switch extractBits(isar1, 12, 15) {
+ case 1:
+ cpu.HWCap |= hwcap_JSCVT
+ }
+
+ switch extractBits(isar1, 16, 19) {
+ case 1:
+ cpu.HWCap |= hwcap_FCMA
+ }
+
+ switch extractBits(isar1, 20, 23) {
+ case 1:
+ cpu.HWCap |= hwcap_LRCPC
+ }
+
+ // ID_AA64PFR0_EL1
+ switch extractBits(pfr0, 16, 19) {
+ case 0:
+ cpu.HWCap |= hwcap_FP
+ case 1:
+ cpu.HWCap |= hwcap_FP | hwcap_FPHP
+ }
+
+ switch extractBits(pfr0, 20, 23) {
+ case 0:
+ cpu.HWCap |= hwcap_ASIMD
+ case 1:
+ cpu.HWCap |= hwcap_ASIMD | hwcap_ASIMDHP
+ }
+
+ switch extractBits(pfr0, 32, 35) {
+ case 1:
+ cpu.HWCap |= hwcap_SVE
+ }
+}
+
+func extractBits(data uint64, start, end uint) uint {
+ return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
+
+//go:nosplit
+func cputicks() int64 {
+ // Currently cputicks() is used in blocking profiler and to seed fastrand().
+ // nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+ return nanotime()
+}
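
extractBits pulls an inclusive bit field out of an ID register, and each field value then maps onto one or more HWCAP bits. A standalone illustration with a made-up register value (the constants below are not real hardware readings):

package main

import "fmt"

// extractBits returns bits [start, end] of data, as in the file above.
func extractBits(data uint64, start, end uint) uint {
	return uint(data>>start) & ((1 << (end - start + 1)) - 1)
}

func main() {
	// Pretend ID_AA64ISAR0_EL1 reports AES field = 2 (AES+PMULL) and
	// SHA2 field = 2 (SHA2+SHA512).
	var isar0 uint64 = 2<<4 | 2<<12
	fmt.Println("AES field:", extractBits(isar0, 4, 7))    // 2
	fmt.Println("SHA2 field:", extractBits(isar0, 12, 15)) // 2
}
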
diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go
index ef33d67..ab19022 100644
--- a/libgo/go/runtime/os_gccgo.go
+++ b/libgo/go/runtime/os_gccgo.go
@@ -51,3 +51,45 @@ func getRandomData(r []byte) {
closefd(fd)
extendRandom(r, int(n))
}
+
+//go:noescape
+//extern pipe
+func libcPipe(*[2]int32) int32
+
+func pipe() (r, w int32, e int32) {
+ var p [2]int32
+ res := libcPipe(&p)
+ if res < 0 {
+ e = int32(errno())
+ }
+ return p[0], p[1], e
+}
+
+//go:noescape
+//extern pipe2
+func libcPipe2(*[2]int32, int32) int32
+
+func pipe2(flags int32) (r, w int32, e int32) {
+ var p [2]int32
+ res := libcPipe2(&p, flags)
+ if res < 0 {
+ e = int32(errno())
+ }
+ return p[0], p[1], e
+}
+
+//extern __go_fcntl_uintptr
+func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr)
+
+//go:nosplit
+func closeonexec(fd int32) {
+ fcntlUintptr(uintptr(fd), _F_SETFD, _FD_CLOEXEC)
+}
+
+//go:nosplit
+func setNonblock(fd int32) {
+ flags, errno := fcntlUintptr(uintptr(fd), _F_GETFL, 0)
+ if errno == 0 {
+ fcntlUintptr(uintptr(fd), _F_SETFL, flags|_O_NONBLOCK)
+ }
+}
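
These pipe/pipe2 wrappers feed the nonblockingPipe helper used by the netpoll changes earlier in the patch (the helper itself is defined elsewhere in the runtime). The usual shape of that helper, sketched in user space with the standard syscall package on Linux: try pipe2 with both flags, and fall back to pipe plus per-fd fixups if pipe2 is unavailable.

package main

import (
	"fmt"
	"syscall"
)

// nonblockingPipeSketch is an illustrative stand-in, not the runtime helper.
func nonblockingPipeSketch() (r, w int, err error) {
	var p [2]int
	err = syscall.Pipe2(p[:], syscall.O_NONBLOCK|syscall.O_CLOEXEC)
	if err == syscall.ENOSYS {
		// Old kernel: plain pipe, then set the flags one fd at a time.
		if err = syscall.Pipe(p[:]); err != nil {
			return 0, 0, err
		}
		for _, fd := range p {
			syscall.CloseOnExec(fd)
			if err = syscall.SetNonblock(fd, true); err != nil {
				return 0, 0, err
			}
		}
	} else if err != nil {
		return 0, 0, err
	}
	return p[0], p[1], nil
}

func main() {
	r, w, err := nonblockingPipeSketch()
	if err != nil {
		panic(err)
	}
	fmt.Println("pipe fds:", r, w)
}
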
diff --git a/libgo/go/runtime/os_illumos.go b/libgo/go/runtime/os_illumos.go
new file mode 100644
index 0000000..ddcb8c9
--- /dev/null
+++ b/libgo/go/runtime/os_illumos.go
@@ -0,0 +1,102 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "unsafe"
+)
+
+// Return the minimum value seen for the zone CPU cap, or 0 if no cap is
+// detected.
+func getcpucap() uint64 {
+ // The resource control block is an opaque object whose size is only
+ // known to libc. In practice, given the contents, it is unlikely to
+ // grow beyond 8KB so we'll use a static buffer of that size here.
+ const rblkmaxsize = 8 * 1024
+ if rctlblk_size() > rblkmaxsize {
+ return 0
+ }
+
+ // The "zone.cpu-cap" resource control, as described in
+ // resource_controls(5), "sets a limit on the amount of CPU time that
+ // can be used by a zone. The unit used is the percentage of a single
+ // CPU that can be used by all user threads in a zone, expressed as an
+ // integer." A C string of the name must be passed to getrctl(2).
+ name := []byte("zone.cpu-cap\x00")
+
+ // To iterate over the list of values for a particular resource
+ // control, we need two blocks: one for the previously read value and
+ // one for the next value.
+ var rblk0 [rblkmaxsize]byte
+ var rblk1 [rblkmaxsize]byte
+ rblk := &rblk0[0]
+ rblkprev := &rblk1[0]
+
+ var flag uint32 = _RCTL_FIRST
+ var capval uint64 = 0
+
+ for {
+ if getrctl(unsafe.Pointer(&name[0]), unsafe.Pointer(rblkprev), unsafe.Pointer(rblk), flag) != 0 {
+ // The end of the sequence is reported as an ENOENT
+ // failure, but determining the CPU cap is not critical
+ // here. We'll treat any failure as if it were the end
+ // of sequence.
+ break
+ }
+
+ lflags := rctlblk_get_local_flags(unsafe.Pointer(rblk))
+ action := rctlblk_get_local_action(unsafe.Pointer(rblk), 0)
+ if (lflags&_RCTL_LOCAL_MAXIMAL) == 0 && action == _RCTL_LOCAL_DENY {
+ // This is a finite (not maximal) value representing a
+ // cap (deny) action.
+ v := rctlblk_get_value(unsafe.Pointer(rblk))
+ if capval == 0 || capval > v {
+ capval = v
+ }
+ }
+
+ // Swap the blocks around so that we can fetch the next value
+ t := rblk
+ rblk = rblkprev
+ rblkprev = t
+ flag = _RCTL_NEXT
+ }
+
+ return capval
+}
+
+func getncpu() int32 {
+ n := int32(sysconf(__SC_NPROCESSORS_ONLN))
+ if n < 1 {
+ return 1
+ }
+
+ if cents := int32(getcpucap()); cents > 0 {
+ // Convert from a percentage of CPUs to a number of CPUs,
+ // rounding up to make use of a fractional CPU
+ // e.g., 336% becomes 4 CPUs
+ ncap := (cents + 99) / 100
+ if ncap < n {
+ return ncap
+ }
+ }
+
+ return n
+}
+
+//extern getrctl
+func getrctl(controlname, oldbuf, newbuf unsafe.Pointer, flags uint32) int32
+
+//extern rctlblk_get_local_action
+func rctlblk_get_local_action(buf, signalp unsafe.Pointer) uint32
+
+//extern rctlblk_get_local_flags
+func rctlblk_get_local_flags(buf unsafe.Pointer) uint32
+
+//extern rctlblk_get_value
+func rctlblk_get_value(buf unsafe.Pointer) uint64
+
+//extern rctlblk_size
+func rctlblk_size() uintptr
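
The only arithmetic worth calling out above is the conversion from a zone CPU cap, expressed as a percentage of one CPU, to a usable CPU count. A standalone sketch of just that calculation:

package main

import "fmt"

// capToNCPU rounds a percentage cap up to whole CPUs and applies it only
// when it is below the real online count, as getncpu does above.
func capToNCPU(capPercent, online int32) int32 {
	if capPercent > 0 {
		ncap := (capPercent + 99) / 100 // e.g. 336% -> 4 CPUs
		if ncap < online {
			return ncap
		}
	}
	return online
}

func main() {
	fmt.Println(capToNCPU(336, 16)) // 4
	fmt.Println(capToNCPU(0, 16))   // 16: no cap detected
	fmt.Println(capToNCPU(2500, 8)) // 8: cap above the real count
}
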
diff --git a/libgo/go/runtime/os_js.go b/libgo/go/runtime/os_js.go
index ad6db18..ff0ee3a 100644
--- a/libgo/go/runtime/os_js.go
+++ b/libgo/go/runtime/os_js.go
@@ -12,7 +12,7 @@ import (
func exit(code int32)
-func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32 {
if fd > 2 {
throw("runtime.write to fd > 2 is unsupported")
}
@@ -131,7 +131,6 @@ func os_sigpipe() {
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
@@ -143,3 +142,9 @@ func syscall_now() (sec int64, nsec int32) {
// gsignalStack is unused on js.
type gsignalStack struct{}
+
+const preemptMSupported = false
+
+func preemptM(mp *m) {
+ // No threads, so nothing to do.
+}
diff --git a/libgo/go/runtime/os_linux_arm.go b/libgo/go/runtime/os_linux_arm.go
index 8de9d11..0a90188 100644
--- a/libgo/go/runtime/os_linux_arm.go
+++ b/libgo/go/runtime/os_linux_arm.go
@@ -6,16 +6,8 @@ package runtime
import "internal/cpu"
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
case _AT_HWCAP:
cpu.HWCap = uint(val)
case _AT_HWCAP2:
diff --git a/libgo/go/runtime/os_linux_arm64.go b/libgo/go/runtime/os_linux_arm64.go
index 30d63bf..a482d47 100644
--- a/libgo/go/runtime/os_linux_arm64.go
+++ b/libgo/go/runtime/os_linux_arm64.go
@@ -8,17 +8,8 @@ package runtime
import "internal/cpu"
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
-
case _AT_HWCAP:
// arm64 doesn't have a 'cpuid' instruction equivalent and relies on
// HWCAP/HWCAP2 bits for hardware capabilities.
diff --git a/libgo/go/runtime/os_linux_mips64x.go b/libgo/go/runtime/os_linux_mips64x.go
index b7f737f..2b59dcb 100644
--- a/libgo/go/runtime/os_linux_mips64x.go
+++ b/libgo/go/runtime/os_linux_mips64x.go
@@ -7,15 +7,5 @@
package runtime
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
- switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
- }
}
diff --git a/libgo/go/runtime/os_linux_mipsx.go b/libgo/go/runtime/os_linux_mipsx.go
index a2696de..2bfd6f40 100644
--- a/libgo/go/runtime/os_linux_mipsx.go
+++ b/libgo/go/runtime/os_linux_mipsx.go
@@ -7,15 +7,5 @@
package runtime
-var randomNumber uint32
-
func archauxv(tag, val uintptr) {
- switch tag {
- case _AT_RANDOM:
- // sysargs filled in startupRandomData, but that
- // pointer may not be word aligned, so we must treat
- // it as a byte array.
- randomNumber = uint32(startupRandomData[4]) | uint32(startupRandomData[5])<<8 |
- uint32(startupRandomData[6])<<16 | uint32(startupRandomData[7])<<24
- }
}
diff --git a/libgo/go/runtime/os_netbsd_arm64.go b/libgo/go/runtime/os_netbsd_arm64.go
index fd81eb7..8d21b0a 100644
--- a/libgo/go/runtime/os_netbsd_arm64.go
+++ b/libgo/go/runtime/os_netbsd_arm64.go
@@ -19,6 +19,5 @@ func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintp
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/libgo/go/runtime/os_only_solaris.go b/libgo/go/runtime/os_only_solaris.go
new file mode 100644
index 0000000..e2f5409
--- /dev/null
+++ b/libgo/go/runtime/os_only_solaris.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Solaris code that doesn't also apply to illumos.
+
+// +build !illumos
+
+package runtime
+
+func getncpu() int32 {
+ n := int32(sysconf(__SC_NPROCESSORS_ONLN))
+ if n < 1 {
+ return 1
+ }
+
+ return n
+}
diff --git a/libgo/go/runtime/os_openbsd_arm64.go b/libgo/go/runtime/os_openbsd_arm64.go
index f15a95b..d559a2a 100644
--- a/libgo/go/runtime/os_openbsd_arm64.go
+++ b/libgo/go/runtime/os_openbsd_arm64.go
@@ -12,7 +12,6 @@ import (
func cputicks() int64 {
// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand().
// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
- // TODO: need more entropy to better seed fastrand.
return nanotime()
}
diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go
index 9667181..88c598e 100644
--- a/libgo/go/runtime/panic.go
+++ b/libgo/go/runtime/panic.go
@@ -50,7 +50,7 @@ import (
// pc should be the program counter of the compiler-generated code that
// triggered this panic.
func panicCheck1(pc uintptr, msg string) {
- name, _, _, _ := funcfileline(pc-1, -1)
+ name, _, _, _ := funcfileline(pc-1, -1, false)
if hasPrefix(name, "runtime.") {
throw(msg)
}
@@ -548,6 +548,14 @@ func Goexit() {
// for detailed comments.
gp := getg()
gp.goexiting = true
+
+ // Create a panic object for Goexit, so we can recognize when it might be
+ // bypassed by a recover().
+ var p _panic
+ p.goexit = true
+ p.link = gp._panic
+ gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+
for {
d := gp._defer
if d == nil {
@@ -608,7 +616,12 @@ func preprintpanics(p *_panic) {
func printpanics(p *_panic) {
if p.link != nil {
printpanics(p.link)
- print("\t")
+ if !p.link.goexit {
+ print("\t")
+ }
+ }
+ if p.goexit {
+ return
}
print("panic: ")
printany(p.arg)
@@ -704,9 +717,12 @@ func gopanic(e interface{}) {
d._panic = nil
if p.recovered {
- atomic.Xadd(&runningPanicDefers, -1)
-
gp._panic = p.link
+ if gp._panic != nil && gp._panic.goexit && gp._panic.aborted {
+ Goexit()
+ throw("Goexit returned")
+ }
+ atomic.Xadd(&runningPanicDefers, -1)
// Aborted panics are marked but remain on the g.panic list.
// Remove them from the list.
@@ -717,6 +733,11 @@ func gopanic(e interface{}) {
gp.sig = 0
}
+ if gp._panic != nil && gp._panic.goexit {
+ Goexit()
+ throw("Goexit returned")
+ }
+
// Unwind the stack by throwing an exception.
// The compiler has arranged to create
// exception handlers in each function
@@ -922,7 +943,7 @@ func makefuncreturning() {
func gorecover() interface{} {
gp := getg()
p := gp._panic
- if p != nil && !p.recovered {
+ if p != nil && !p.goexit && !p.recovered {
p.recovered = true
return p.arg
}
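
The effect of the panic.go changes above is visible from the public API: Goexit now pushes a goexit-flagged panic record, gorecover refuses to recover it, and gopanic re-runs Goexit when a recovered panic had bypassed one. A short program showing the user-visible half, that a deferred recover does not stop runtime.Goexit:

package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		defer func() {
			// recover returns nil here: Goexit is not a recoverable panic.
			if r := recover(); r != nil {
				fmt.Println("recovered:", r)
			} else {
				fmt.Println("recover() returned nil during Goexit")
			}
		}()
		runtime.Goexit()
		fmt.Println("unreachable") // never printed
	}()
	wg.Wait()
	fmt.Println("goroutine exited via Goexit, defers ran")
}
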
diff --git a/libgo/go/runtime/pprof/label.go b/libgo/go/runtime/pprof/label.go
index 20f9cdb..2d92ef7 100644
--- a/libgo/go/runtime/pprof/label.go
+++ b/libgo/go/runtime/pprof/label.go
@@ -60,11 +60,11 @@ func Labels(args ...string) LabelSet {
if len(args)%2 != 0 {
panic("uneven number of arguments to pprof.Labels")
}
- labels := LabelSet{}
+ list := make([]label, 0, len(args)/2)
for i := 0; i+1 < len(args); i += 2 {
- labels.list = append(labels.list, label{key: args[i], value: args[i+1]})
+ list = append(list, label{key: args[i], value: args[i+1]})
}
- return labels
+ return LabelSet{list: list}
}
// Label returns the value of the label with the given key on ctx, and a boolean indicating
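
The Labels change above only preallocates the backing slice; the public behavior is unchanged. For context, a short usage sketch of the API being touched:

package main

import (
	"context"
	"fmt"
	"runtime/pprof"
)

func main() {
	ctx := context.Background()
	// Labels takes alternating key/value strings; Do attaches them to the
	// context (and to CPU profile samples) for the duration of the callback.
	labels := pprof.Labels("worker", "42", "request", "demo")
	pprof.Do(ctx, labels, func(ctx context.Context) {
		if v, ok := pprof.Label(ctx, "worker"); ok {
			fmt.Println("worker label:", v)
		}
	})
}
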
diff --git a/libgo/go/runtime/pprof/label_test.go b/libgo/go/runtime/pprof/label_test.go
index 240445f..de39d85 100644
--- a/libgo/go/runtime/pprof/label_test.go
+++ b/libgo/go/runtime/pprof/label_test.go
@@ -24,7 +24,7 @@ func (s labelSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s labelSorter) Less(i, j int) bool { return s[i].key < s[j].key }
func TestContextLabels(t *testing.T) {
- // Background context starts with no lablels.
+ // Background context starts with no labels.
ctx := context.Background()
labels := labelsSorted(ctx)
if len(labels) != 0 {
diff --git a/libgo/go/runtime/pprof/mprof_test.go b/libgo/go/runtime/pprof/mprof_test.go
index 6fe892b..c352dea 100644
--- a/libgo/go/runtime/pprof/mprof_test.go
+++ b/libgo/go/runtime/pprof/mprof_test.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// +build !js
+
package pprof
import (
@@ -10,6 +12,7 @@ import (
"reflect"
"regexp"
"runtime"
+ "runtime/pprof/internal/profile"
"testing"
"unsafe"
)
@@ -27,6 +30,10 @@ func allocateTransient2M() {
memSink = make([]byte, 2<<20)
}
+func allocateTransient2MInline() {
+ memSink = make([]byte, 4<<20)
+}
+
type Obj32 struct {
link *Obj32
pad [32 - unsafe.Sizeof(uintptr(0))]byte
@@ -71,47 +78,102 @@ func TestMemoryProfiler(t *testing.T) {
// Do the interesting allocations.
allocateTransient1M()
allocateTransient2M()
+ allocateTransient2MInline()
allocatePersistent1K()
allocateReflect()
memSink = nil
runtime.GC() // materialize stats
- var buf bytes.Buffer
- if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
- t.Fatalf("failed to write heap profile: %v", err)
- }
memoryProfilerRun++
- tests := []string{
-
- fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:40
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:74
+ tests := []struct {
+ stk []string
+ legacy string
+ }{{
+ stk: []string{"pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f x]+
+# 0x[0-9,a-f]+ pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/mprof_test\.go:47
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test\.go:82
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
-
- fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:21
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:72
-`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
-
+ }, {
+ stk: []string{"pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
+# 0x[0-9,a-f]+ pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/mprof_test.go:24
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:79
+`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
+ }, {
+ stk: []string{"pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
// This should start with "0: 0" but gccgo's imprecise
// GC means that sometimes the value is not collected.
- fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
-# 0x[0-9,a-f]+ pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:27
-# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:73
+ legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
+# 0x[0-9,a-f]+ pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/mprof_test.go:30
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:80
`, memoryProfilerRun, (2<<20)*memoryProfilerRun, memoryProfilerRun, (2<<20)*memoryProfilerRun),
-
- // This should start with "0: 0" but gccgo's imprecise
- // GC means that sometimes the value is not collected.
- fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @( 0x[0-9,a-f]+)+
-# 0x[0-9,a-f]+ pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:48
+ }, {
+ stk: []string{"pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
+ legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @ 0x[0-9,a-f x]+
+# 0x[0-9,a-f]+ pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/mprof_test.go:34
+# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:81
+`, memoryProfilerRun, (4<<20)*memoryProfilerRun, memoryProfilerRun, (4<<20)*memoryProfilerRun),
+ }, {
+ stk: []string{"pprof.allocateReflectTransient"},
+ legacy: fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @( 0x[0-9,a-f]+)+
+# 0x[0-9,a-f]+ pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:55
`, memoryProfilerRun, (3<<20)*memoryProfilerRun, memoryProfilerRun, (3<<20)*memoryProfilerRun),
- }
+ }}
- for _, test := range tests {
- if !regexp.MustCompile(test).Match(buf.Bytes()) {
- t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
+ t.Run("debug=1", func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
+ t.Fatalf("failed to write heap profile: %v", err)
}
- }
+
+ for _, test := range tests {
+ if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) {
+ t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String())
+ }
+ }
+ })
+
+ t.Run("proto", func(t *testing.T) {
+ var buf bytes.Buffer
+ if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
+ t.Fatalf("failed to write heap profile: %v", err)
+ }
+ p, err := profile.Parse(&buf)
+ if err != nil {
+ t.Fatalf("failed to parse heap profile: %v", err)
+ }
+ t.Logf("Profile = %v", p)
+
+ stks := stacks(p)
+ for _, test := range tests {
+ if !containsStack(stks, test.stk) {
+ t.Logf("stks:\n%v", stks)
+ t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p)
+ }
+ }
+
+ if !containsInlinedCall(TestMemoryProfiler, 4<<10) {
+ t.Logf("Can't determine whether allocateTransient2MInline was inlined into TestMemoryProfiler.")
+ return
+ }
+
+ // Check the inlined function location is encoded correctly.
+ for _, loc := range p.Location {
+ inlinedCaller, inlinedCallee := false, false
+ for _, line := range loc.Line {
+ if line.Function.Name == "runtime/pprof.allocateTransient2MInline" {
+ inlinedCallee = true
+ }
+ if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" {
+ inlinedCaller = true
+ }
+ }
+ if inlinedCallee != inlinedCaller {
+ t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc)
+ }
+ }
+ })
}
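
The restructured test exercises both heap-profile encodings: debug=1 writes the legacy text matched by the regexps, while debug=0 writes the gzipped protobuf that the test parses for inlined-call locations. A minimal sketch of producing both with the public API (no assertions, just the two writes):

package main

import (
	"bytes"
	"fmt"
	"runtime"
	"runtime/pprof"
)

var sink []byte // keep the allocation reachable so it shows up in the profile

func main() {
	sink = make([]byte, 1<<20)
	runtime.GC() // materialize stats, as the test does

	var legacy, proto bytes.Buffer
	if err := pprof.Lookup("heap").WriteTo(&legacy, 1); err != nil {
		panic(err)
	}
	if err := pprof.Lookup("heap").WriteTo(&proto, 0); err != nil {
		panic(err)
	}
	fmt.Printf("legacy text: %d bytes, gzipped proto: %d bytes\n", legacy.Len(), proto.Len())
}
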
diff --git a/libgo/go/runtime/pprof/pprof.go b/libgo/go/runtime/pprof/pprof.go
index 996b3cb..183881c 100644
--- a/libgo/go/runtime/pprof/pprof.go
+++ b/libgo/go/runtime/pprof/pprof.go
@@ -28,7 +28,7 @@
// if err != nil {
// log.Fatal("could not create CPU profile: ", err)
// }
-// defer f.Close()
+// defer f.Close() // error handling omitted for example
// if err := pprof.StartCPUProfile(f); err != nil {
// log.Fatal("could not start CPU profile: ", err)
// }
@@ -42,7 +42,7 @@
// if err != nil {
// log.Fatal("could not create memory profile: ", err)
// }
-// defer f.Close()
+// defer f.Close() // error handling omitted for example
// runtime.GC() // get up-to-date statistics
// if err := pprof.WriteHeapProfile(f); err != nil {
// log.Fatal("could not write memory profile: ", err)
@@ -386,16 +386,9 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler fun
count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
values[0] = count
values[1] = int64(nanosec)
- locs = locs[:0]
- for _, addr := range r.Stack() {
- // For count profiles, all stack addresses are
- // return PCs, which is what locForPC expects.
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
- }
+ // For count profiles, all stack addresses are
+ // return PCs, which is what appendLocsForStack expects.
+ locs = b.appendLocsForStack(locs[:0], r.Stack())
b.pbSample(values, locs, nil)
}
b.build()
@@ -451,16 +444,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
var locs []uint64
for _, k := range keys {
values[0] = int64(count[k])
- locs = locs[:0]
- for _, addr := range p.Stack(index[k]) {
- // For count profiles, all stack addresses are
- // return PCs, which is what locForPC expects.
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
- }
+ // For count profiles, all stack addresses are
+ // return PCs, which is what appendLocsForStack expects.
+ locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
b.pbSample(values, locs, nil)
}
b.build()
diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go
index 49a555c..bba9ee3 100644
--- a/libgo/go/runtime/pprof/pprof_test.go
+++ b/libgo/go/runtime/pprof/pprof_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !nacl,!js
+// +build !js
package pprof
@@ -49,8 +49,12 @@ var (
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.
func cpuHog1(x int) int {
+ return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
foo := x
- for i := 0; i < 1e5; i++ {
+ for i := 0; i < n; i++ {
if foo > 0 {
foo *= foo
} else {
@@ -100,35 +104,111 @@ func TestCPUProfileMultithreaded(t *testing.T) {
})
}
+// containsInlinedCall reports whether the function body for the function f is
+// known to contain an inlined function call within the first maxBytes bytes.
+func containsInlinedCall(f interface{}, maxBytes int) bool {
+ _, found := findInlinedCall(f, maxBytes)
+ return found
+}
+
+// findInlinedCall returns the PC of an inlined function call within
+// the function body for the function f if any.
+func findInlinedCall(f interface{}, maxBytes int) (pc uint64, found bool) {
+ fFunc := runtime.FuncForPC(uintptr(funcPC(f)))
+ if fFunc == nil || fFunc.Entry() == 0 {
+ panic("failed to locate function entry")
+ }
+
+ for offset := 0; offset < maxBytes; offset++ {
+ innerPC := fFunc.Entry() + uintptr(offset)
+ inner := runtime.FuncForPC(innerPC)
+ if inner == nil {
+ // No function known for this PC value.
+ // It might simply be misaligned, so keep searching.
+ continue
+ }
+ if inner.Entry() != fFunc.Entry() {
+ // Scanned past f and didn't find any inlined functions.
+ break
+ }
+ if inner.Name() != fFunc.Name() {
+ // This PC has f as its entry-point, but is not f. Therefore, it must be a
+ // function inlined into f.
+ return uint64(innerPC), true
+ }
+ }
+
+ return 0, false
+}
+
func TestCPUProfileInlining(t *testing.T) {
- testCPUProfile(t, stackContains, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
+ if !containsInlinedCall(inlinedCaller, 4<<10) {
+ t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
+ }
+
+ p := testCPUProfile(t, stackContains, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
cpuHogger(inlinedCaller, &salt1, dur)
})
+
+ // Check if inlined function locations are encoded correctly. The inlinedCallee and inlinedCaller should be in one location.
+ for _, loc := range p.Location {
+ hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
+ for _, line := range loc.Line {
+ if line.Function.Name == "runtime/pprof.inlinedCallee" {
+ hasInlinedCallee = true
+ }
+ if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
+ hasInlinedCallerAfterInlinedCallee = true
+ }
+ }
+ if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
+ t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
+ }
+ }
}
func inlinedCaller(x int) int {
- x = inlinedCallee(x)
+ x = inlinedCallee(x, 1e5)
return x
}
-func inlinedCallee(x int) int {
- // We could just use cpuHog1, but for loops prevent inlining
- // right now. :(
- foo := x
- i := 0
-loop:
- if foo > 0 {
- foo *= foo
- } else {
- foo *= foo + 1
+func inlinedCallee(x, n int) int {
+ return cpuHog0(x, n)
+}
+
+func TestCPUProfileRecursion(t *testing.T) {
+ p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
+ cpuHogger(recursionCaller, &salt1, dur)
+ })
+
+ // check the Location encoding was not confused by recursive calls.
+ for i, loc := range p.Location {
+ recursionFunc := 0
+ for _, line := range loc.Line {
+ if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
+ recursionFunc++
+ }
+ }
+ if recursionFunc > 1 {
+ t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
+ }
}
- if i++; i < 1e5 {
- goto loop
+}
+
+func recursionCaller(x int) int {
+ y := recursionCallee(3, x)
+ return y
+}
+
+func recursionCallee(n, x int) int {
+ if n == 0 {
+ return 1
}
- return foo
+ y := inlinedCallee(x, 1e4)
+ return y * recursionCallee(n-1, x)
}
-func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
+func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil {
t.Fatal(err)
@@ -137,11 +217,12 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca
count := uintptr(sample.Value[0])
f(count, sample.Location, sample.Label)
}
+ return p
}
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
-// as interpreted by matches.
-func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) {
+// as interpreted by matches, and returns the parsed profile.
+func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
switch runtime.GOOS {
case "darwin":
switch runtime.GOARCH {
@@ -195,8 +276,8 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
f(duration)
StopCPUProfile()
- if profileOk(t, matches, need, avoid, prof, duration) {
- return
+ if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
+ return p
}
duration *= 2
@@ -217,6 +298,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
}
t.FailNow()
+ return nil
}
func contains(slice []string, s string) bool {
@@ -242,7 +324,7 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
-func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
+func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
ok = true
// Check that profile is well formed, contains 'need', and does not contain
@@ -251,7 +333,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
avoidSamples := make([]uintptr, len(avoid))
var samples uintptr
var buf bytes.Buffer
- parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
+ p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
fmt.Fprintf(&buf, "%d:", count)
fprintStack(&buf, stk)
samples += count
@@ -287,7 +369,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
// not enough samples due to coarse timer
// resolution. Let it go.
t.Log("too few samples on Windows (golang.org/issue/10842)")
- return false
+ return p, false
}
// Check that we got a reasonable number of samples.
@@ -309,7 +391,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
}
if len(need) == 0 {
- return ok
+ return p, ok
}
var total uintptr
@@ -332,7 +414,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
ok = false
}
}
- return ok
+ return p, ok
}
// Fork can hang if preempted with signals frequently enough (see issue 5517).
@@ -1076,3 +1158,111 @@ func TestTracebackAll(t *testing.T) {
runtime.Stack(buf, true)
}
}
+
+// TestTryAdd tests cases that are hard to test with real program execution.
+// For example, the current Go compilers may not inline functions involved in recursion,
+// but that may not be true of future compilers. This tests such cases by
+// using fake call sequences and building the profile directly with
+// translateCPUProfile, which is defined in proto_test.go.
+func TestTryAdd(t *testing.T) {
+ inlinedCallerPtr := uint64(funcPC(inlinedCaller)) + 1
+ inlinedCalleePtr, found := findInlinedCall(inlinedCaller, 4<<10)
+ if !found {
+ t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
+ }
+ inlinedCalleePtr += 1 // +1 to be safely inside the function body.
+
+ period := int64(2000 * 1000) // 1/500*1e9 nanosec.
+
+ testCases := []struct {
+ name string
+ input []uint64 // following the input format assumed by profileBuilder.addCPUData.
+ wantLocs [][]string // ordered location entries with function names.
+ wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs.
+ }{{
+ name: "bug35538",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 7, 0, 10, inlinedCalleePtr, inlinedCallerPtr, inlinedCalleePtr, inlinedCallerPtr,
+ 5, 0, 20, inlinedCalleePtr, inlinedCallerPtr,
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
+ {Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "recursive_inlined_funcs",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 5, 0, 30, inlinedCalleePtr, inlinedCalleePtr,
+ 4, 0, 40, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
+ {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "truncated_stack_trace_later",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 5, 0, 50, inlinedCalleePtr, inlinedCallerPtr,
+ 4, 0, 60, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ name: "truncated_stack_trace_first",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCalleePtr,
+ 5, 0, 80, inlinedCalleePtr, inlinedCallerPtr,
+ },
+ wantLocs: [][]string{ // the inline info is screwed up, but better than a crash.
+ {"runtime/pprof.inlinedCallee"},
+ {"runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
+ },
+ }}
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ p, err := translateCPUProfile(tc.input)
+ if err != nil {
+ t.Fatalf("translating profile: %v", err)
+ }
+ t.Logf("Profile: %v\n", p)
+
+ // One location entry with all inlined functions.
+ var gotLoc [][]string
+ for _, loc := range p.Location {
+ var names []string
+ for _, line := range loc.Line {
+ names = append(names, line.Function.Name)
+ }
+ gotLoc = append(gotLoc, names)
+ }
+ if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want {
+ t.Errorf("Got Location = %+v\n\twant %+v", got, want)
+ }
+ // All samples should point to one location.
+ var gotSamples []*profile.Sample
+ for _, sample := range p.Sample {
+ var locs []*profile.Location
+ for _, loc := range sample.Location {
+ locs = append(locs, &profile.Location{ID: loc.ID})
+ }
+ gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs})
+ }
+ if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want {
+ t.Errorf("Got Samples = %+v\n\twant %+v", got, want)
+ }
+ })
+ }
+}
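For readers unfamiliar with the raw format used by these test inputs: each record appears to be [length in words, tag word, count, stack PCs from leaf to root], with the very first record carrying the sampling rate in the count slot, which is how the comments above ("hz = 500. Must match the period.", "10 samples") read. Below is a minimal sketch under that assumption; fakeCPUProfileInput is an illustrative helper, not part of this patch.

package main

import "fmt"

// fakeCPUProfileInput assembles a header record [3, 0, hz] followed by one
// sample record [length-in-words, tag, count, stack PCs... (leaf first)],
// matching the layout the TestTryAdd inputs appear to use.
func fakeCPUProfileInput(hz, count uint64, stk ...uint64) []uint64 {
	rec := []uint64{3, 0, hz}
	rec = append(rec, uint64(3+len(stk)), 0, count)
	return append(rec, stk...)
}

func main() {
	// Mirrors the shape of the "truncated_stack_trace_later" input, with made-up PCs.
	fmt.Println(fakeCPUProfileInput(500, 50, 0x1234, 0x5678))
	// Output: [3 0 500 5 0 50 4660 22136]
}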
diff --git a/libgo/go/runtime/pprof/proto.go b/libgo/go/runtime/pprof/proto.go
index ef3eeb1..ba5db85 100644
--- a/libgo/go/runtime/pprof/proto.go
+++ b/libgo/go/runtime/pprof/proto.go
@@ -54,9 +54,10 @@ type profileBuilder struct {
pb protobuf
strings []string
stringMap map[string]int
- locs map[uintptr]int
- funcs map[string]int // Package path-qualified function name to Function.ID
+ locs map[uintptr]locInfo // list of locInfo starting with the given PC.
+ funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
+ deck pcDeck
}
type memMap struct {
@@ -220,15 +221,7 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
b.pb.endMessage(tag, start)
}
-// locForPC returns the location ID for addr.
-// addr must a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
-// It may emit to b.pb, so there must be no message encoding in progress.
-func (b *profileBuilder) locForPC(addr uintptr) uint64 {
- id := uint64(b.locs[addr])
- if id != 0 {
- return id
- }
-
+func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
// Expand this one address using CallersFrames so we can cache
// each expansion. In general, CallersFrames takes a whole
// stack, but in this case we know there will be no skips in
@@ -238,7 +231,7 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
if frame.Function == "runtime.goexit" || frame.Function == "runtime.kickoff" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
- return 0
+ return nil, 0
}
symbolizeResult := lookupTried
@@ -251,59 +244,22 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
// a reasonable call PC. This mostly happens in tests.
frame.PC = addr - 1
}
-
- // We can't write out functions while in the middle of the
- // Location message, so record new functions we encounter and
- // write them out after the Location.
- type newFunc struct {
- id uint64
- name, file string
- }
- newFuncs := make([]newFunc, 0, 8)
-
- id = uint64(len(b.locs)) + 1
- b.locs[addr] = int(id)
- start := b.pb.startMessage()
- b.pb.uint64Opt(tagLocation_ID, id)
- b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
- for frame.Function != "runtime.goexit" && frame.Function != "runtime.kickoff" {
- // Write out each line in frame expansion.
- funcID := uint64(b.funcs[frame.Function])
- if funcID == 0 {
- funcID = uint64(len(b.funcs)) + 1
- b.funcs[frame.Function] = int(funcID)
- newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
- }
- b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
- if !more {
- break
- }
+ ret := []runtime.Frame{frame}
+ for frame.Function != "runtime.goexit" && frame.Function != "runtime.kickoff" && more {
frame, more = frames.Next()
+ ret = append(ret, frame)
}
- for i := range b.mem {
- if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
- b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
-
- m := b.mem[i]
- m.funcs |= symbolizeResult
- b.mem[i] = m
- break
- }
- }
- b.pb.endMessage(tagProfile_Location, start)
+ return ret, symbolizeResult
+}
- // Write out functions we found during frame expansion.
- for _, fn := range newFuncs {
- start := b.pb.startMessage()
- b.pb.uint64Opt(tagFunction_ID, fn.id)
- b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
- b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
- b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
- b.pb.endMessage(tagProfile_Function, start)
- }
+type locInfo struct {
+ // location id assigned by the profileBuilder
+ id uint64
- b.flush()
- return id
+ // sequence of PCs, including the fake PCs returned by the traceback
+ // to represent inlined functions
+ // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
+ pcs []uintptr
}
// newProfileBuilder returns a new profileBuilder.
@@ -318,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
start: time.Now(),
strings: []string{""},
stringMap: map[string]int{"": 0},
- locs: map[uintptr]int{},
+ locs: map[uintptr]locInfo{},
funcs: map[string]int{},
}
b.readMapping()
@@ -402,6 +358,7 @@ func (b *profileBuilder) build() {
values := []int64{0, 0}
var locs []uint64
+
for e := b.m.all; e != nil; e = e.nextAll {
values[0] = e.count
values[1] = e.count * b.period
@@ -415,23 +372,8 @@ func (b *profileBuilder) build() {
}
}
- locs = locs[:0]
- for i, addr := range e.stk {
- // Addresses from stack traces point to the
- // next instruction after each call, except
- // for the leaf, which points to where the
- // signal occurred. locForPC expects return
- // PCs, so increment the leaf address to look
- // like a return PC.
- if i == 0 {
- addr++
- }
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
- }
+ locs = b.appendLocsForStack(locs[:0], e.stk)
+
b.pbSample(values, locs, labels)
}
@@ -448,6 +390,203 @@ func (b *profileBuilder) build() {
b.zw.Close()
}
+// appendLocsForStack appends the location IDs for the given stack trace to the given
+// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
+// an inline marker as the runtime traceback function returns.
+//
+// It may emit to b.pb, so there must be no message encoding in progress.
+func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
+ b.deck.reset()
+ for len(stk) > 0 {
+ addr := stk[0]
+ if l, ok := b.locs[addr]; ok {
+ // first record the location if there is any pending accumulated info.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // then, record the cached location.
+ locs = append(locs, l.id)
+
+ // The stk may be truncated due to the stack depth limit
+ // (e.g. See maxStack and maxCPUProfStack in runtime) or
+ // bugs in runtime. Avoid the crash in either case.
+ // TODO(hyangah): The correct fix may require using the exact
+ // pcs as the key for b.locs cache management instead of just
+ // relying on the very first pc. We are late in the go1.14 dev
+ // cycle, so this is a workaround with little code change.
+ if len(l.pcs) > len(stk) {
+ stk = nil
+ // TODO(hyangah): would be nice if we can enable
+ // debug print out on demand and report the problematic
+ // cached location entry and stack traces. Do we already
+ // have such facility to utilize (e.g. GODEBUG)?
+ } else {
+ stk = stk[len(l.pcs):] // skip the matching pcs.
+ }
+ continue
+ }
+
+ frames, symbolizeResult := allFrames(addr)
+ if len(frames) == 0 { // runtime.goexit.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+ stk = stk[1:]
+ continue
+ }
+
+ if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
+ stk = stk[1:]
+ continue
+ }
+ // add failed because this addr is not inlined with
+ // the existing PCs in the deck. Flush the deck and retry to
+ // handle this pc.
+ if id := b.emitLocation(); id > 0 {
+ locs = append(locs, id)
+ }
+
+ // check cache again - previous emitLocation added a new entry
+ if l, ok := b.locs[addr]; ok {
+ locs = append(locs, l.id)
+ stk = stk[len(l.pcs):] // skip the matching pcs.
+ } else {
+ b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
+ stk = stk[1:]
+ }
+ }
+ if id := b.emitLocation(); id > 0 { // emit remaining location.
+ locs = append(locs, id)
+ }
+ return locs
+}
+
+// pcDeck is a helper to detect a sequence of inlined functions from
+// a stack trace returned by the runtime.
+//
+// The stack traces returned by the runtime's traceback functions are fully
+// expanded (at least for Go functions) and include the fake pcs representing
+// inlined functions. The profile proto expects the inlined functions to be
+// encoded in one Location message.
+// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
+//
+// Runtime does not directly expose whether a frame is for an inlined function
+// and looking up debug info is not ideal, so we use a heuristic to filter
+// the fake pcs and restore the inlined and entry functions. Inlined functions
+// have the following properties:
+// Frame's Func is nil (note: also true for non-Go functions), and
+// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
+// Frame's Name does not match its entry function frame's name.
+//
+// As we read and process the pcs in a stack trace one by one (from leaf to root),
+// we use pcDeck to temporarily hold the observed pcs and their expanded frames
+// until we observe the entry function frame.
+type pcDeck struct {
+ pcs []uintptr
+ frames []runtime.Frame
+ symbolizeResult symbolizeFlag
+}
+
+func (d *pcDeck) reset() {
+ d.pcs = d.pcs[:0]
+ d.frames = d.frames[:0]
+ d.symbolizeResult = 0
+}
+
+// tryAdd tries to add the pc and Frames expanded from it (most likely one,
+// since the stack trace is already fully expanded) and the symbolizeResult
+// to the deck. If it fails, the caller needs to flush the deck and retry.
+func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
+ if existing := len(d.pcs); existing > 0 {
+ // 'frames' are all expanded from one 'pc' and represent all inlined functions
+ // so we check only the last one.
+ newFrame := frames[0]
+ last := d.frames[existing-1]
+ if last.Func != nil { // the last frame can't be inlined. Flush.
+ return false
+ }
+ if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
+ return false
+ }
+
+ if last.Entry != newFrame.Entry { // newFrame is for a different function.
+ return false
+ }
+ if last.Function == newFrame.Function { // maybe recursion.
+ return false
+ }
+ }
+ d.pcs = append(d.pcs, pc)
+ d.frames = append(d.frames, frames...)
+ d.symbolizeResult |= symbolizeResult
+ return true
+}
+
+// emitLocation emits the new location and function information recorded in the deck
+// and returns the location ID encoded in the profile protobuf.
+// It emits to b.pb, so there must be no message encoding in progress.
+// It resets the deck.
+func (b *profileBuilder) emitLocation() uint64 {
+ if len(b.deck.pcs) == 0 {
+ return 0
+ }
+ defer b.deck.reset()
+
+ addr := b.deck.pcs[0]
+ firstFrame := b.deck.frames[0]
+
+ // We can't write out functions while in the middle of the
+ // Location message, so record new functions we encounter and
+ // write them out after the Location.
+ type newFunc struct {
+ id uint64
+ name, file string
+ }
+ newFuncs := make([]newFunc, 0, 8)
+
+ id := uint64(len(b.locs)) + 1
+ b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
+
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagLocation_ID, id)
+ b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
+ for _, frame := range b.deck.frames {
+ // Write out each line in frame expansion.
+ funcID := uint64(b.funcs[frame.Function])
+ if funcID == 0 {
+ funcID = uint64(len(b.funcs)) + 1
+ b.funcs[frame.Function] = int(funcID)
+ newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+ }
+ b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+ }
+ for i := range b.mem {
+ if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
+ b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
+
+ m := b.mem[i]
+ m.funcs |= b.deck.symbolizeResult
+ b.mem[i] = m
+ break
+ }
+ }
+ b.pb.endMessage(tagProfile_Location, start)
+
+ // Write out functions we found during frame expansion.
+ for _, fn := range newFuncs {
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagFunction_ID, fn.id)
+ b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+ b.pb.endMessage(tagProfile_Function, start)
+ }
+
+ b.flush()
+ return id
+}
+
// readMapping reads /proc/self/maps and writes mappings to b.pb.
// It saves the address ranges of the mappings in b.mem for use
// when emitting locations.
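The merge rule that pcDeck.tryAdd applies is easiest to see in isolation. Below is a small, self-contained model of that check; frameInfo and canMerge are illustrative names standing in for runtime.Frame and the checks in tryAdd, not code from this patch.

package main

import "fmt"

// frameInfo is a stripped-down stand-in for runtime.Frame carrying only the
// fields the heuristic consults.
type frameInfo struct {
	funcNil  bool    // Frame.Func == nil (true for inlined and non-Go frames)
	entry    uintptr // Frame.Entry
	function string  // Frame.Function
}

// canMerge mirrors tryAdd: the next frame may join the deck only if the
// last frame looks like an inlined call belonging to the same entry function.
func canMerge(last, next frameInfo) bool {
	if !last.funcNil { // the last frame can't be inlined: flush
		return false
	}
	if last.entry == 0 || next.entry == 0 { // possibly not a Go function: don't merge
		return false
	}
	if last.entry != next.entry { // different function: flush
		return false
	}
	if last.function == next.function { // same name: maybe recursion, flush
		return false
	}
	return true
}

func main() {
	callee := frameInfo{funcNil: true, entry: 0x1000, function: "p.inlinedCallee"}
	caller := frameInfo{funcNil: false, entry: 0x1000, function: "p.inlinedCaller"}
	fmt.Println(canMerge(callee, caller)) // true: collapse into one Location
	fmt.Println(canMerge(caller, callee)) // false: caller has Func != nil, flush first
}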
diff --git a/libgo/go/runtime/pprof/proto_test.go b/libgo/go/runtime/pprof/proto_test.go
index 7e7c7cc..67d6b5d 100644
--- a/libgo/go/runtime/pprof/proto_test.go
+++ b/libgo/go/runtime/pprof/proto_test.go
@@ -118,9 +118,9 @@ func TestConvertCPUProfile(t *testing.T) {
b := []uint64{
3, 0, 500, // hz = 500
- 5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
- 5, 0, 40, uint64(addr2), uint64(addr2 + 2), // 40 samples in addr2
- 5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
+ 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1
+ 5, 0, 40, uint64(addr2 + 1), uint64(addr2 + 2), // 40 samples in addr2
+ 5, 0, 10, uint64(addr1 + 1), uint64(addr1 + 2), // 10 samples in addr1
}
p, err := translateCPUProfile(b)
if err != nil {
@@ -360,6 +360,17 @@ func TestMapping(t *testing.T) {
continue
}
}
+
+ if traceback == "Go+C" {
+ // The test code was arranged to have PCs from C and
+ // they are not symbolized.
+ // Check no Location containing those unsymbolized PCs contains multiple lines.
+ for i, loc := range prof.Location {
+ if !symbolized(loc) && len(loc.Line) > 1 {
+ t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
+ }
+ }
+ }
})
}
}
diff --git a/libgo/go/runtime/pprof/protomem.go b/libgo/go/runtime/pprof/protomem.go
index 1c88aae..fa75a28 100644
--- a/libgo/go/runtime/pprof/protomem.go
+++ b/libgo/go/runtime/pprof/protomem.go
@@ -27,30 +27,27 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau
values := []int64{0, 0, 0, 0}
var locs []uint64
for _, r := range p {
- locs = locs[:0]
hideRuntime := true
for tries := 0; tries < 2; tries++ {
- for _, addr := range r.Stack() {
- // For heap profiles, all stack
- // addresses are return PCs, which is
- // what locForPC expects.
- if hideRuntime {
+ stk := r.Stack()
+ // For heap profiles, all stack
+ // addresses are return PCs, which is
+ // what appendLocsForStack expects.
+ if hideRuntime {
+ for i, addr := range stk {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue
}
// Found non-runtime. Show any runtime uses above it.
- hideRuntime = false
+ stk = stk[i:]
+ break
}
- l := b.locForPC(addr)
- if l == 0 { // runtime.goexit
- continue
- }
- locs = append(locs, l)
}
+ locs = b.appendLocsForStack(locs[:0], stk)
if len(locs) > 0 {
break
}
- hideRuntime = false // try again, and show all frames
+ hideRuntime = false // try again, and show all frames next time.
}
values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)
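The rewritten heap-profile path above trims leading runtime.* frames once before handing the stack to appendLocsForStack. A standalone sketch of the same trimming idea, operating on function names rather than PCs (the real code resolves PCs with runtime.FuncForPC); trimLeadingRuntime is an illustrative name only.

package main

import (
	"fmt"
	"strings"
)

// trimLeadingRuntime drops frames until the first non-runtime function,
// mirroring the hideRuntime pass above; if everything is runtime.*, it
// returns nil so the caller can retry showing all frames.
func trimLeadingRuntime(stack []string) []string {
	for i, name := range stack {
		if !strings.HasPrefix(name, "runtime.") {
			return stack[i:] // keep any runtime frames below the first user frame
		}
	}
	return nil
}

func main() {
	stk := []string{"runtime.mallocgc", "runtime.newobject", "main.alloc", "runtime.main"}
	fmt.Println(trimLeadingRuntime(stk)) // [main.alloc runtime.main]
}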
diff --git a/libgo/go/runtime/pprof/testdata/README b/libgo/go/runtime/pprof/testdata/README
new file mode 100644
index 0000000..876538e
--- /dev/null
+++ b/libgo/go/runtime/pprof/testdata/README
@@ -0,0 +1,9 @@
+These binaries were generated by:
+
+$ cat empty.s
+.global _start
+_start:
+$ as --32 -o empty.o empty.s && ld --build-id -m elf_i386 -o test32 empty.o
+$ as --64 -o empty.o empty.s && ld --build-id -o test64 empty.o
+$ powerpc-linux-gnu-as -o empty.o empty.s && powerpc-linux-gnu-ld --build-id -o test32be empty.o
+$ powerpc64-linux-gnu-as -o empty.o empty.s && powerpc64-linux-gnu-ld --build-id -o test64be empty.o
diff --git a/libgo/go/runtime/pprof/testdata/mappingtest/main.go b/libgo/go/runtime/pprof/testdata/mappingtest/main.go
index 476b9e8..484b7f9 100644
--- a/libgo/go/runtime/pprof/testdata/mappingtest/main.go
+++ b/libgo/go/runtime/pprof/testdata/mappingtest/main.go
@@ -17,8 +17,7 @@ package main
int cpuHogCSalt1 = 0;
int cpuHogCSalt2 = 0;
-void CPUHogCFunction() {
- int foo = cpuHogCSalt1;
+void CPUHogCFunction0(int foo) {
int i;
for (i = 0; i < 100000; i++) {
if (foo > 0) {
@@ -30,6 +29,10 @@ void CPUHogCFunction() {
}
}
+void CPUHogCFunction() {
+ CPUHogCFunction0(cpuHogCSalt1);
+}
+
struct CgoTracebackArg {
uintptr_t context;
uintptr_t sigContext;
@@ -39,8 +42,9 @@ struct CgoTracebackArg {
void CollectCgoTraceback(void* parg) {
struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
- arg->buf[0] = (uintptr_t)(CPUHogCFunction);
- arg->buf[1] = 0;
+ arg->buf[0] = (uintptr_t)(CPUHogCFunction0);
+ arg->buf[1] = (uintptr_t)(CPUHogCFunction);
+ arg->buf[2] = 0;
};
*/
import "C"
@@ -81,7 +85,6 @@ var salt1 int
var salt2 int
func cpuHogGoFunction() {
- // Generates CPU profile samples including a Go call path.
for {
foo := salt1
for i := 0; i < 1e5; i++ {
diff --git a/libgo/go/runtime/pprof/testdata/test32 b/libgo/go/runtime/pprof/testdata/test32
new file mode 100755
index 0000000..ce59472
--- /dev/null
+++ b/libgo/go/runtime/pprof/testdata/test32
Binary files differ
diff --git a/libgo/go/runtime/pprof/testdata/test32be b/libgo/go/runtime/pprof/testdata/test32be
new file mode 100755
index 0000000..f13a732
--- /dev/null
+++ b/libgo/go/runtime/pprof/testdata/test32be
Binary files differ
diff --git a/libgo/go/runtime/pprof/testdata/test64 b/libgo/go/runtime/pprof/testdata/test64
new file mode 100755
index 0000000..3fb42fb
--- /dev/null
+++ b/libgo/go/runtime/pprof/testdata/test64
Binary files differ
diff --git a/libgo/go/runtime/pprof/testdata/test64be b/libgo/go/runtime/pprof/testdata/test64be
new file mode 100755
index 0000000..09b4b01
--- /dev/null
+++ b/libgo/go/runtime/pprof/testdata/test64be
Binary files differ
diff --git a/libgo/go/runtime/preempt.go b/libgo/go/runtime/preempt.go
new file mode 100644
index 0000000..1a8f9ac
--- /dev/null
+++ b/libgo/go/runtime/preempt.go
@@ -0,0 +1,370 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Goroutine preemption
+//
+// A goroutine can be preempted at any safe-point. Currently, there
+// are a few categories of safe-points:
+//
+// 1. A blocked safe-point occurs for the duration that a goroutine is
+// descheduled, blocked on synchronization, or in a system call.
+//
+// 2. Synchronous safe-points occur when a running goroutine checks
+// for a preemption request.
+//
+// 3. Asynchronous safe-points occur at any instruction in user code
+// where the goroutine can be safely paused and a conservative
+// stack and register scan can find stack roots. The runtime can
+// stop a goroutine at an async safe-point using a signal.
+//
+// At both blocked and synchronous safe-points, a goroutine's CPU
+// state is minimal and the garbage collector has complete information
+// about its entire stack. This makes it possible to deschedule a
+// goroutine with minimal space, and to precisely scan a goroutine's
+// stack.
+//
+// Synchronous safe-points are implemented by overloading the stack
+// bound check in function prologues. To preempt a goroutine at the
+// next synchronous safe-point, the runtime poisons the goroutine's
+// stack bound to a value that will cause the next stack bound check
+// to fail and enter the stack growth implementation, which will
+// detect that it was actually a preemption and redirect to preemption
+// handling.
+//
+// Preemption at asynchronous safe-points is implemented by suspending
+// the thread using an OS mechanism (e.g., signals) and inspecting its
+// state to determine if the goroutine was at an asynchronous
+// safe-point. Since the thread suspension itself is generally
+// asynchronous, it also checks if the running goroutine wants to be
+// preempted, since this could have changed. If all conditions are
+// satisfied, it adjusts the signal context to make it look like the
+// signaled thread just called asyncPreempt and resumes the thread.
+// asyncPreempt spills all registers and enters the scheduler.
+//
+// (An alternative would be to preempt in the signal handler itself.
+// This would let the OS save and restore the register state and the
+// runtime would only need to know how to extract potentially
+// pointer-containing registers from the signal context. However, this
+// would consume an M for every preempted G, and the scheduler itself
+// is not designed to run from a signal handler, as it tends to
+// allocate memory and start threads in the preemption path.)
+
+package runtime
+
+import (
+ "runtime/internal/atomic"
+)
+
+type suspendGState struct {
+ g *g
+
+ // dead indicates the goroutine was not suspended because it
+ // is dead. This goroutine could be reused after the dead
+ // state was observed, so the caller must not assume that it
+ // remains dead.
+ dead bool
+
+ // stopped indicates that this suspendG transitioned the G to
+ // _Gwaiting via g.preemptStop and thus is responsible for
+ // readying it when done.
+ stopped bool
+}
+
+// suspendG suspends goroutine gp at a safe-point and returns the
+// state of the suspended goroutine. The caller gets read access to
+// the goroutine until it calls resumeG.
+//
+// It is safe for multiple callers to attempt to suspend the same
+// goroutine at the same time. The goroutine may execute between
+// subsequent successful suspend operations. The current
+// implementation grants exclusive access to the goroutine, and hence
+// multiple callers will serialize. However, the intent is to grant
+// shared read access, so please don't depend on exclusive access.
+//
+// This must be called from the system stack and the user goroutine on
+// the current M (if any) must be in a preemptible state. This
+// prevents deadlocks where two goroutines attempt to suspend each
+// other and both are in non-preemptible states. There are other ways
+// to resolve this deadlock, but this seems simplest.
+//
+// TODO(austin): What if we instead required this to be called from a
+// user goroutine? Then we could deschedule the goroutine while
+// waiting instead of blocking the thread. If two goroutines tried to
+// suspend each other, one of them would win and the other wouldn't
+// complete the suspend until it was resumed. We would have to be
+// careful that they couldn't actually queue up suspend for each other
+// and then both be suspended. This would also avoid the need for a
+// kernel context switch in the synchronous case because we could just
+// directly schedule the waiter. The context switch is unavoidable in
+// the signal case.
+//
+//go:systemstack
+func suspendG(gp *g) suspendGState {
+ if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
+ // Since we're on the system stack of this M, the user
+ // G is stuck at an unsafe point. If another goroutine
+ // were to try to preempt m.curg, it could deadlock.
+ throw("suspendG from non-preemptible goroutine")
+ }
+
+ // See https://golang.org/cl/21503 for justification of the yield delay.
+ const yieldDelay = 10 * 1000
+ var nextYield int64
+
+ // Drive the goroutine to a preemption point.
+ stopped := false
+ var asyncM *m
+ var asyncGen uint32
+ var nextPreemptM int64
+ for i := 0; ; i++ {
+ switch s := readgstatus(gp); s {
+ default:
+ if s&_Gscan != 0 {
+ // Someone else is suspending it. Wait
+ // for them to finish.
+ //
+ // TODO: It would be nicer if we could
+ // coalesce suspends.
+ break
+ }
+
+ dumpgstatus(gp)
+ throw("invalid g status")
+
+ case _Gdead:
+ // Nothing to suspend.
+ //
+ // preemptStop may need to be cleared, but
+ // doing that here could race with goroutine
+ // reuse. Instead, goexit0 clears it.
+ return suspendGState{dead: true}
+
+ case _Gcopystack:
+ // The stack is being copied. We need to wait
+ // until this is done.
+
+ case _Gexitingsyscall:
+ // This is a transient state. Try again.
+
+ case _Gpreempted:
+ // We (or someone else) suspended the G. Claim
+ // ownership of it by transitioning it to
+ // _Gwaiting.
+ if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
+ break
+ }
+
+ // We stopped the G, so we have to ready it later.
+ stopped = true
+
+ s = _Gwaiting
+ fallthrough
+
+ case _Grunnable, _Gsyscall, _Gwaiting:
+ // Claim goroutine by setting scan bit.
+ // This may race with execution or readying of gp.
+ // The scan bit keeps it from transitioning state.
+ if !castogscanstatus(gp, s, s|_Gscan) {
+ break
+ }
+
+ // Clear the preemption request. It's safe to
+ // reset the stack guard because we hold the
+ // _Gscan bit and thus own the stack.
+ gp.preemptStop = false
+ gp.preempt = false
+
+ // The goroutine was already at a safe-point
+ // and we've now locked that in.
+ //
+ // TODO: It would be much better if we didn't
+ // leave it in _Gscan, but instead gently
+ // prevented its scheduling until resumption.
+ // Maybe we only use this to bump a suspended
+ // count and the scheduler skips suspended
+ // goroutines? That wouldn't be enough for
+ // {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
+ // for all those transitions we need to check
+ // suspended and deschedule?
+ return suspendGState{g: gp, stopped: stopped}
+
+ case _Grunning:
+ // Optimization: if there is already a pending preemption request
+ // (from the previous loop iteration), don't bother with the atomics.
+ if asyncM != nil && gp.preemptStop && gp.preempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
+ break
+ }
+
+ // Temporarily block state transitions.
+ if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
+ break
+ }
+
+ // Request synchronous preemption.
+ gp.preemptStop = true
+ gp.preempt = true
+
+ // Prepare for asynchronous preemption.
+ asyncM2 := gp.m
+ asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+ needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+ asyncM = asyncM2
+ asyncGen = asyncGen2
+
+ casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+ // Send asynchronous preemption. We do this
+ // after CASing the G back to _Grunning
+ // because preemptM may be synchronous and we
+ // don't want to catch the G just spinning on
+ // its status.
+ if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+ // Rate limit preemptM calls. This is
+ // particularly important on Windows
+ // where preemptM is actually
+ // synchronous and the spin loop here
+ // can lead to live-lock.
+ now := nanotime()
+ if now >= nextPreemptM {
+ nextPreemptM = now + yieldDelay/2
+ preemptM(asyncM)
+ }
+ }
+ }
+
+ // TODO: Don't busy wait. This loop should really only
+ // be a simple read/decide/CAS loop that only fails if
+ // there's an active race. Once the CAS succeeds, we
+ // should queue up the preemption (which will require
+ // it to be reliable in the _Grunning case, not
+ // best-effort) and then sleep until we're notified
+ // that the goroutine is suspended.
+ if i == 0 {
+ nextYield = nanotime() + yieldDelay
+ }
+ if nanotime() < nextYield {
+ procyield(10)
+ } else {
+ osyield()
+ nextYield = nanotime() + yieldDelay/2
+ }
+ }
+}
+
+// resumeG undoes the effects of suspendG, allowing the suspended
+// goroutine to continue from its current safe-point.
+func resumeG(state suspendGState) {
+ if state.dead {
+ // We didn't actually stop anything.
+ return
+ }
+
+ gp := state.g
+ switch s := readgstatus(gp); s {
+ default:
+ dumpgstatus(gp)
+ throw("unexpected g status")
+
+ case _Grunnable | _Gscan,
+ _Gwaiting | _Gscan,
+ _Gsyscall | _Gscan:
+ casfrom_Gscanstatus(gp, s, s&^_Gscan)
+ }
+
+ if state.stopped {
+ // We stopped it, so we need to re-schedule it.
+ ready(gp, 0, true)
+ }
+}
+
+// canPreemptM reports whether mp is in a state that is safe to preempt.
+//
+// It is nosplit because it has nosplit callers.
+//
+//go:nosplit
+func canPreemptM(mp *m) bool {
+ return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
+}
+
+//go:generate go run mkpreempt.go
+
+// asyncPreempt saves all user registers and calls asyncPreempt2.
+//
+// When stack scanning encounters an asyncPreempt frame, it scans that
+// frame and its parent frame conservatively.
+//
+// asyncPreempt is implemented in assembly.
+func asyncPreempt()
+
+//go:nosplit
+func asyncPreempt2() {
+ gp := getg()
+ gp.asyncSafePoint = true
+ if gp.preemptStop {
+ mcall(preemptPark)
+ } else {
+ mcall(gopreempt_m)
+ }
+ gp.asyncSafePoint = false
+}
+
+// wantAsyncPreempt returns whether an asynchronous preemption is
+// queued for gp.
+func wantAsyncPreempt(gp *g) bool {
+ // Check both the G and the P.
+ return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
+}
+
+// isAsyncSafePoint reports whether gp at instruction PC is an
+// asynchronous safe point. This indicates that:
+//
+// 1. It's safe to suspend gp and conservatively scan its stack and
+// registers. There are no potentially hidden pointer values and it's
+// not in the middle of an atomic sequence like a write barrier.
+//
+// 2. gp has enough stack space to inject the asyncPreempt call.
+//
+// 3. It's generally safe to interact with the runtime, even if we're
+// in a signal handler stopped here. For example, there are no runtime
+// locks held, so acquiring a runtime lock won't self-deadlock.
+func isAsyncSafePoint(gp *g, pc uintptr) bool {
+ mp := gp.m
+
+ // Only user Gs can have safe-points. We check this first
+ // because it's extremely common that we'll catch mp in the
+ // scheduler processing this G preemption.
+ if mp.curg != gp {
+ return false
+ }
+
+ // Check M state.
+ if mp.p == 0 || !canPreemptM(mp) {
+ return false
+ }
+
+ // Check if PC is an unsafe-point.
+ f := FuncForPC(pc)
+ if f == nil {
+ // Not Go code.
+ return false
+ }
+ name := f.Name()
+ if hasPrefix(name, "runtime.") ||
+ hasPrefix(name, "runtime..z2finternal..z2f") ||
+ hasPrefix(name, "reflect.") {
+ // For now we never async preempt the runtime or
+ // anything closely tied to the runtime. Known issues
+ // include: various points in the scheduler ("don't
+ // preempt between here and here"), much of the defer
+ // implementation (untyped info on stack), bulk write
+ // barriers (write barrier check),
+ // reflect.{makeFuncStub,methodValueCall}.
+ //
+ // TODO(austin): We should improve this, or opt things
+ // in incrementally.
+ return false
+ }
+
+ return true
+}
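The kind of code the asynchronous safe-point machinery above targets is a goroutine spinning in a call-free loop, which has no synchronous safe-points. The sketch below only illustrates that scenario; whether the loop is actually preempted this way depends on platform support (preemptMSupported), on GODEBUG=asyncpreemptoff, and on whether libgo enables signal-based preemption at all.

package main

import (
	"fmt"
	"runtime"
	"time"
)

var sink int64 // package-level so the loop body is not optimized away

func main() {
	runtime.GOMAXPROCS(1)
	go func() {
		// Tight loop: no function calls, so no synchronous safe-points.
		// Stopping it relies on the signal-based preemption described above;
		// without it, this program may never get past the Sleep or the GC.
		for i := int64(0); i < 1<<62; i++ {
			sink += i
		}
	}()
	time.Sleep(10 * time.Millisecond) // let the loop start
	start := time.Now()
	runtime.GC() // must stop the spinning goroutine at an asynchronous safe-point
	fmt.Println("GC finished in", time.Since(start))
}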
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index a025137..c0e8577 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -548,6 +548,7 @@ func schedinit() {
usestackmaps = probestackmaps()
mallocinit()
+ fastrandinit() // must run before mcommoninit
mcommoninit(_g_.m)
cpuinit() // must run before alginit
alginit() // maps must not be used before this call
@@ -622,8 +623,8 @@ func mcommoninit(mp *m) {
sched.mnext++
checkmcount()
- mp.fastrand[0] = 1597334677 * uint32(mp.id)
- mp.fastrand[1] = uint32(cputicks())
+ mp.fastrand[0] = uint32(int64Hash(uint64(mp.id), fastrandseed))
+ mp.fastrand[1] = uint32(int64Hash(uint64(cputicks()), ^fastrandseed))
if mp.fastrand[0]|mp.fastrand[1] == 0 {
mp.fastrand[1] = 1
}
@@ -640,6 +641,13 @@ func mcommoninit(mp *m) {
unlock(&sched.lock)
}
+var fastrandseed uintptr
+
+func fastrandinit() {
+ s := (*[unsafe.Sizeof(fastrandseed)]byte)(unsafe.Pointer(&fastrandseed))[:]
+ getRandomData(s)
+}
+
// Mark gp ready to run.
func ready(gp *g, traceskip int, next bool) {
if trace.enabled {
@@ -704,18 +712,6 @@ func readgstatus(gp *g) uint32 {
return atomic.Load(&gp.atomicstatus)
}
-// Ownership of gcscanvalid:
-//
-// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
-// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
-//
-// Otherwise, a second goroutine can lock the scan state by setting _Gscan
-// in the status bit and then modify gcscanvalid, and then unlock the scan state.
-//
-// Note that the first condition implies an exception to the second:
-// if a second goroutine changes gp's status to _Grunning|_Gscan,
-// that second goroutine still does not have the right to modify gcscanvalid.
-
// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
@@ -732,7 +728,8 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
case _Gscanrunnable,
_Gscanwaiting,
_Gscanrunning,
- _Gscansyscall:
+ _Gscansyscall,
+ _Gscanpreempted:
if newval == oldval&^_Gscan {
success = atomic.Cas(&gp.atomicstatus, oldval, newval)
}
@@ -774,17 +771,6 @@ func casgstatus(gp *g, oldval, newval uint32) {
})
}
- if oldval == _Grunning && gp.gcscanvalid {
- // If oldvall == _Grunning, then the actual status must be
- // _Grunning or _Grunning|_Gscan; either way,
- // we own gp.gcscanvalid, so it's safe to read.
- // gp.gcscanvalid must not be true when we are running.
- systemstack(func() {
- print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
- throw("casgstatus")
- })
- }
-
// See https://golang.org/cl/21503 for justification of the yield delay.
const yieldDelay = 5 * 1000
var nextYield int64
@@ -795,14 +781,6 @@ func casgstatus(gp *g, oldval, newval uint32) {
if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
throw("casgstatus: waiting for Gwaiting but is Grunnable")
}
- // Help GC if needed.
- // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
- // gp.preemptscan = false
- // systemstack(func() {
- // gcphasework(gp)
- // })
- // }
- // But meanwhile just yield.
if i == 0 {
nextYield = nanotime() + yieldDelay
}
@@ -815,174 +793,28 @@ func casgstatus(gp *g, oldval, newval uint32) {
nextYield = nanotime() + yieldDelay/2
}
}
- if newval == _Grunning {
- gp.gcscanvalid = false
- }
}
-// scang blocks until gp's stack has been scanned.
-// It might be scanned by scang or it might be scanned by the goroutine itself.
-// Either way, the stack scan has completed when scang returns.
-func scang(gp *g, gcw *gcWork) {
- // Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone.
- // Nothing is racing with us now, but gcscandone might be set to true left over
- // from an earlier round of stack scanning (we scan twice per GC).
- // We use gcscandone to record whether the scan has been done during this round.
-
- gp.gcscandone = false
-
- // See https://golang.org/cl/21503 for justification of the yield delay.
- const yieldDelay = 10 * 1000
- var nextYield int64
-
- // Endeavor to get gcscandone set to true,
- // either by doing the stack scan ourselves or by coercing gp to scan itself.
- // gp.gcscandone can transition from false to true when we're not looking
- // (if we asked for preemption), so any time we lock the status using
- // castogscanstatus we have to double-check that the scan is still not done.
-loop:
- for i := 0; !gp.gcscandone; i++ {
- switch s := readgstatus(gp); s {
- default:
- dumpgstatus(gp)
- throw("stopg: invalid status")
-
- case _Gdead:
- // No stack.
- gp.gcscandone = true
- break loop
-
- case _Gcopystack:
- // Stack being switched. Go around again.
-
- case _Gsyscall:
- if usestackmaps {
- // Claim goroutine by setting scan bit.
- // Racing with execution or readying of gp.
- // The scan bit keeps them from running
- // the goroutine until we're done.
- if castogscanstatus(gp, s, s|_Gscan) {
- if gp.scanningself {
- // Don't try to scan the stack
- // if the goroutine is going to do
- // it itself.
- // FIXME: can this happen?
- restartg(gp)
- break
- }
- if !gp.gcscandone {
- // Send a signal to let the goroutine scan
- // itself. This races with enter/exitsyscall.
- // If the goroutine is not stopped at a safepoint,
- // it will not scan the stack and we'll try again.
- mp := gp.m
- noteclear(&mp.scannote)
- gp.scangcw = uintptr(unsafe.Pointer(gcw))
- tgkill(getpid(), _pid_t(mp.procid), _SIGURG)
-
- // Wait for gp to scan its own stack.
- notesleep(&mp.scannote)
-
- if !gp.gcscandone {
- // The signal delivered at a bad time.
- // Try again.
- restartg(gp)
- break
- }
- }
- restartg(gp)
- break loop
- }
- break
- }
- fallthrough
-
- case _Grunnable, _Gwaiting:
- // Claim goroutine by setting scan bit.
- // Racing with execution or readying of gp.
- // The scan bit keeps them from running
- // the goroutine until we're done.
- if castogscanstatus(gp, s, s|_Gscan) {
- if gp.scanningself {
- // Don't try to scan the stack
- // if the goroutine is going to do
- // it itself.
- restartg(gp)
- break
- }
- if !gp.gcscandone {
- scanstack(gp, gcw)
- gp.gcscandone = true
- }
- restartg(gp)
- break loop
- }
-
- case _Gexitingsyscall:
- // This is a transient state during which we should not scan its stack.
- // Try again.
-
- case _Gscanwaiting:
- // newstack is doing a scan for us right now. Wait.
-
- case _Gscanrunning:
- // checkPreempt is scanning. Wait.
-
- case _Grunning:
- // Goroutine running. Try to preempt execution so it can scan itself.
- // The preemption handler (in newstack) does the actual scan.
-
- // Optimization: if there is already a pending preemption request
- // (from the previous loop iteration), don't bother with the atomics.
- if gp.preemptscan && gp.preempt {
- break
- }
-
- // Ask for preemption and self scan.
- if castogscanstatus(gp, _Grunning, _Gscanrunning) {
- if !gp.gcscandone {
- gp.preemptscan = true
- gp.preempt = true
- }
- casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
- }
- }
-
- if i == 0 {
- nextYield = nanotime() + yieldDelay
- }
- if nanotime() < nextYield {
- procyield(10)
- } else {
- osyield()
- nextYield = nanotime() + yieldDelay/2
- }
+// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
+//
+// TODO(austin): This is the only status operation that both changes
+// the status and locks the _Gscan bit. Rethink this.
+func casGToPreemptScan(gp *g, old, new uint32) {
+ if old != _Grunning || new != _Gscan|_Gpreempted {
+ throw("bad g transition")
}
-
- gp.preemptscan = false // cancel scan request if no longer needed
-}
-
-// The GC requests that this routine be moved from a scanmumble state to a mumble state.
-func restartg(gp *g) {
- if gp.scang != 0 || gp.scangcw != 0 {
- print("g ", gp.goid, "is being scanned scang=", gp.scang, " scangcw=", gp.scangcw, "\n")
- throw("restartg: being scanned")
+ for !atomic.Cas(&gp.atomicstatus, _Grunning, _Gscan|_Gpreempted) {
}
+}
- s := readgstatus(gp)
- switch s {
- default:
- dumpgstatus(gp)
- throw("restartg: unexpected status")
-
- case _Gdead:
- // ok
-
- case _Gscanrunnable,
- _Gscanwaiting,
- _Gscansyscall:
- casfrom_Gscanstatus(gp, s, s&^_Gscan)
+// casGFromPreempted attempts to transition gp from _Gpreempted to
+// _Gwaiting. If successful, the caller is responsible for
+// re-scheduling gp.
+func casGFromPreempted(gp *g, old, new uint32) bool {
+ if old != _Gpreempted || new != _Gwaiting {
+ throw("bad g transition")
}
+ return atomic.Cas(&gp.atomicstatus, _Gpreempted, _Gwaiting)
}
// stopTheWorld stops all P's from executing goroutines, interrupting
@@ -1001,8 +833,23 @@ func restartg(gp *g) {
// goroutines.
func stopTheWorld(reason string) {
semacquire(&worldsema)
- getg().m.preemptoff = reason
- systemstack(stopTheWorldWithSema)
+ gp := getg()
+ gp.m.preemptoff = reason
+ systemstack(func() {
+ // Mark the goroutine which called stopTheWorld preemptible so its
+ // stack may be scanned.
+ // This lets a mark worker scan us while we try to stop the world
+ // since otherwise we could get in a mutual preemption deadlock.
+ // We must not modify anything on the G stack because a stack shrink
+ // may occur. A stack shrink is otherwise OK though because in order
+ // to return from this function (and to leave the system stack) we
+ // must have preempted all goroutines, including any attempting
+ // to scan our stack, in which case, any stack shrinking will
+ // have already completed by the time we exit.
+ casgstatus(gp, _Grunning, _Gwaiting)
+ stopTheWorldWithSema()
+ casgstatus(gp, _Gwaiting, _Grunning)
+ })
}
// startTheWorld undoes the effects of stopTheWorld.
@@ -1014,10 +861,31 @@ func startTheWorld() {
getg().m.preemptoff = ""
}
-// Holding worldsema grants an M the right to try to stop the world
-// and prevents gomaxprocs from changing concurrently.
+// stopTheWorldGC has the same effect as stopTheWorld, but blocks
+// until the GC is not running. It also blocks a GC from starting
+// until startTheWorldGC is called.
+func stopTheWorldGC(reason string) {
+ semacquire(&gcsema)
+ stopTheWorld(reason)
+}
+
+// startTheWorldGC undoes the effects of stopTheWorldGC.
+func startTheWorldGC() {
+ startTheWorld()
+ semrelease(&gcsema)
+}
+
+// Holding worldsema grants an M the right to try to stop the world.
var worldsema uint32 = 1
+// Holding gcsema grants the M the right to block a GC, and blocks
+// until the current GC is done. In particular, it prevents gomaxprocs
+// from changing concurrently.
+//
+// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle
+// being changed/enabled during a GC, remove this.
+var gcsema uint32 = 1
+
// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should stopTheWorldWithSema on the system
@@ -1119,7 +987,7 @@ func stopTheWorldWithSema() {
func startTheWorldWithSema(emitTraceEvent bool) int64 {
mp := acquirem() // disable preemption because it can be holding p in a local var
if netpollinited() {
- list := netpoll(false) // non-blocking
+ list := netpoll(0) // non-blocking
injectglist(&list)
}
lock(&sched.lock)
@@ -1299,6 +1167,11 @@ func mexit(osStack bool) {
// Free the gsignal stack.
if m.gsignal != nil {
stackfree(m.gsignal)
+ // On some platforms, when calling into VDSO (e.g. nanotime)
+ // we store our g on the gsignal stack, if there is one.
+ // Now that the stack is freed, unlink it from the m so we
+ // won't write to it when calling VDSO code.
+ m.gsignal = nil
}
// Remove m from allm.
@@ -1637,8 +1510,6 @@ func oneNewExtraM() {
// the goroutine stack ends.
mp, g0SP, g0SPSize := allocm(nil, nil, true)
gp := malg(true, false, nil, nil)
- gp.gcscanvalid = true
- gp.gcscandone = true
// malg returns status as _Gidle. Change to _Gdead before
// adding to allg where GC can see it. We use _Gdead to hide
// this from tracebacks and stack scans since it isn't a
@@ -1704,6 +1575,7 @@ func dropm() {
// Return mp.curg to dead state.
casgstatus(mp.curg, _Gsyscall, _Gdead)
+ mp.curg.preemptStop = false
atomic.Xadd(&sched.ngsys, +1)
// Block signals before unminit.
@@ -2034,6 +1906,9 @@ func handoffp(_p_ *p) {
startm(_p_, false)
return
}
+ if when := nobarrierWakeTime(_p_); when != 0 {
+ wakeNetPoller(when)
+ }
pidleput(_p_)
unlock(&sched.lock)
}
@@ -2135,14 +2010,16 @@ func gcstopm() {
func execute(gp *g, inheritTime bool) {
_g_ := getg()
+ // Assign gp.m before entering _Grunning so running Gs have an
+ // M.
+ _g_.m.curg = gp
+ gp.m = _g_.m
casgstatus(gp, _Grunnable, _Grunning)
gp.waitsince = 0
gp.preempt = false
if !inheritTime {
_g_.m.p.ptr().schedtick++
}
- _g_.m.curg = gp
- gp.m = _g_.m
// Check whether the profiler needs to be turned on or off.
hz := sched.profilehz
@@ -2163,7 +2040,7 @@ func execute(gp *g, inheritTime bool) {
}
// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
+// Tries to steal from other P's, get g from local or global queue, poll network.
func findrunnable() (gp *g, inheritTime bool) {
_g_ := getg()
@@ -2180,6 +2057,9 @@ top:
if _p_.runSafePointFn != 0 {
runSafePointFn()
}
+
+ now, pollUntil, _ := checkTimers(_p_, 0)
+
if fingwait && fingwake {
if gp := wakefing(); gp != nil {
ready(gp, 0, true)
@@ -2212,7 +2092,7 @@ top:
// not set lastpoll yet), this thread will do blocking netpoll below
// anyway.
if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Load64(&sched.lastpoll) != 0 {
- if list := netpoll(false); !list.empty() { // non-blocking
+ if list := netpoll(0); !list.empty() { // non-blocking
gp := list.pop()
injectglist(&list)
casgstatus(gp, _Gwaiting, _Grunnable)
@@ -2225,12 +2105,7 @@ top:
// Steal work from other P's.
procs := uint32(gomaxprocs)
- if atomic.Load(&sched.npidle) == procs-1 {
- // Either GOMAXPROCS=1 or everybody, except for us, is idle already.
- // New work can appear from returning syscall/cgocall, network or timers.
- // Neither of that submits to local run queues, so no point in stealing.
- goto stop
- }
+ ranTimer := false
// If number of spinning M's >= number of busy P's, block.
// This is necessary to prevent excessive CPU consumption
// when GOMAXPROCS>>1 but the program parallelism is low.
@@ -2247,11 +2122,48 @@ top:
goto top
}
stealRunNextG := i > 2 // first look for ready queues with more than 1 g
- if gp := runqsteal(_p_, allp[enum.position()], stealRunNextG); gp != nil {
+ p2 := allp[enum.position()]
+ if _p_ == p2 {
+ continue
+ }
+ if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
return gp, false
}
+
+ // Consider stealing timers from p2.
+ // This call to checkTimers is the only place where
+ // we hold a lock on a different P's timers.
+ // Lock contention can be a problem here, so avoid
+ // grabbing the lock if p2 is running and not marked
+ // for preemption. If p2 is running and not being
+ // preempted we assume it will handle its own timers.
+ if i > 2 && shouldStealTimers(p2) {
+ tnow, w, ran := checkTimers(p2, now)
+ now = tnow
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ pollUntil = w
+ }
+ if ran {
+ // Running the timers may have
+ // made an arbitrary number of G's
+ // ready and added them to this P's
+ // local run queue. That invalidates
+ // the assumption of runqsteal
+ // that it always has room to add
+ // stolen G's. So check now if there
+ // is a local G to run.
+ if gp, inheritTime := runqget(_p_); gp != nil {
+ return gp, inheritTime
+ }
+ ranTimer = true
+ }
+ }
}
}
+ if ranTimer {
+ // Running a timer may have made some goroutine ready.
+ goto top
+ }
stop:
@@ -2268,10 +2180,16 @@ stop:
return gp, false
}
+ delta := int64(-1)
+ if pollUntil != 0 {
+ // checkTimers ensures that pollUntil > now.
+ delta = pollUntil - now
+ }
+
// wasm only:
// If a callback returned and no other goroutine is awake,
// then pause execution until a callback was triggered.
- if beforeIdle() {
+ if beforeIdle(delta) {
// At least one goroutine got woken.
goto top
}
@@ -2359,21 +2277,35 @@ stop:
}
// poll network
- if netpollinited() && atomic.Load(&netpollWaiters) > 0 && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
+ if netpollinited() && (atomic.Load(&netpollWaiters) > 0 || pollUntil != 0) && atomic.Xchg64(&sched.lastpoll, 0) != 0 {
+ atomic.Store64(&sched.pollUntil, uint64(pollUntil))
if _g_.m.p != 0 {
throw("findrunnable: netpoll with p")
}
if _g_.m.spinning {
throw("findrunnable: netpoll with spinning")
}
- list := netpoll(true) // block until new work is available
+ if faketime != 0 {
+ // When using fake time, just poll.
+ delta = 0
+ }
+ list := netpoll(delta) // block until new work is available
+ atomic.Store64(&sched.pollUntil, 0)
atomic.Store64(&sched.lastpoll, uint64(nanotime()))
- if !list.empty() {
- lock(&sched.lock)
- _p_ = pidleget()
- unlock(&sched.lock)
- if _p_ != nil {
- acquirep(_p_)
+ if faketime != 0 && list.empty() {
+ // Using fake time and nothing is ready; stop M.
+ // When all M's stop, checkdead will call timejump.
+ stopm()
+ goto top
+ }
+ lock(&sched.lock)
+ _p_ = pidleget()
+ unlock(&sched.lock)
+ if _p_ == nil {
+ injectglist(&list)
+ } else {
+ acquirep(_p_)
+ if !list.empty() {
gp := list.pop()
injectglist(&list)
casgstatus(gp, _Gwaiting, _Grunnable)
@@ -2382,7 +2314,16 @@ stop:
}
return gp, false
}
- injectglist(&list)
+ if wasSpinning {
+ _g_.m.spinning = true
+ atomic.Xadd(&sched.nmspinning, 1)
+ }
+ goto top
+ }
+ } else if pollUntil != 0 && netpollinited() {
+ pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
+ netpollBreak()
}
}
stopm()
@@ -2402,7 +2343,7 @@ func pollWork() bool {
return true
}
if netpollinited() && atomic.Load(&netpollWaiters) > 0 && sched.lastpoll != 0 {
- if list := netpoll(false); !list.empty() {
+ if list := netpoll(0); !list.empty() {
injectglist(&list)
return true
}
@@ -2410,6 +2351,22 @@ func pollWork() bool {
return false
}
+// wakeNetPoller wakes up the thread sleeping in the network poller,
+// if there is one, and if it isn't going to wake up anyhow before
+// the when argument.
+func wakeNetPoller(when int64) {
+ if atomic.Load64(&sched.lastpoll) == 0 {
+ // In findrunnable we ensure that when polling the pollUntil
+ // field is either zero or the time to which the current
+ // poll is expected to run. This can cause a spurious wakeup
+ // but should never miss a wakeup.
+ pollerPollUntil := int64(atomic.Load64(&sched.pollUntil))
+ if pollerPollUntil == 0 || pollerPollUntil > when {
+ netpollBreak()
+ }
+ }
+}
+
func resetspinning() {
_g_ := getg()
if !_g_.m.spinning {
@@ -2474,14 +2431,26 @@ func schedule() {
}
top:
+ pp := _g_.m.p.ptr()
+ pp.preempt = false
+
if sched.gcwaiting != 0 {
gcstopm()
goto top
}
- if _g_.m.p.ptr().runSafePointFn != 0 {
+ if pp.runSafePointFn != 0 {
runSafePointFn()
}
+ // Sanity check: if we are spinning, the run queue should be empty.
+ // Check this before calling checkTimers, as that might call
+ // goready to put a ready goroutine on the local run queue.
+ if _g_.m.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
+ throw("schedule: spinning with local work")
+ }
+
+ checkTimers(pp, 0)
+
var gp *g
var inheritTime bool
@@ -2513,9 +2482,8 @@ top:
}
if gp == nil {
gp, inheritTime = runqget(_g_.m.p.ptr())
- if gp != nil && _g_.m.spinning {
- throw("schedule: spinning with local work")
- }
+ // We can see gp != nil here even if the M is spinning,
+ // if checkTimers added a local goroutine via goready.
// Because gccgo does not implement preemption as a stack check,
// we need to check for preemption here for fairness.
@@ -2591,6 +2559,62 @@ func dropg() {
setGNoWB(&_g_.m.curg, nil)
}
+// checkTimers runs any timers for the P that are ready.
+// If now is not 0 it is the current time.
+// It returns the current time or 0 if it is not known,
+// and the time when the next timer should run or 0 if there is no next timer,
+// and reports whether it ran any timers.
+// If the time when the next timer should run is not 0,
+// it is always larger than the returned time.
+// We pass now in and out to avoid extra calls of nanotime.
+//go:yeswritebarrierrec
+func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
+ lock(&pp.timersLock)
+
+ adjusttimers(pp)
+
+ rnow = now
+ if len(pp.timers) > 0 {
+ if rnow == 0 {
+ rnow = nanotime()
+ }
+ for len(pp.timers) > 0 {
+ // Note that runtimer may temporarily unlock
+ // pp.timersLock.
+ if tw := runtimer(pp, rnow); tw != 0 {
+ if tw > 0 {
+ pollUntil = tw
+ }
+ break
+ }
+ ran = true
+ }
+ }
+
+ unlock(&pp.timersLock)
+
+ return rnow, pollUntil, ran
+}
+
+// shouldStealTimers reports whether we should try stealing the timers from p2.
+// We don't steal timers from a running P that is not marked for preemption,
+// on the assumption that it will run its own timers. This reduces
+// contention on the timers lock.
+func shouldStealTimers(p2 *p) bool {
+ if p2.status != _Prunning {
+ return true
+ }
+ mp := p2.m.ptr()
+ if mp == nil || mp.locks > 0 {
+ return false
+ }
+ gp := mp.curg
+ if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt {
+ return false
+ }
+ return true
+}
+
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
unlock((*mutex)(lock))
return true
@@ -2604,8 +2628,8 @@ func park_m(gp *g) {
traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
}
- dropg()
casgstatus(gp, _Grunning, _Gwaiting)
+ dropg()
if fn := _g_.m.waitunlockf; fn != nil {
ok := fn(gp, _g_.m.waitlock)
@@ -2628,8 +2652,8 @@ func goschedImpl(gp *g) {
dumpgstatus(gp)
throw("bad g status")
}
- dropg()
casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
lock(&sched.lock)
globrunqput(gp)
unlock(&sched.lock)
@@ -2648,7 +2672,7 @@ func gosched_m(gp *g) {
// goschedguarded is a forbidden-states-avoided version of gosched_m
func goschedguarded_m(gp *g) {
- if gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" || gp.m.p.ptr().status != _Prunning {
+ if !canPreemptM(gp.m) {
gogo(gp) // never return
}
@@ -2665,6 +2689,47 @@ func gopreempt_m(gp *g) {
goschedImpl(gp)
}
+// preemptPark parks gp and puts it in _Gpreempted.
+//
+//go:systemstack
+func preemptPark(gp *g) {
+ if trace.enabled {
+ traceGoPark(traceEvGoBlock, 0)
+ }
+ status := readgstatus(gp)
+ if status&^_Gscan != _Grunning {
+ dumpgstatus(gp)
+ throw("bad g status")
+ }
+ gp.waitreason = waitReasonPreempted
+ // Transition from _Grunning to _Gscan|_Gpreempted. We can't
+ // be in _Grunning when we dropg because then we'd be running
+ // without an M, but the moment we're in _Gpreempted,
+ // something could claim this G before we've fully cleaned it
+ // up. Hence, we set the scan bit to lock down further
+ // transitions until we can dropg.
+ casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
+ dropg()
+ casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
+ schedule()
+}
+
+// goyield is like Gosched, but it:
+// - does not emit a GoSched trace event
+// - puts the current G on the runq of the current P instead of the globrunq
+func goyield() {
+ checkTimeouts()
+ mcall(goyield_m)
+}
+
+func goyield_m(gp *g) {
+ pp := gp.m.p.ptr()
+ casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
+ runqput(pp, gp, false)
+ schedule()
+}
+
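goyield is internal, but runtime.Gosched shows the same user-visible effect of giving up the processor while staying runnable; the local-runq and no-trace-event differences described above are not observable from this sketch.

package main

import (
	"fmt"
	"runtime"
)

func main() {
	runtime.GOMAXPROCS(1) // make both goroutines share one P
	done := make(chan struct{})
	go func() {
		fmt.Println("other goroutine ran")
		close(done)
	}()
	// Yield so the goroutine above runs before we block. Gosched parks
	// the current G on the global run queue; the runtime-internal
	// goyield would put it on the local one instead.
	runtime.Gosched()
	<-done
}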
// Finishes execution of the current goroutine.
func goexit1() {
if trace.enabled {
@@ -2687,6 +2752,7 @@ func goexit0(gp *g) {
gp.lockedm = 0
_g_.m.lockedg = 0
gp.entry = nil
+ gp.preemptStop = false
gp.paniconfault = false
gp._defer = nil // should be true already but just in case.
gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
@@ -2705,9 +2771,6 @@ func goexit0(gp *g) {
gp.gcAssistBytes = 0
}
- // Note that gp's stack scan is now "valid" because it has no
- // stack.
- gp.gcscanvalid = true
dropg()
if GOARCH == "wasm" { // no threads yet on wasm
@@ -3215,7 +3278,6 @@ func newproc(fn uintptr, arg unsafe.Pointer) *g {
if isSystemGoroutine(newg, false) {
atomic.Xadd(&sched.ngsys, +1)
}
- newg.gcscanvalid = false
casgstatus(newg, _Gdead, _Grunnable)
if _p_.goidcache == _p_.goidcacheend {
@@ -3736,6 +3798,20 @@ func (pp *p) destroy() {
globrunqputhead(pp.runnext.ptr())
pp.runnext = 0
}
+ if len(pp.timers) > 0 {
+ plocal := getg().m.p.ptr()
+ // The world is stopped, but we acquire timersLock to
+ // protect against sysmon calling timeSleepUntil.
+ // This is the only case where we hold the timersLock of
+ // more than one P, so there are no deadlock concerns.
+ lock(&plocal.timersLock)
+ lock(&pp.timersLock)
+ moveTimers(plocal, pp.timers)
+ pp.timers = nil
+ pp.adjustTimers = 0
+ unlock(&pp.timersLock)
+ unlock(&plocal.timersLock)
+ }
// If there's a background worker, make it runnable and put
// it on the global queue so it can clean itself up.
if gp := pp.gcBgMarkWorker.ptr(); gp != nil {
@@ -3761,14 +3837,18 @@ func (pp *p) destroy() {
pp.deferpoolbuf[i] = nil
}
pp.deferpool = pp.deferpoolbuf[:0]
+ systemstack(func() {
+ for i := 0; i < pp.mspancache.len; i++ {
+ // Safe to call since the world is stopped.
+ mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
+ }
+ pp.mspancache.len = 0
+ pp.pcache.flush(&mheap_.pages)
+ })
freemcache(pp.mcache)
pp.mcache = nil
gfpurge(pp)
traceProcFree(pp)
- if raceenabled {
- raceprocdestroy(pp.raceprocctx)
- pp.raceprocctx = 0
- }
pp.gcAssistTime = 0
pp.status = _Pdead
}
@@ -4016,7 +4096,8 @@ func checkdead() {
}
s := readgstatus(gp)
switch s &^ _Gscan {
- case _Gwaiting:
+ case _Gwaiting,
+ _Gpreempted:
grunning++
case _Grunnable,
_Grunning,
@@ -4028,17 +4109,18 @@ func checkdead() {
}
unlock(&allglock)
if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
+ unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
throw("no goroutines (main called runtime.Goexit) - deadlock!")
}
// Maybe jump time forward for playground.
- gp := timejump()
- if gp != nil {
- casgstatus(gp, _Gwaiting, _Grunnable)
- globrunqput(gp)
- _p_ := pidleget()
- if _p_ == nil {
- throw("checkdead: no p for timer")
+ _p_ := timejump()
+ if _p_ != nil {
+ for pp := &sched.pidle; *pp != 0; pp = &(*pp).ptr().link {
+ if (*pp).ptr() == _p_ {
+ *pp = _p_.link
+ break
+ }
}
mp := mget()
if mp == nil {
@@ -4051,7 +4133,15 @@ func checkdead() {
return
}
+ // There are no goroutines running, so we can look at the P's.
+ for _, _p_ := range allp {
+ if len(_p_.timers) > 0 {
+ return
+ }
+ }
+
getg().m.throwing = -1 // do not dump full stacks
+ unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
throw("all goroutines are asleep - deadlock!")
}
@@ -4084,32 +4174,34 @@ func sysmon() {
delay = 10 * 1000
}
usleep(delay)
+ now := nanotime()
+ next := timeSleepUntil()
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
lock(&sched.lock)
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
- atomic.Store(&sched.sysmonwait, 1)
- unlock(&sched.lock)
- // Make wake-up period small enough
- // for the sampling to be correct.
- maxsleep := forcegcperiod / 2
- shouldRelax := true
- if osRelaxMinNS > 0 {
- next := timeSleepUntil()
- now := nanotime()
- if next-now < osRelaxMinNS {
- shouldRelax = false
+ if next > now {
+ atomic.Store(&sched.sysmonwait, 1)
+ unlock(&sched.lock)
+ // Make wake-up period small enough
+ // for the sampling to be correct.
+ sleep := forcegcperiod / 2
+ if next-now < sleep {
+ sleep = next - now
}
+ shouldRelax := sleep >= osRelaxMinNS
+ if shouldRelax {
+ osRelax(true)
+ }
+ notetsleep(&sched.sysmonnote, sleep)
+ if shouldRelax {
+ osRelax(false)
+ }
+ now = nanotime()
+ next = timeSleepUntil()
+ lock(&sched.lock)
+ atomic.Store(&sched.sysmonwait, 0)
+ noteclear(&sched.sysmonnote)
}
- if shouldRelax {
- osRelax(true)
- }
- notetsleep(&sched.sysmonnote, maxsleep)
- if shouldRelax {
- osRelax(false)
- }
- lock(&sched.lock)
- atomic.Store(&sched.sysmonwait, 0)
- noteclear(&sched.sysmonnote)
idle = 0
delay = 20
}
@@ -4121,10 +4213,9 @@ func sysmon() {
}
// poll network if not polled for more than 10ms
lastpoll := int64(atomic.Load64(&sched.lastpoll))
- now := nanotime()
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
- list := netpoll(false) // non-blocking - returns list of goroutines
+ list := netpoll(0) // non-blocking - returns list of goroutines
if !list.empty() {
// Need to decrement number of idle locked M's
// (pretending that one more is running) before injectglist.
@@ -4138,6 +4229,12 @@ func sysmon() {
incidlelocked(1)
}
}
+ if next < now {
+ // There are timers that should have already run,
+ // perhaps because there is an unpreemptible P.
+ // Try to start an M to run them.
+ startm(nil, false)
+ }
// retake P's blocked in syscalls
// and preempt long running G's
if retake(now) != 0 {
@@ -4296,6 +4393,12 @@ func preemptone(_p_ *p) bool {
// and a few other places, which is at least better than doing
// nothing at all.
+ // Request an async preemption of this P.
+ if preemptMSupported && debug.asyncpreemptoff == 0 {
+ _p_.preempt = true
+ preemptM(mp)
+ }
+
return true
}
@@ -4324,7 +4427,7 @@ func schedtrace(detailed bool) {
if mp != nil {
id = mp.id
}
- print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, "\n")
+ print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gFree.n, " timerslen=", len(_p_.timers), "\n")
} else {
// In non-detailed mode format lengths of per-P run queues as:
// [len1 len2 len3 len4]
diff --git a/libgo/go/runtime/proc_test.go b/libgo/go/runtime/proc_test.go
index fee03be..a693937 100644
--- a/libgo/go/runtime/proc_test.go
+++ b/libgo/go/runtime/proc_test.go
@@ -362,6 +362,17 @@ func TestPreemptionGC(t *testing.T) {
atomic.StoreUint32(&stop, 1)
}
+func TestAsyncPreempt(t *testing.T) {
+ if !runtime.PreemptMSupported {
+ t.Skip("asynchronous preemption not supported on this platform")
+ }
+ output := runTestProg(t, "testprog", "AsyncPreempt")
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("want %s, got %s\n", want, output)
+ }
+}
+
func TestGCFairness(t *testing.T) {
output := runTestProg(t, "testprog", "GCFairness")
want := "OK\n"
@@ -988,3 +999,42 @@ func TestPreemptionAfterSyscall(t *testing.T) {
func TestGetgThreadSwitch(t *testing.T) {
runtime.RunGetgThreadSwitchTest()
}
+
+// TestNetpollBreak tests that netpollBreak can break a netpoll.
+// This test is not particularly safe since the call to netpoll
+// will pick up any stray files that are ready, but it should work
+// OK as long as it is not run in parallel.
+func TestNetpollBreak(t *testing.T) {
+ if runtime.GOMAXPROCS(0) == 1 {
+ t.Skip("skipping: GOMAXPROCS=1")
+ }
+
+ // Make sure that netpoll is initialized.
+ runtime.NetpollGenericInit()
+
+ start := time.Now()
+ c := make(chan bool, 2)
+ go func() {
+ c <- true
+ runtime.Netpoll(10 * time.Second.Nanoseconds())
+ c <- true
+ }()
+ <-c
+ // Loop because the break might get eaten by the scheduler.
+ // Break twice to break both the netpoll we started and the
+ // scheduler netpoll.
+loop:
+ for {
+ runtime.Usleep(100)
+ runtime.NetpollBreak()
+ runtime.NetpollBreak()
+ select {
+ case <-c:
+ break loop
+ default:
+ }
+ }
+ if dur := time.Since(start); dur > 5*time.Second {
+ t.Errorf("netpollBreak did not interrupt netpoll: slept for: %v", dur)
+ }
+}
diff --git a/libgo/go/runtime/race0.go b/libgo/go/runtime/race0.go
index f1d3706..6f26afa 100644
--- a/libgo/go/runtime/race0.go
+++ b/libgo/go/runtime/race0.go
@@ -29,6 +29,7 @@ func racereadrangepc(addr unsafe.Pointer, sz, callerpc, pc uintptr) { th
func racewriterangepc(addr unsafe.Pointer, sz, callerpc, pc uintptr) { throw("race") }
func raceacquire(addr unsafe.Pointer) { throw("race") }
func raceacquireg(gp *g, addr unsafe.Pointer) { throw("race") }
+func raceacquirectx(racectx uintptr, addr unsafe.Pointer) { throw("race") }
func racerelease(addr unsafe.Pointer) { throw("race") }
func racereleaseg(gp *g, addr unsafe.Pointer) { throw("race") }
func racereleasemerge(addr unsafe.Pointer) { throw("race") }
@@ -38,3 +39,4 @@ func racemalloc(p unsafe.Pointer, sz uintptr) { th
func racefree(p unsafe.Pointer, sz uintptr) { throw("race") }
func racegostart(pc uintptr) uintptr { throw("race"); return 0 }
func racegoend() { throw("race") }
+func racectxend(racectx uintptr) { throw("race") }
diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go
index c64549c..60aa90f 100644
--- a/libgo/go/runtime/runtime1.go
+++ b/libgo/go/runtime/runtime1.go
@@ -326,6 +326,7 @@ var debug struct {
scheddetail int32
schedtrace int32
tracebackancestors int32
+ asyncpreemptoff int32
}
var dbgvars = []dbgVar{
@@ -345,6 +346,7 @@ var dbgvars = []dbgVar{
{"scheddetail", &debug.scheddetail},
{"schedtrace", &debug.schedtrace},
{"tracebackancestors", &debug.tracebackancestors},
+ {"asyncpreemptoff", &debug.asyncpreemptoff},
}
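The new asyncpreemptoff knob can be set through GODEBUG when a program needs to run without asynchronous preemption, for instance while debugging signal-sensitive code. A sketch of launching a child process with that setting; "./myprog" is a placeholder, not a binary in this tree:

package main

import (
	"fmt"
	"os"
	"os/exec"
)

func main() {
	// Run some Go binary with asynchronous preemption disabled.
	cmd := exec.Command("./myprog") // placeholder path
	cmd.Env = append(os.Environ(), "GODEBUG=asyncpreemptoff=1")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Fprintln(os.Stderr, "run failed:", err)
	}
}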
func parsedebugvars() {
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 77648c2..d50f82a 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -40,7 +40,7 @@ const (
// _Grunning means this goroutine may execute user code. The
// stack is owned by this goroutine. It is not on a run queue.
- // It is assigned an M and a P.
+ // It is assigned an M and a P (g.m and g.m.p are valid).
_Grunning // 2
// _Gsyscall means this goroutine is executing a system call.
@@ -78,11 +78,18 @@ const (
// stack is owned by the goroutine that put it in _Gcopystack.
_Gcopystack // 8
+ // _Gpreempted means this goroutine stopped itself for a
+ // suspendG preemption. It is like _Gwaiting, but nothing is
+ // yet responsible for ready()ing it. Some suspendG must CAS
+ // the status to _Gwaiting to take responsibility for
+ // ready()ing this G.
+ _Gpreempted // 9
+
// _Gexitingsyscall means this goroutine is exiting from a
// system call. This is like _Gsyscall, but the GC should not
// scan its stack. Currently this is only used in exitsyscall0
// as a transient state when it drops the G.
- _Gexitingsyscall // 9
+ _Gexitingsyscall // 10
// _Gscan combined with one of the above states other than
// _Grunning indicates that GC is scanning the stack. The
@@ -95,11 +102,12 @@ const (
//
// atomicstatus&~Gscan gives the state the goroutine will
// return to when the scan completes.
- _Gscan = 0x1000
- _Gscanrunnable = _Gscan + _Grunnable // 0x1001
- _Gscanrunning = _Gscan + _Grunning // 0x1002
- _Gscansyscall = _Gscan + _Gsyscall // 0x1003
- _Gscanwaiting = _Gscan + _Gwaiting // 0x1004
+ _Gscan = 0x1000
+ _Gscanrunnable = _Gscan + _Grunnable // 0x1001
+ _Gscanrunning = _Gscan + _Grunning // 0x1002
+ _Gscansyscall = _Gscan + _Gsyscall // 0x1003
+ _Gscanwaiting = _Gscan + _Gwaiting // 0x1004
+ _Gscanpreempted = _Gscan + _Gpreempted // 0x1009
)
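The _Gpreempted comment above describes a claim-by-CAS handoff: whichever suspendG moves the status from preempted to waiting becomes responsible for readying the goroutine. A simplified standalone sketch of that idiom, with invented state constants rather than the runtime's:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

const (
	statePreempted uint32 = iota // parked; nobody owns the wakeup yet
	stateWaiting                 // claimed; the claimant must ready it
)

func main() {
	status := statePreempted
	var wins int32
	var wg sync.WaitGroup

	// Several would-be suspenders race to claim the parked goroutine.
	// Exactly one CAS succeeds, so exactly one caller takes ownership.
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			if atomic.CompareAndSwapUint32(&status, statePreempted, stateWaiting) {
				atomic.AddInt32(&wins, 1)
				fmt.Println("claimant", id, "readies the G")
			}
		}(i)
	}
	wg.Wait()
	fmt.Println("successful claims:", wins) // always 1
}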
const (
@@ -407,21 +415,36 @@ type g struct {
param unsafe.Pointer // passed parameter on wakeup
atomicstatus uint32
// Not for gccgo: stackLock uint32 // sigprof/scang lock; TODO: fold in to atomicstatus
- goid int64
- schedlink guintptr
- waitsince int64 // approx time when the g become blocked
- waitreason waitReason // if status==Gwaiting
- preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
- paniconfault bool // panic (instead of crash) on unexpected fault address
- preemptscan bool // preempted g does scan for gc
- gcscandone bool // g has scanned stack; protected by _Gscan bit in status
- gcscanvalid bool // false at start of gc cycle, true if G has not run since last scan; TODO: remove?
- throwsplit bool // must not split stack
- raceignore int8 // ignore race detection events
- sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
- sysexitticks int64 // cputicks when syscall has returned (for tracing)
- traceseq uint64 // trace event sequencer
- tracelastp puintptr // last P emitted an event for this goroutine
+ goid int64
+ schedlink guintptr
+ waitsince int64 // approx time when the g become blocked
+ waitreason waitReason // if status==Gwaiting
+ preempt bool // preemption signal, duplicates stackguard0 = stackpreempt
+ preemptStop bool // transition to _Gpreempted on preemption; otherwise, just deschedule
+ // Not for gccgo: preemptShrink bool // shrink stack at synchronous safe point
+ // asyncSafePoint is set if g is stopped at an asynchronous
+ // safe point. This means there are frames on the stack
+ // without precise pointer information.
+ asyncSafePoint bool
+
+ paniconfault bool // panic (instead of crash) on unexpected fault address
+ preemptscan bool // preempted g does scan for gc
+ gcscandone bool // g has scanned stack; protected by _Gscan bit in status
+ throwsplit bool // must not split stack
+
+ gcScannedSyscallStack bool // gccgo specific; see scanSyscallStack
+
+ // activeStackChans indicates that there are unlocked channels
+ // pointing into this goroutine's stack. If true, stack
+ // copying needs to acquire channel locks to protect these
+ // areas of the stack.
+ activeStackChans bool
+
+ raceignore int8 // ignore race detection events
+ sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
+ sysexitticks int64 // cputicks when syscall has returned (for tracing)
+ traceseq uint64 // trace event sequencer
+ tracelastp puintptr // last P emitted an event for this goroutine
lockedm muintptr
sig uint32
writebuf []byte
@@ -555,8 +578,7 @@ type m struct {
waittraceskip int
startingtrace bool
syscalltick uint32
- // Not for gccgo: thread uintptr // thread handle
- freelink *m // on sched.freem
+ freelink *m // on sched.freem
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
@@ -566,6 +588,11 @@ type m struct {
// Not for gccgo: libcallg guintptr
// Not for gccgo: syscall libcall // stores syscall parameters on windows
+ // preemptGen counts the number of completed preemption
+ // signals. This is used to detect when a preemption is
+ // requested, but fails. Accessed atomically.
+ preemptGen uint32
+
dlogPerM
mOS
@@ -590,6 +617,7 @@ type p struct {
sysmontick sysmontick // last tick observed by sysmon
m muintptr // back-link to associated m (nil if idle)
mcache *mcache
+ pcache pageCache
raceprocctx uintptr
// gccgo has only one size of defer.
@@ -624,6 +652,17 @@ type p struct {
sudogcache []*sudog
sudogbuf [128]*sudog
+ // Cache of mspan objects from the heap.
+ mspancache struct {
+ // We need an explicit length here because this field is used
+ // in allocation codepaths where write barriers are not allowed,
+ // and eliminating the write barrier/keeping it eliminated from
+ // slice updates is tricky, moreso than just managing the length
+ // ourselves.
+ len int
+ buf [128]*mspan
+ }
+
tracebuf traceBufPtr
// traceSweep indicates the sweep events should be traced.
@@ -660,13 +699,36 @@ type p struct {
runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point
+ // Lock for timers. We normally access the timers while running
+ // on this P, but the scheduler can also do it from a different P.
+ timersLock mutex
+
+ // Actions to take at some time. This is used to implement the
+ // standard library's time package.
+ // Must hold timersLock to access.
+ timers []*timer
+
+ // Number of timerModifiedEarlier timers on P's heap.
+ // This should only be modified while holding timersLock,
+ // or while the timer status is in a transient state
+ // such as timerModifying.
+ adjustTimers uint32
+
+ // Race context used while executing timer functions.
+ // Not for gccgo: timerRaceCtx uintptr
+
+ // preempt is set to indicate that this P should enter the
+ // scheduler ASAP (regardless of what G is running on it).
+ preempt bool
+
pad cpu.CacheLinePad
}
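The mspancache field above keeps an explicit length next to a fixed array so that pushes and pops in write-barrier-restricted code paths never update a slice header. Outside the runtime the same shape looks like a fixed-capacity stack; the type below is purely illustrative:

package main

import "fmt"

// fixedCache stores its own length beside a fixed backing array, so a
// push or pop writes one array slot and one int, never a slice header.
type fixedCache struct {
	len int
	buf [128]*int
}

func (c *fixedCache) push(p *int) bool {
	if c.len == len(c.buf) {
		return false // full; the caller falls back to a slow path
	}
	c.buf[c.len] = p
	c.len++
	return true
}

func (c *fixedCache) pop() *int {
	if c.len == 0 {
		return nil
	}
	c.len--
	p := c.buf[c.len]
	c.buf[c.len] = nil // drop the reference
	return p
}

func main() {
	var c fixedCache
	x := 42
	c.push(&x)
	fmt.Println(*c.pop()) // 42
}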
type schedt struct {
// accessed atomically. keep at top to ensure alignment on 32-bit systems.
- goidgen uint64
- lastpoll uint64
+ goidgen uint64
+ lastpoll uint64 // time of last network poll, 0 if currently polling
+ pollUntil uint64 // time to which current poll is sleeping
lock mutex
@@ -841,6 +903,11 @@ type _defer struct {
// panics
// This is the gccgo version.
+//
+// This is marked go:notinheap because _panic values must only ever
+// live on the stack.
+//
+//go:notinheap
type _panic struct {
// The next entry in the stack.
link *_panic
@@ -858,6 +925,9 @@ type _panic struct {
// Whether this panic was already seen by a deferred function
// which called panic again.
aborted bool
+
+ // Whether this panic was created for goexit.
+ goexit bool
}
// ancestorInfo records details of where a goroutine was started.
@@ -906,6 +976,7 @@ const (
waitReasonTraceReaderBlocked // "trace reader (blocked)"
waitReasonWaitForGCCycle // "wait for GC cycle"
waitReasonGCWorkerIdle // "GC worker (idle)"
+ waitReasonPreempted // "preempted"
)
var waitReasonStrings = [...]string{
@@ -934,6 +1005,7 @@ var waitReasonStrings = [...]string{
waitReasonTraceReaderBlocked: "trace reader (blocked)",
waitReasonWaitForGCCycle: "wait for GC cycle",
waitReasonGCWorkerIdle: "GC worker (idle)",
+ waitReasonPreempted: "preempted",
}
func (w waitReason) String() string {
diff --git a/libgo/go/runtime/runtime_mmap_test.go b/libgo/go/runtime/runtime_mmap_test.go
index c7703f4..21918c5 100644
--- a/libgo/go/runtime/runtime_mmap_test.go
+++ b/libgo/go/runtime/runtime_mmap_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build aix darwin dragonfly freebsd hurd linux nacl netbsd openbsd solaris
+// +build aix darwin dragonfly freebsd hurd linux netbsd openbsd solaris
package runtime_test
diff --git a/libgo/go/runtime/runtime_test.go b/libgo/go/runtime/runtime_test.go
index e607868..ca35205 100644
--- a/libgo/go/runtime/runtime_test.go
+++ b/libgo/go/runtime/runtime_test.go
@@ -122,6 +122,21 @@ func BenchmarkDeferMany(b *testing.B) {
}
}
+func BenchmarkPanicRecover(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ defer3()
+ }
+}
+
+func defer3() {
+ defer func(x, y, z int) {
+ if recover() == nil {
+ panic("failed recover")
+ }
+ }(1, 2, 3)
+ panic("hi")
+}
+
// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
SetCPUProfileRate(0)
@@ -177,13 +192,14 @@ func TestSetPanicOnFault(t *testing.T) {
}
}
+// testSetPanicOnFault tests one potentially faulting address.
+// It deliberately constructs and uses an invalid pointer,
+// so mark it as nocheckptr.
+//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
if strings.Contains(Version(), "llvm") {
t.Skip("LLVM doesn't support non-call exception")
}
- if GOOS == "nacl" {
- t.Skip("nacl doesn't seem to fault on high addresses")
- }
if GOOS == "js" {
t.Skip("js does not support catching faults")
}
@@ -283,32 +299,6 @@ func TestTrailingZero(t *testing.T) {
}
*/
-func TestBadOpen(t *testing.T) {
- if GOOS == "windows" || GOOS == "nacl" || GOOS == "js" {
- t.Skip("skipping OS that doesn't have open/read/write/close")
- }
- // make sure we get the correct error code if open fails. Same for
- // read/write/close on the resulting -1 fd. See issue 10052.
- nonfile := []byte("/notreallyafile")
- fd := Open(&nonfile[0], 0, 0)
- if fd != -1 {
- t.Errorf("open(\"%s\")=%d, want -1", string(nonfile), fd)
- }
- var buf [32]byte
- r := Read(-1, unsafe.Pointer(&buf[0]), int32(len(buf)))
- if r != -1 {
- t.Errorf("read()=%d, want -1", r)
- }
- w := Write(^uintptr(0), unsafe.Pointer(&buf[0]), int32(len(buf)))
- if w != -1 {
- t.Errorf("write()=%d, want -1", w)
- }
- c := Close(-1)
- if c != -1 {
- t.Errorf("close()=%d, want -1", c)
- }
-}
-
func TestAppendGrowth(t *testing.T) {
var x []int64
check := func(want int) {
diff --git a/libgo/go/runtime/select.go b/libgo/go/runtime/select.go
index 41e5e88..c9e3dd7 100644
--- a/libgo/go/runtime/select.go
+++ b/libgo/go/runtime/select.go
@@ -19,7 +19,7 @@ const debugSelect = false
// scase.kind values.
// Known to compiler.
-// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselect.
+// Changes here must also be made in src/cmd/compile/internal/gc/select.go's walkselectcases.
const (
caseNil = iota
caseRecv
@@ -70,6 +70,9 @@ func selunlock(scases []scase, lockorder []uint16) {
}
func selparkcommit(gp *g, _ unsafe.Pointer) bool {
+ // There are unlocked sudogs that point into gp's stack. Stack
+ // copying must lock the channels of those sudogs.
+ gp.activeStackChans = true
// This must not access gp's stack (see gopark). In
// particular, it must not access the *hselect. That's okay,
// because by the time this is called, gp.waiting has all
@@ -308,6 +311,7 @@ loop:
// wait for someone to wake us up
gp.param = nil
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
+ gp.activeStackChans = false
sellock(scases, lockorder)
@@ -458,8 +462,6 @@ sclose:
}
func (c *hchan) sortkey() uintptr {
- // TODO(khr): if we have a moving garbage collector, we'll need to
- // change this function.
return uintptr(unsafe.Pointer(c))
}
diff --git a/libgo/go/runtime/sema.go b/libgo/go/runtime/sema.go
index c002e29..fb16796 100644
--- a/libgo/go/runtime/sema.go
+++ b/libgo/go/runtime/sema.go
@@ -180,7 +180,7 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) {
atomic.Xadd(&root.nwait, -1)
}
unlock(&root.lock)
- if s != nil { // May be slow, so unlock first
+ if s != nil { // May be slow or even yield, so unlock first
acquiretime := s.acquiretime
if acquiretime != 0 {
mutexevent(t0-acquiretime, 3+skipframes)
@@ -192,6 +192,25 @@ func semrelease1(addr *uint32, handoff bool, skipframes int) {
s.ticket = 1
}
readyWithTime(s, 5+skipframes)
+ if s.ticket == 1 && getg().m.locks == 0 {
+ // Direct G handoff
+ // readyWithTime has added the waiter G as runnext in the
+ // current P; we now call the scheduler so that we start running
+ // the waiter G immediately.
+ // Note that waiter inherits our time slice: this is desirable
+ // to avoid having a highly contended semaphore hog the P
+ // indefinitely. goyield is like Gosched, but it does not emit a
+ // GoSched trace event and, more importantly, puts the current G
+ // on the local runq instead of the global one.
+ // We only do this in the starving regime (handoff=true), as in
+ // the non-starving case it is possible for a different waiter
+ // to acquire the semaphore while we are yielding/scheduling,
+ // and this would be wasteful. We wait instead to enter starving
+ // regime, and then we start to do direct handoffs of ticket and
+ // P.
+ // See issue 33747 for discussion.
+ goyield()
+ }
}
}
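The direct-handoff path added above (release, then goyield so the waiter runs on this P) is only visible indirectly from user code. The sketch below imitates the idea with a one-permit channel semaphore and an explicit Gosched standing in for goyield; it is an analogy for the scheduling effect, not the runtime mechanism, and the "waiter ran first" outcome is typical rather than guaranteed.

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

func main() {
	runtime.GOMAXPROCS(1)

	sema := make(chan struct{}, 1) // one-permit semaphore; a token in the channel means "free"
	var res uint32
	done := make(chan struct{})

	go func() {
		<-sema // acquire: blocks until main releases
		atomic.CompareAndSwapUint32(&res, 0, 1)
		close(done)
	}()

	runtime.Gosched() // let the waiter park on the semaphore

	// "Release with handoff": hand over the token, then yield the P so
	// the waiter reaches its CAS before we reach ours below.
	sema <- struct{}{}
	runtime.Gosched()
	atomic.CompareAndSwapUint32(&res, 0, 2)

	<-done
	fmt.Println("waiter ran first:", res == 1)
}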
@@ -373,19 +392,11 @@ Found:
func (root *semaRoot) rotateLeft(x *sudog) {
// p -> (x a (y b c))
p := x.parent
- a, y := x.prev, x.next
- b, c := y.prev, y.next
+ y := x.next
+ b := y.prev
y.prev = x
x.parent = y
- y.next = c
- if c != nil {
- c.parent = y
- }
- x.prev = a
- if a != nil {
- a.parent = x
- }
x.next = b
if b != nil {
b.parent = x
@@ -409,23 +420,15 @@ func (root *semaRoot) rotateLeft(x *sudog) {
func (root *semaRoot) rotateRight(y *sudog) {
// p -> (y (x a b) c)
p := y.parent
- x, c := y.prev, y.next
- a, b := x.prev, x.next
+ x := y.prev
+ b := x.next
- x.prev = a
- if a != nil {
- a.parent = x
- }
x.next = y
y.parent = x
y.prev = b
if b != nil {
b.parent = y
}
- y.next = c
- if c != nil {
- c.parent = y
- }
x.parent = p
if p == nil {
diff --git a/libgo/go/runtime/sema_test.go b/libgo/go/runtime/sema_test.go
new file mode 100644
index 0000000..8bd5d4c
--- /dev/null
+++ b/libgo/go/runtime/sema_test.go
@@ -0,0 +1,97 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "sync/atomic"
+ "testing"
+)
+
+// TestSemaHandoff checks that when semrelease+handoff is
+// requested, the G that releases the semaphore yields its
+// P directly to the first waiter in line.
+// See issue 33747 for discussion.
+func TestSemaHandoff(t *testing.T) {
+ const iter = 10000
+ ok := 0
+ for i := 0; i < iter; i++ {
+ if testSemaHandoff() {
+ ok++
+ }
+ }
+ // As long as two thirds of handoffs are direct, we
+ // consider the test successful. The scheduler is
+ // nondeterministic, so this test checks that we get the
+ // desired outcome in a significant majority of cases.
+ // The actual ratio of direct handoffs is much higher
+ // (>90%) but we use a lower threshold to minimize the
+ // chances that unrelated changes in the runtime will
+ // cause the test to fail or become flaky.
+ if ok < iter*2/3 {
+ t.Fatal("direct handoff < 2/3:", ok, iter)
+ }
+}
+
+func TestSemaHandoff1(t *testing.T) {
+ if GOMAXPROCS(-1) <= 1 {
+ t.Skip("GOMAXPROCS <= 1")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(1)
+ TestSemaHandoff(t)
+}
+
+func TestSemaHandoff2(t *testing.T) {
+ if GOMAXPROCS(-1) <= 2 {
+ t.Skip("GOMAXPROCS <= 2")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(2)
+ TestSemaHandoff(t)
+}
+
+func testSemaHandoff() bool {
+ var sema, res uint32
+ done := make(chan struct{})
+
+ // We're testing that the current goroutine is able to yield its time slice
+ // to another goroutine. Stop the current goroutine from migrating to
+ // another CPU where it can win the race (and appear to have not yielded) by
+ // keeping the CPUs slightly busy.
+ for i := 0; i < GOMAXPROCS(-1); i++ {
+ go func() {
+ for {
+ select {
+ case <-done:
+ return
+ default:
+ }
+ Gosched()
+ }
+ }()
+ }
+
+ go func() {
+ Semacquire(&sema)
+ atomic.CompareAndSwapUint32(&res, 0, 1)
+
+ Semrelease1(&sema, true, 0)
+ close(done)
+ }()
+ for SemNwait(&sema) == 0 {
+ Gosched() // wait for goroutine to block in Semacquire
+ }
+
+ // The crux of the test: we release the semaphore with handoff
+ // and immediately perform a CAS both here and in the waiter; we
+ // want the CAS in the waiter to execute first.
+ Semrelease1(&sema, true, 0)
+ atomic.CompareAndSwapUint32(&res, 0, 2)
+
+ <-done // wait for goroutines to finish to avoid data races
+
+ return res == 1 // did the waiter run first?
+}
diff --git a/libgo/go/runtime/semasleep_test.go b/libgo/go/runtime/semasleep_test.go
index f5b4a50..9b371b0 100644
--- a/libgo/go/runtime/semasleep_test.go
+++ b/libgo/go/runtime/semasleep_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//+build !nacl,!plan9,!windows,!js
+// +build !plan9,!windows,!js
package runtime_test
diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go
deleted file mode 100644
index 3583c7b..0000000
--- a/libgo/go/runtime/signal_sighandler.go
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build aix darwin dragonfly freebsd hurd linux nacl netbsd openbsd solaris
-
-package runtime
-
-import (
- "unsafe"
-)
-
-// crashing is the number of m's we have waited for when implementing
-// GOTRACEBACK=crash when a signal is received.
-var crashing int32
-
-// testSigtrap is used by the runtime tests. If non-nil, it is called
-// on SIGTRAP. If it returns true, the normal behavior on SIGTRAP is
-// suppressed.
-var testSigtrap func(info *_siginfo_t, ctxt *sigctxt, gp *g) bool
-
-// sighandler is invoked when a signal occurs. The global g will be
-// set to a gsignal goroutine and we will be running on the alternate
-// signal stack. The parameter g will be the value of the global g
-// when the signal occurred. The sig, info, and ctxt parameters are
-// from the system signal handler: they are the parameters passed when
-// the SA is passed to the sigaction system call.
-//
-// The garbage collector may have stopped the world, so write barriers
-// are not allowed.
-//
-//go:nowritebarrierrec
-func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
- _g_ := getg()
- c := &sigctxt{info, ctxt}
-
- sigfault, sigpc := getSiginfo(info, ctxt)
-
- if sig == _SIGURG && usestackmaps {
- // We may be signaled to do a stack scan.
- // The signal delivery races with enter/exitsyscall.
- // We may be on g0 stack now. gp.m.curg is the g we
- // want to scan.
- // If we're not on g stack, give up. The sender will
- // try again later.
- // If we're not stopped at a safepoint (doscanstack will
- // return false), also give up.
- if s := readgstatus(gp.m.curg); s == _Gscansyscall {
- if gp == gp.m.curg {
- if doscanstack(gp, (*gcWork)(unsafe.Pointer(gp.scangcw))) {
- gp.gcscanvalid = true
- gp.gcscandone = true
- }
- }
- gp.m.curg.scangcw = 0
- notewakeup(&gp.m.scannote)
- return
- }
- }
-
- if sig == _SIGPROF {
- sigprof(sigpc, gp, _g_.m)
- return
- }
-
- if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) {
- return
- }
-
- flags := int32(_SigThrow)
- if sig < uint32(len(sigtable)) {
- flags = sigtable[sig].flags
- }
- if flags&_SigPanic != 0 && gp.throwsplit {
- // We can't safely sigpanic because it may grow the
- // stack. Abort in the signal handler instead.
- flags = (flags &^ _SigPanic) | _SigThrow
- }
- if isAbortPC(sigpc) {
- // On many architectures, the abort function just
- // causes a memory fault. Don't turn that into a panic.
- flags = _SigThrow
- }
- if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
- // Emulate gc by passing arguments out of band,
- // although we don't really have to.
- gp.sig = sig
- gp.sigcode0 = uintptr(c.sigcode())
- gp.sigcode1 = sigfault
- gp.sigpc = sigpc
-
- setg(gp)
-
- // All signals were blocked due to the sigaction mask;
- // unblock them.
- var set sigset
- sigfillset(&set)
- sigprocmask(_SIG_UNBLOCK, &set, nil)
-
- sigpanic()
- throw("sigpanic returned")
- }
-
- if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
- if sigsend(sig) {
- return
- }
- }
-
- if c.sigcode() == _SI_USER && signal_ignored(sig) {
- return
- }
-
- if flags&_SigKill != 0 {
- dieFromSignal(sig)
- }
-
- if flags&_SigThrow == 0 {
- return
- }
-
- _g_.m.throwing = 1
- _g_.m.caughtsig.set(gp)
-
- if crashing == 0 {
- startpanic_m()
- }
-
- if sig < uint32(len(sigtable)) {
- print(sigtable[sig].name, "\n")
- } else {
- print("Signal ", sig, "\n")
- }
-
- print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
- if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
- print("signal arrived during cgo execution\n")
- gp = _g_.m.lockedg.ptr()
- }
- print("\n")
-
- level, _, docrash := gotraceback()
- if level > 0 {
- goroutineheader(gp)
- traceback(0)
- if crashing == 0 {
- tracebackothers(gp)
- print("\n")
- }
- dumpregs(info, ctxt)
- }
-
- if docrash {
- crashing++
- if crashing < mcount()-int32(extraMCount) {
- // There are other m's that need to dump their stacks.
- // Relay SIGQUIT to the next m by sending it to the current process.
- // All m's that have already received SIGQUIT have signal masks blocking
- // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
- // When the last m receives the SIGQUIT, it will fall through to the call to
- // crash below. Just in case the relaying gets botched, each m involved in
- // the relay sleeps for 5 seconds and then does the crash/exit itself.
- // In expected operation, the last m has received the SIGQUIT and run
- // crash/exit and the process is gone, all long before any of the
- // 5-second sleeps have finished.
- print("\n-----\n\n")
- raiseproc(_SIGQUIT)
- usleep(5 * 1000 * 1000)
- }
- crash()
- }
-
- printDebugLog()
-
- exit(2)
-}
diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go
index 365f5dd..29f9443 100644
--- a/libgo/go/runtime/signal_unix.go
+++ b/libgo/go/runtime/signal_unix.go
@@ -42,6 +42,38 @@ const (
_SIG_IGN uintptr = 1
)
+// sigPreempt is the signal used for non-cooperative preemption.
+//
+// There's no good way to choose this signal, but there are some
+// heuristics:
+//
+// 1. It should be a signal that's passed-through by debuggers by
+// default. On Linux, this is SIGALRM, SIGURG, SIGCHLD, SIGIO,
+// SIGVTALRM, SIGPROF, and SIGWINCH, plus some glibc-internal signals.
+//
+// 2. It shouldn't be used internally by libc in mixed Go/C binaries
+// because libc may assume it's the only thing that can handle these
+// signals. For example SIGCANCEL or SIGSETXID.
+//
+// 3. It should be a signal that can happen spuriously without
+// consequences. For example, SIGALRM is a bad choice because the
+// signal handler can't tell if it was caused by the real process
+// alarm or not (arguably this means the signal is broken, but I
+// digress). SIGUSR1 and SIGUSR2 are also bad because those are often
+// used in meaningful ways by applications.
+//
+// 4. We need to deal with platforms without real-time signals (like
+// macOS), so those are out.
+//
+// We use SIGURG because it meets all of these criteria, is extremely
+// unlikely to be used by an application for its "real" meaning (both
+// because out-of-band data is basically unused and because SIGURG
+// doesn't report which socket has the condition, making it pretty
+// useless), and even if it is, the application has to be ready for
+// spurious SIGURG. SIGIO wouldn't be a bad choice either, but is more
+// likely to be used for real.
+const sigPreempt = _SIGURG
+
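The last point above implies that applications subscribing to SIGURG must tolerate deliveries that come from the preemption machinery rather than from out-of-band socket data. A Unix-only sketch that simply counts whatever SIGURGs arrive (with gccgo's current stub preemptM there may be none; nothing here is runtime-internal API):

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	c := make(chan os.Signal, 64)
	signal.Notify(c, syscall.SIGURG)

	// Burn some CPU so the runtime has a reason to send preemption
	// signals; with signal-based preemption these can surface here
	// as "spurious" SIGURGs.
	go func() {
		x := 0
		for i := 0; i < 1e9; i++ {
			x += i
		}
		_ = x
	}()

	deadline := time.After(2 * time.Second)
	n := 0
	for {
		select {
		case <-c:
			n++ // a SIGURG that may or may not be about a socket
		case <-deadline:
			fmt.Println("observed SIGURG deliveries:", n)
			return
		}
	}
}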
// Stores the signal handlers registered before Go installed its own.
// These signal handlers will be invoked in cases where Go doesn't want to
// handle a particular signal (e.g., signal occurred on a non-Go thread).
@@ -251,10 +283,26 @@ func setProcessCPUProfiler(hz int32) {
}
} else {
// If the Go signal handler should be disabled by default,
- // disable it if it is enabled.
+ // switch back to the signal handler that was installed
+ // when we enabled profiling. We don't try to handle the case
+ // of a program that changes the SIGPROF handler while Go
+ // profiling is enabled.
+ //
+ // If no signal handler was installed before, then start
+ // ignoring SIGPROF signals. We do this, rather than change
+ // to SIG_DFL, because there may be a pending SIGPROF
+ // signal that has not yet been delivered to some other thread.
+ // If we change to SIG_DFL here, the program will crash
+ // when that SIGPROF is delivered. We assume that programs
+ // that use profiling don't want to crash on a stray SIGPROF.
+ // See issue 19320.
if !sigInstallGoHandler(_SIGPROF) {
if atomic.Cas(&handlingSig[_SIGPROF], 1, 0) {
- setsig(_SIGPROF, atomic.Loaduintptr(&fwdSig[_SIGPROF]))
+ h := atomic.Loaduintptr(&fwdSig[_SIGPROF])
+ if h == _SIG_DFL {
+ h = _SIG_IGN
+ }
+ setsig(_SIGPROF, h)
}
}
}
@@ -283,6 +331,51 @@ func sigpipe() {
dieFromSignal(_SIGPIPE)
}
+// doSigPreempt handles a preemption signal on gp.
+func doSigPreempt(gp *g, ctxt *sigctxt, sigpc uintptr) {
+ // Check if this G wants to be preempted and is safe to
+ // preempt.
+ if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, sigpc) {
+ // Inject a call to asyncPreempt.
+ // ctxt.pushCall(funcPC(asyncPreempt))
+ throw("pushCall not implemented")
+ }
+
+ // Acknowledge the preemption.
+ atomic.Xadd(&gp.m.preemptGen, 1)
+}
+
+// gccgo-specific definition.
+const pushCallSupported = false
+
+const preemptMSupported = pushCallSupported
+
+// preemptM sends a preemption request to mp. This request may be
+// handled asynchronously and may be coalesced with other requests to
+// the M. When the request is received, if the running G or P are
+// marked for preemption and the goroutine is at an asynchronous
+// safe-point, it will preempt the goroutine. It always atomically
+// increments mp.preemptGen after handling a preemption request.
+func preemptM(mp *m) {
+ if !pushCallSupported {
+ // This architecture doesn't support ctxt.pushCall
+ // yet, so doSigPreempt won't work.
+ return
+ }
+ if GOOS == "darwin" && (GOARCH == "arm" || GOARCH == "arm64") && !iscgo {
+ // On darwin, we use libc calls, and cgo is required on ARM and ARM64
+ // so we have TLS set up to save/restore G during C calls. If cgo is
+ // absent, we cannot save/restore G in TLS, and if a signal is
+ // received during C execution we cannot get the G. Therefore don't
+ // send signals.
+ // This can only happen in the go_bootstrap program (otherwise cgo is
+ // required).
+ return
+ }
+ // signalM(mp, sigPreempt)
+ throw("signalM not implemented")
+}
+
// sigtrampgo is called from the signal handler function, sigtramp,
// written in assembly code.
// This is called by the signal handler, and the world may be stopped.
@@ -315,6 +408,183 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) {
setg(g)
}
+// crashing is the number of m's we have waited for when implementing
+// GOTRACEBACK=crash when a signal is received.
+var crashing int32
+
+// testSigtrap and testSigusr1 are used by the runtime tests. If
+// non-nil, they are called on SIGTRAP and SIGUSR1 respectively. If the
+// handler returns true, the normal behavior on that signal is suppressed.
+var testSigtrap func(info *_siginfo_t, ctxt *sigctxt, gp *g) bool
+var testSigusr1 func(gp *g) bool
+
+// sighandler is invoked when a signal occurs. The global g will be
+// set to a gsignal goroutine and we will be running on the alternate
+// signal stack. The parameter g will be the value of the global g
+// when the signal occurred. The sig, info, and ctxt parameters are
+// from the system signal handler: they are the parameters passed when
+// the SA is passed to the sigaction system call.
+//
+// The garbage collector may have stopped the world, so write barriers
+// are not allowed.
+//
+//go:nowritebarrierrec
+func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) {
+ _g_ := getg()
+ c := &sigctxt{info, ctxt}
+
+ sigfault, sigpc := getSiginfo(info, ctxt)
+
+ if sig == _SIGURG && usestackmaps {
+ // We may be signaled to do a stack scan.
+ // The signal delivery races with enter/exitsyscall.
+ // We may be on g0 stack now. gp.m.curg is the g we
+ // want to scan.
+ // If we're not on g stack, give up. The sender will
+ // try again later.
+ // If we're not stopped at a safepoint (doscanstack will
+ // return false), also give up.
+ if s := readgstatus(gp.m.curg); s == _Gscansyscall {
+ if gp == gp.m.curg {
+ if doscanstack(gp, (*gcWork)(unsafe.Pointer(gp.scangcw))) {
+ gp.gcScannedSyscallStack = true
+ }
+ }
+ gp.m.curg.scangcw = 0
+ notewakeup(&gp.m.scannote)
+ return
+ }
+ }
+
+ if sig == _SIGPROF {
+ sigprof(sigpc, gp, _g_.m)
+ return
+ }
+
+ if sig == _SIGTRAP && testSigtrap != nil && testSigtrap(info, (*sigctxt)(noescape(unsafe.Pointer(c))), gp) {
+ return
+ }
+
+ if sig == _SIGUSR1 && testSigusr1 != nil && testSigusr1(gp) {
+ return
+ }
+
+ if sig == sigPreempt {
+ // Might be a preemption signal.
+ doSigPreempt(gp, c, sigpc)
+ // Even if this was definitely a preemption signal, it
+ // may have been coalesced with another signal, so we
+ // still let it through to the application.
+ }
+
+ flags := int32(_SigThrow)
+ if sig < uint32(len(sigtable)) {
+ flags = sigtable[sig].flags
+ }
+ if flags&_SigPanic != 0 && gp.throwsplit {
+ // We can't safely sigpanic because it may grow the
+ // stack. Abort in the signal handler instead.
+ flags = (flags &^ _SigPanic) | _SigThrow
+ }
+ if isAbortPC(sigpc) {
+ // On many architectures, the abort function just
+ // causes a memory fault. Don't turn that into a panic.
+ flags = _SigThrow
+ }
+ if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+ // Emulate gc by passing arguments out of band,
+ // although we don't really have to.
+ gp.sig = sig
+ gp.sigcode0 = uintptr(c.sigcode())
+ gp.sigcode1 = sigfault
+ gp.sigpc = sigpc
+
+ setg(gp)
+
+ // All signals were blocked due to the sigaction mask;
+ // unblock them.
+ var set sigset
+ sigfillset(&set)
+ sigprocmask(_SIG_UNBLOCK, &set, nil)
+
+ sigpanic()
+ throw("sigpanic returned")
+ }
+
+ if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+ if sigsend(sig) {
+ return
+ }
+ }
+
+ if c.sigcode() == _SI_USER && signal_ignored(sig) {
+ return
+ }
+
+ if flags&_SigKill != 0 {
+ dieFromSignal(sig)
+ }
+
+ if flags&_SigThrow == 0 {
+ return
+ }
+
+ _g_.m.throwing = 1
+ _g_.m.caughtsig.set(gp)
+
+ if crashing == 0 {
+ startpanic_m()
+ }
+
+ if sig < uint32(len(sigtable)) {
+ print(sigtable[sig].name, "\n")
+ } else {
+ print("Signal ", sig, "\n")
+ }
+
+ print("PC=", hex(sigpc), " m=", _g_.m.id, " sigcode=", c.sigcode(), "\n")
+ if _g_.m.lockedg != 0 && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+ print("signal arrived during cgo execution\n")
+ gp = _g_.m.lockedg.ptr()
+ }
+ print("\n")
+
+ level, _, docrash := gotraceback()
+ if level > 0 {
+ goroutineheader(gp)
+ traceback(0)
+ if crashing == 0 {
+ tracebackothers(gp)
+ print("\n")
+ }
+ dumpregs(info, ctxt)
+ }
+
+ if docrash {
+ crashing++
+ if crashing < mcount()-int32(extraMCount) {
+ // There are other m's that need to dump their stacks.
+ // Relay SIGQUIT to the next m by sending it to the current process.
+ // All m's that have already received SIGQUIT have signal masks blocking
+ // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+ // When the last m receives the SIGQUIT, it will fall through to the call to
+ // crash below. Just in case the relaying gets botched, each m involved in
+ // the relay sleeps for 5 seconds and then does the crash/exit itself.
+ // In expected operation, the last m has received the SIGQUIT and run
+ // crash/exit and the process is gone, all long before any of the
+ // 5-second sleeps have finished.
+ print("\n-----\n\n")
+ raiseproc(_SIGQUIT)
+ usleep(5 * 1000 * 1000)
+ }
+ crash()
+ }
+
+ printDebugLog()
+
+ exit(2)
+}
+
// sigpanic turns a synchronous signal into a run-time panic.
// If the signal handler sees a synchronous panic, it arranges the
// stack to look like the function where the signal occurred called
@@ -551,11 +821,22 @@ func signalDuringFork(sig uint32) {
throw("signal received during fork")
}
+var badginsignalMsg = "fatal: bad g in signal handler\n"
+
// This runs on a foreign stack, without an m or a g. No stack split.
//go:nosplit
//go:norace
//go:nowritebarrierrec
func badsignal(sig uintptr, c *sigctxt) {
+ if !iscgo && !cgoHasExtraM {
+ // There is no extra M. needm will not be able to grab
+ // an M. Instead of hanging, just crash.
+ // Cannot call split-stack function as there is no G.
+ s := stringStructOf(&badginsignalMsg)
+ write(2, s.str, int32(s.len))
+ exit(2)
+ *(*uintptr)(unsafe.Pointer(uintptr(123))) = 2
+ }
needm(0)
if !sigsend(uint32(sig)) {
// A foreign thread received the signal sig, and the
@@ -596,6 +877,13 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool {
return true
}
+ // This function and its caller sigtrampgo assume SIGPIPE is delivered on the
+ // originating thread. This property does not hold on macOS (golang.org/issue/33384),
+ // so we have no choice but to ignore SIGPIPE.
+ if GOOS == "darwin" && sig == _SIGPIPE {
+ return true
+ }
+
// If there is no handler to forward to, no need to forward.
if fwdFn == _SIG_DFL {
return false
@@ -610,8 +898,9 @@ func sigfwdgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) bool {
return false
}
// Determine if the signal occurred inside Go code. We test that:
- // (1) we were in a goroutine (i.e., m.curg != nil), and
- // (2) we weren't in CGO.
+ // (1) we weren't in VDSO page,
+ // (2) we were in a goroutine (i.e., m.curg != nil), and
+ // (3) we weren't in CGO.
g := getg()
if g != nil && g.m != nil && g.m.curg != nil && !g.m.incgo {
return false
@@ -687,13 +976,15 @@ func minitSignals() {
// stack to the gsignal stack. If the alternate signal stack is set
// for the thread (the case when a non-Go thread sets the alternate
// signal stack and then calls a Go function) then set the gsignal
-// stack to the alternate signal stack. Record which choice was made
-// in newSigstack, so that it can be undone in unminit.
+// stack to the alternate signal stack. We also set the alternate
+// signal stack to the gsignal stack if cgo is not used (regardless
+// of whether it is already set). Record which choice was made in
+// newSigstack, so that it can be undone in unminit.
func minitSignalStack() {
_g_ := getg()
var st _stack_t
sigaltstack(nil, &st)
- if st.ss_flags&_SS_DISABLE != 0 {
+ if st.ss_flags&_SS_DISABLE != 0 || !iscgo {
signalstack(_g_.m.gsignalstack, _g_.m.gsignalstacksize)
_g_.m.newSigstack = true
} else {
diff --git a/libgo/go/runtime/signal_windows_test.go b/libgo/go/runtime/signal_windows_test.go
new file mode 100644
index 0000000..9748403
--- /dev/null
+++ b/libgo/go/runtime/signal_windows_test.go
@@ -0,0 +1,61 @@
+// +build windows
+
+package runtime_test
+
+import (
+ "internal/testenv"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+)
+
+func TestVectoredHandlerDontCrashOnLibrary(t *testing.T) {
+ if *flagQuick {
+ t.Skip("-quick")
+ }
+ if runtime.GOARCH != "amd64" {
+ t.Skip("this test can only run on windows/amd64")
+ }
+ testenv.MustHaveGoBuild(t)
+ testenv.MustHaveExecPath(t, "gcc")
+ testprog.Lock()
+ defer testprog.Unlock()
+ dir, err := ioutil.TempDir("", "go-build")
+ if err != nil {
+ t.Fatalf("failed to create temp directory: %v", err)
+ }
+ defer os.RemoveAll(dir)
+
+ // build go dll
+ dll := filepath.Join(dir, "testwinlib.dll")
+ cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", dll, "--buildmode", "c-shared", "testdata/testwinlib/main.go")
+ out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build go library: %s\n%s", err, out)
+ }
+
+ // build c program
+ exe := filepath.Join(dir, "test.exe")
+ cmd = exec.Command("gcc", "-L"+dir, "-I"+dir, "-ltestwinlib", "-o", exe, "testdata/testwinlib/main.c")
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failed to build c exe: %s\n%s", err, out)
+ }
+
+ // run test program
+ cmd = exec.Command(exe)
+ out, err = testenv.CleanCmdEnv(cmd).CombinedOutput()
+ if err != nil {
+ t.Fatalf("failure while running executable: %s\n%s", err, out)
+ }
+ expectedOutput := "exceptionCount: 1\ncontinueCount: 1\n"
+ // cleaning output
+ cleanedOut := strings.ReplaceAll(string(out), "\r\n", "\n")
+ if cleanedOut != expectedOutput {
+ t.Errorf("expected output %q, got %q", expectedOutput, cleanedOut)
+ }
+}
diff --git a/libgo/go/runtime/sizeof_test.go b/libgo/go/runtime/sizeof_test.go
index ecda82a..d829c58 100644
--- a/libgo/go/runtime/sizeof_test.go
+++ b/libgo/go/runtime/sizeof_test.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !nacl
-
package runtime_test
import (
diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go
index 49d5a86..b61c2b1 100644
--- a/libgo/go/runtime/slice.go
+++ b/libgo/go/runtime/slice.go
@@ -26,7 +26,7 @@ type slice struct {
cap int
}
-// An notInHeapSlice is a slice backed by go:notinheap memory.
+// A notInHeapSlice is a slice backed by go:notinheap memory.
type notInHeapSlice struct {
array *notInHeap
len int
diff --git a/libgo/go/runtime/stack_test.go b/libgo/go/runtime/stack_test.go
index 6ed65e8..169dde2 100644
--- a/libgo/go/runtime/stack_test.go
+++ b/libgo/go/runtime/stack_test.go
@@ -116,6 +116,13 @@ func TestStackGrowth(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
+
+ if Compiler == "gccgo" && !*Pusestackmaps {
+ // This test is flaky for gccgo's
+ // conservative stack scanning.
+ return
+ }
+
done := make(chan bool)
var startTime time.Time
var started, progress uint32
@@ -599,9 +606,6 @@ func (s structWithMethod) callers() []uintptr {
return pc[:Callers(0, pc)]
}
-// The noinline prevents this function from being inlined
-// into a wrapper. TODO: remove this when issue 28640 is fixed.
-//go:noinline
func (s structWithMethod) stack() string {
buf := make([]byte, 4<<10)
return string(buf[:Stack(buf, false)])
diff --git a/libgo/go/runtime/string.go b/libgo/go/runtime/string.go
index 741b6b4..df4cae7 100644
--- a/libgo/go/runtime/string.go
+++ b/libgo/go/runtime/string.go
@@ -508,3 +508,37 @@ func __go_byte_array_to_string(p unsafe.Pointer, l int) string {
func __go_string_to_byte_array(s string) []byte {
return stringtoslicebyte(nil, s)
}
+
+// parseRelease parses a dot-separated version number. It follows the
+// semver syntax, but allows the minor and patch versions to be
+// elided.
+func parseRelease(rel string) (major, minor, patch int, ok bool) {
+ // Strip anything after a dash or plus.
+ for i := 0; i < len(rel); i++ {
+ if rel[i] == '-' || rel[i] == '+' {
+ rel = rel[:i]
+ break
+ }
+ }
+
+ next := func() (int, bool) {
+ for i := 0; i < len(rel); i++ {
+ if rel[i] == '.' {
+ ver, ok := atoi(rel[:i])
+ rel = rel[i+1:]
+ return ver, ok
+ }
+ }
+ ver, ok := atoi(rel)
+ rel = ""
+ return ver, ok
+ }
+ if major, ok = next(); !ok || rel == "" {
+ return
+ }
+ if minor, ok = next(); !ok || rel == "" {
+ return
+ }
+ patch, ok = next()
+ return
+}
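The parsing contract above (semver-like, minor and patch optional, anything after '-' or '+' ignored) can be exercised with a standalone analogue built on the standard library; the runtime version avoids allocations and uses its own atoi, and edge cases such as a trailing dot may differ, so this is only an illustration.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func parseRelease(rel string) (major, minor, patch int, ok bool) {
	// Strip anything after a dash or plus.
	if i := strings.IndexAny(rel, "-+"); i >= 0 {
		rel = rel[:i]
	}
	parts := strings.SplitN(rel, ".", 4)
	var nums [3]int
	for i := 0; i < len(parts) && i < 3; i++ {
		n, err := strconv.Atoi(parts[i])
		if err != nil {
			return 0, 0, 0, false
		}
		nums[i] = n
	}
	return nums[0], nums[1], nums[2], true
}

func main() {
	for _, in := range []string{"5", "5.12", "5.12.1-arch1", "x"} {
		maj, min, pat, ok := parseRelease(in)
		fmt.Printf("%q -> %d.%d.%d ok=%v\n", in, maj, min, pat, ok)
	}
}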
diff --git a/libgo/go/runtime/string_test.go b/libgo/go/runtime/string_test.go
index ec83bb4..e388f70 100644
--- a/libgo/go/runtime/string_test.go
+++ b/libgo/go/runtime/string_test.go
@@ -462,3 +462,34 @@ func TestAtoi32(t *testing.T) {
}
}
}
+
+type parseReleaseTest struct {
+ in string
+ major, minor, patch int
+}
+
+var parseReleaseTests = []parseReleaseTest{
+ {"", -1, -1, -1},
+ {"x", -1, -1, -1},
+ {"5", 5, 0, 0},
+ {"5.12", 5, 12, 0},
+ {"5.12-x", 5, 12, 0},
+ {"5.12.1", 5, 12, 1},
+ {"5.12.1-x", 5, 12, 1},
+ {"5.12.1.0", 5, 12, 1},
+ {"5.20496382327982653440", -1, -1, -1},
+}
+
+func TestParseRelease(t *testing.T) {
+ for _, test := range parseReleaseTests {
+ major, minor, patch, ok := runtime.ParseRelease(test.in)
+ if !ok {
+ major, minor, patch = -1, -1, -1
+ }
+ if test.major != major || test.minor != minor || test.patch != patch {
+ t.Errorf("parseRelease(%q) = (%v, %v, %v) want (%v, %v, %v)",
+ test.in, major, minor, patch,
+ test.major, test.minor, test.patch)
+ }
+ }
+}
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index a2e1530..ae6134d 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -240,11 +240,16 @@ func asmcgocall(fn, arg unsafe.Pointer) int32 {
return 0
}
-// round n up to a multiple of a. a must be a power of 2.
-func round(n, a uintptr) uintptr {
+// alignUp rounds n up to a multiple of a. a must be a power of 2.
+func alignUp(n, a uintptr) uintptr {
return (n + a - 1) &^ (a - 1)
}
+// alignDown rounds n down to a multiple of a. a must be a power of 2.
+func alignDown(n, a uintptr) uintptr {
+ return n &^ (a - 1)
+}
+
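Both helpers rely on a being a power of two, so a-1 is a mask of the low bits; a quick standalone check of the arithmetic:

package main

import "fmt"

// alignUp rounds n up to a multiple of a; a must be a power of 2.
func alignUp(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

// alignDown rounds n down to a multiple of a; a must be a power of 2.
func alignDown(n, a uintptr) uintptr { return n &^ (a - 1) }

func main() {
	// 13 rounded to an 8-byte boundary: up gives 16, down gives 8.
	fmt.Println(alignUp(13, 8), alignDown(13, 8)) // 16 8
	// Already-aligned values are unchanged.
	fmt.Println(alignUp(32, 8), alignDown(32, 8)) // 32 32
}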
// checkASM returns whether assembly runtime checks have passed.
func checkASM() bool {
return true
diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go
index dcbbffa..454afee 100644
--- a/libgo/go/runtime/stubs2.go
+++ b/libgo/go/runtime/stubs2.go
@@ -4,23 +4,27 @@
// +build !plan9
// +build !windows
-// +build !nacl
// +build !js
package runtime
import "unsafe"
+// read calls the read system call.
+// It returns a non-negative number of bytes read or a negative errno value.
//go:noescape
func read(fd int32, p unsafe.Pointer, n int32) int32
+
func closefd(fd int32) int32
//extern exit
func exit(code int32)
func usleep(usec uint32)
+// write calls the write system call.
+// It returns a non-negative number of bytes written or a negative errno value.
//go:noescape
-func write(fd uintptr, p unsafe.Pointer, n int32) int32
+func write1(fd uintptr, p unsafe.Pointer, n int32) int32
//go:noescape
func open(name *byte, mode, perm int32) int32
diff --git a/libgo/go/runtime/stubs3.go b/libgo/go/runtime/stubs3.go
index d339787..35feb10 100644
--- a/libgo/go/runtime/stubs3.go
+++ b/libgo/go/runtime/stubs3.go
@@ -4,4 +4,4 @@
package runtime
-func nanotime() int64
+func nanotime1() int64
diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go
index a2ecf38..5dd3894 100644
--- a/libgo/go/runtime/symtab.go
+++ b/libgo/go/runtime/symtab.go
@@ -79,7 +79,7 @@ func (ci *Frames) Next() (frame Frame, more bool) {
// Subtract 1 from PC to undo the 1 we added in callback in
// go-callers.c.
- function, file, line, _ := funcfileline(pc-1, int32(i))
+ function, file, line, _ := funcfileline(pc-1, int32(i), more)
if function == "" && file == "" {
return Frame{}, more
}
@@ -127,7 +127,7 @@ type Func struct {
// the *Func describing the innermost function, but with an entry
// of the outermost function.
func FuncForPC(pc uintptr) *Func {
- name, _, _, _ := funcfileline(pc, -1)
+ name, _, _, _ := funcfileline(pc, -1, false)
if name == "" {
return nil
}
@@ -156,7 +156,7 @@ func (f *Func) Entry() uintptr {
// The result will not be accurate if pc is not a program
// counter within f.
func (f *Func) FileLine(pc uintptr) (file string, line int) {
- _, file, line, _ = funcfileline(pc, -1)
+ _, file, line, _ = funcfileline(pc, -1, false)
return file, line
}
@@ -230,5 +230,5 @@ func demangleSymbol(s string) string {
}
// implemented in go-caller.c
-func funcfileline(uintptr, int32) (string, string, int, int)
+func funcfileline(uintptr, int32, bool) (string, string, int, int)
func funcentry(uintptr) uintptr
diff --git a/libgo/go/runtime/testdata/testfaketime/faketime.go b/libgo/go/runtime/testdata/testfaketime/faketime.go
new file mode 100644
index 0000000..1fb15eb
--- /dev/null
+++ b/libgo/go/runtime/testdata/testfaketime/faketime.go
@@ -0,0 +1,28 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test faketime support. This is its own test program because we have
+// to build it with custom build tags and hence want to minimize
+// dependencies.
+
+package main
+
+import (
+ "os"
+ "time"
+)
+
+func main() {
+ println("line 1")
+ // Stream switch, increments time
+ os.Stdout.WriteString("line 2\n")
+ os.Stdout.WriteString("line 3\n")
+ // Stream switch, increments time
+ os.Stderr.WriteString("line 4\n")
+ // Time jump
+ time.Sleep(1 * time.Second)
+ os.Stdout.WriteString("line 5\n")
+ // Print the current time.
+ os.Stdout.WriteString(time.Now().UTC().Format(time.RFC3339))
+}
diff --git a/libgo/go/runtime/testdata/testprog/deadlock.go b/libgo/go/runtime/testdata/testprog/deadlock.go
index 5f0d120..105d6a5 100644
--- a/libgo/go/runtime/testdata/testprog/deadlock.go
+++ b/libgo/go/runtime/testdata/testprog/deadlock.go
@@ -22,6 +22,9 @@ func init() {
register("StackOverflow", StackOverflow)
register("ThreadExhaustion", ThreadExhaustion)
register("RecursivePanic", RecursivePanic)
+ register("RecursivePanic2", RecursivePanic2)
+ register("RecursivePanic3", RecursivePanic3)
+ register("RecursivePanic4", RecursivePanic4)
register("GoexitExit", GoexitExit)
register("GoNil", GoNil)
register("MainGoroutineID", MainGoroutineID)
@@ -29,6 +32,8 @@ func init() {
register("GoexitInPanic", GoexitInPanic)
register("PanicAfterGoexit", PanicAfterGoexit)
register("RecoveredPanicAfterGoexit", RecoveredPanicAfterGoexit)
+ register("RecoverBeforePanicAfterGoexit", RecoverBeforePanicAfterGoexit)
+ register("RecoverBeforePanicAfterGoexit2", RecoverBeforePanicAfterGoexit2)
register("PanicTraceback", PanicTraceback)
register("GoschedInPanic", GoschedInPanic)
register("SyscallInPanic", SyscallInPanic)
@@ -111,6 +116,50 @@ func RecursivePanic() {
panic("again")
}
+// Same as RecursivePanic, but do the first recover and the second panic in
+// separate defers, and make sure they are executed in the correct order.
+func RecursivePanic2() {
+ func() {
+ defer func() {
+ fmt.Println(recover())
+ }()
+ var x [8192]byte
+ func(x [8192]byte) {
+ defer func() {
+ panic("second panic")
+ }()
+ defer func() {
+ fmt.Println(recover())
+ }()
+ panic("first panic")
+ }(x)
+ }()
+ panic("third panic")
+}
+
+// Make sure that the first panic finished as a panic, even though the second
+// panic was recovered.
+func RecursivePanic3() {
+ defer func() {
+ defer func() {
+ recover()
+ }()
+ panic("second panic")
+ }()
+ panic("first panic")
+}
+
+// Test case where a single defer recovers one panic but starts another panic. If
+// the second panic is never recovered, then the recovered first panic will still
+// appear on the panic stack (labeled '[recovered]') and the runtime stack.
+func RecursivePanic4() {
+ defer func() {
+ recover()
+ panic("second panic")
+ }()
+ panic("first panic")
+}
+
func GoexitExit() {
println("t1")
go func() {
@@ -202,6 +251,50 @@ func RecoveredPanicAfterGoexit() {
runtime.Goexit()
}
+func RecoverBeforePanicAfterGoexit() {
+ // 1. defer a function that recovers
+ // 2. defer a function that panics
+ // 3. call goexit
+ // Goexit runs the #2 defer. Its panic
+ // is caught by the #1 defer. For Goexit, we explicitly
+ // resume execution in the Goexit loop, instead of resuming
+ // execution in the caller (which would make the Goexit disappear!)
+ defer func() {
+ r := recover()
+ if r == nil {
+ panic("bad recover")
+ }
+ }()
+ defer func() {
+ panic("hello")
+ }()
+ runtime.Goexit()
+}
+
+func RecoverBeforePanicAfterGoexit2() {
+ for i := 0; i < 2; i++ {
+ defer func() {
+ }()
+ }
+ // 1. defer a function that recovers
+ // 2. defer a function that panics
+ // 3. call goexit
+ // Goexit runs the #2 defer. Its panic
+ // is caught by the #1 defer. For Goexit, we explicitly
+ // resume execution in the Goexit loop, instead of resuming
+ // execution in the caller (which would make the Goexit disappear!)
+ defer func() {
+ r := recover()
+ if r == nil {
+ panic("bad recover")
+ }
+ }()
+ defer func() {
+ panic("hello")
+ }()
+ runtime.Goexit()
+}
+
func PanicTraceback() {
pt1()
}
diff --git a/libgo/go/runtime/testdata/testprog/gc.go b/libgo/go/runtime/testdata/testprog/gc.go
index 3fd1cd8..cc16413 100644
--- a/libgo/go/runtime/testdata/testprog/gc.go
+++ b/libgo/go/runtime/testdata/testprog/gc.go
@@ -27,7 +27,6 @@ func GCSys() {
runtime.GC()
runtime.ReadMemStats(memstats)
sys := memstats.Sys
- fmt.Printf("original sys: %#x\n", sys)
runtime.MemProfileRate = 0 // disable profiler
@@ -39,8 +38,6 @@ func GCSys() {
// Should only be using a few MB.
// We allocated 100 MB or (if not short) 1 GB.
runtime.ReadMemStats(memstats)
- fmt.Printf("final sys: %#x\n", memstats.Sys)
- fmt.Printf("%#v\n", *memstats)
if sys > memstats.Sys {
sys = 0
} else {
@@ -150,9 +147,20 @@ func GCPhys() {
size = 4 << 20
split = 64 << 10
objects = 2
+
+ // The page cache could hide 64 8-KiB pages from the scavenger today.
+ maxPageCache = (8 << 10) * 64
)
// Set GOGC so that this test operates under consistent assumptions.
debug.SetGCPercent(100)
+ // Reduce GOMAXPROCS down to 4 if it's greater. We need to bound the amount
+ // of memory held in the page cache because the scavenger can't reach it.
+ // The page cache will hold at most maxPageCache of memory per-P, so this
+ // bounds the amount of memory hidden from the scavenger to 4*maxPageCache.
+ procs := runtime.GOMAXPROCS(-1)
+ if procs > 4 {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+ }
// Save objects which we want to survive, and condemn objects which we don't.
// Note that we condemn objects in this way and release them all at once in
// order to avoid having the GC start freeing up these objects while the loop
@@ -200,10 +208,22 @@ func GCPhys() {
// Since the runtime should scavenge the entirety of the remaining holes,
// theoretically there should be no more free and unscavenged memory. However due
// to other allocations that happen during this test we may still see some physical
- // memory over-use. 10% here is an arbitrary but very conservative threshold which
- // should easily account for any other allocations this test may have done.
+ // memory over-use.
overuse := (float64(heapBacked) - float64(stats.HeapAlloc)) / float64(stats.HeapAlloc)
- if overuse <= 0.10 {
+ // Compute the threshold.
+ //
+ // In theory, this threshold should just be zero, but that's not possible in practice.
+ // Firstly, the runtime's page cache can hide up to maxPageCache of free memory from the
+ // scavenger per P. To account for this, we increase the threshold by the ratio between the
+ // total amount the runtime could hide from the scavenger to the amount of memory we expect
+ // to be able to scavenge here, which is (size-split)*objects. This computation is the crux
+ // of the GOMAXPROCS setting above; if GOMAXPROCS is too high the threshold just becomes 100%+
+ // since the amount of memory being allocated is fixed. Then we add 5% to account for noise,
+ // such as other allocations this test may have performed that we don't explicitly account for.
+ // The baseline threshold here is around 11% for GOMAXPROCS=1, capping out at around 30% for
+ // GOMAXPROCS=4.
+ threshold := 0.05 + float64(procs)*maxPageCache/float64((size-split)*objects)
+ if overuse <= threshold {
fmt.Println("OK")
return
}
@@ -213,8 +233,8 @@ func GCPhys() {
// In the context of this test, this indicates a large amount of
// fragmentation with physical pages that are otherwise unused but not
// returned to the OS.
- fmt.Printf("exceeded physical memory overuse threshold of 10%%: %3.2f%%\n"+
- "(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", overuse*100,
+ fmt.Printf("exceeded physical memory overuse threshold of %3.2f%%: %3.2f%%\n"+
+ "(alloc: %d, goal: %d, sys: %d, rel: %d, objs: %d)\n", threshold*100, overuse*100,
stats.HeapAlloc, stats.NextGC, stats.HeapSys, stats.HeapReleased, len(saved))
runtime.KeepAlive(saved)
}
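
To sanity-check the new threshold arithmetic, here is a small standalone sketch (not part of the patch; the program and the threshold helper are illustrative only) that plugs the constants from this hunk into the same formula and reproduces the quoted ~11% and ~30% figures:

package main

import "fmt"

// threshold mirrors the arithmetic in GCPhys: a 5% noise allowance plus the
// worst-case memory the per-P page caches can hide, relative to the memory
// the test expects to scavenge, which is (size-split)*objects.
func threshold(procs int) float64 {
	const (
		size         = 4 << 20
		split        = 64 << 10
		objects      = 2
		maxPageCache = (8 << 10) * 64 // 64 pages of 8 KiB per P
	)
	return 0.05 + float64(procs)*maxPageCache/float64((size-split)*objects)
}

func main() {
	fmt.Printf("GOMAXPROCS=1: %.1f%%\n", threshold(1)*100) // ~11.3%
	fmt.Printf("GOMAXPROCS=4: %.1f%%\n", threshold(4)*100) // ~30.4%
}
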
diff --git a/libgo/go/runtime/testdata/testprog/preempt.go b/libgo/go/runtime/testdata/testprog/preempt.go
new file mode 100644
index 0000000..1c74d0e
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprog/preempt.go
@@ -0,0 +1,71 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "runtime"
+ "runtime/debug"
+ "sync/atomic"
+)
+
+func init() {
+ register("AsyncPreempt", AsyncPreempt)
+}
+
+func AsyncPreempt() {
+ // Run with just 1 GOMAXPROCS so the runtime is required to
+ // use scheduler preemption.
+ runtime.GOMAXPROCS(1)
+ // Disable GC so we have complete control of what we're testing.
+ debug.SetGCPercent(-1)
+
+ // Start a goroutine with no sync safe-points.
+ var ready, ready2 uint32
+ go func() {
+ for {
+ atomic.StoreUint32(&ready, 1)
+ dummy()
+ dummy()
+ }
+ }()
+ // Also start one with a frameless function.
+ // This is an especially interesting case for
+ // LR machines.
+ go func() {
+ atomic.AddUint32(&ready2, 1)
+ frameless()
+ }()
+ // Also test empty infinite loop.
+ go func() {
+ atomic.AddUint32(&ready2, 1)
+ for {
+ }
+ }()
+
+ // Wait for the goroutine to stop passing through sync
+ // safe-points.
+ for atomic.LoadUint32(&ready) == 0 || atomic.LoadUint32(&ready2) < 2 {
+ runtime.Gosched()
+ }
+
+ // Run a GC, which will have to stop the goroutine for STW and
+ // for stack scanning. If this doesn't work, the test will
+ // deadlock and timeout.
+ runtime.GC()
+
+ println("OK")
+}
+
+//go:noinline
+func frameless() {
+ for i := int64(0); i < 1<<62; i++ {
+ out += i * i * i * i * i * 12345
+ }
+}
+
+var out int64
+
+//go:noinline
+func dummy() {}
diff --git a/libgo/go/runtime/testdata/testprog/signal.go b/libgo/go/runtime/testdata/testprog/signal.go
index 2ccbada..417e105 100644
--- a/libgo/go/runtime/testdata/testprog/signal.go
+++ b/libgo/go/runtime/testdata/testprog/signal.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !windows,!plan9,!nacl
+// +build !windows,!plan9
package main
diff --git a/libgo/go/runtime/testdata/testprog/vdso.go b/libgo/go/runtime/testdata/testprog/vdso.go
new file mode 100644
index 0000000..ef92f48
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprog/vdso.go
@@ -0,0 +1,55 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Invoke the signal handler in the VDSO context (see issue 32912).
+
+package main
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "runtime/pprof"
+ "time"
+)
+
+func init() {
+ register("SignalInVDSO", signalInVDSO)
+}
+
+func signalInVDSO() {
+ f, err := ioutil.TempFile("", "timeprofnow")
+ if err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ if err := pprof.StartCPUProfile(f); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ t0 := time.Now()
+ t1 := t0
+ // We should get a profiling signal 100 times a second,
+ // so running for 1 second should be sufficient.
+ for t1.Sub(t0) < time.Second {
+ t1 = time.Now()
+ }
+
+ pprof.StopCPUProfile()
+
+ name := f.Name()
+ if err := f.Close(); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ if err := os.Remove(name); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ os.Exit(2)
+ }
+
+ fmt.Println("success")
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.c b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.c
new file mode 100644
index 0000000..cd85ac8
--- /dev/null
+++ b/libgo/go/runtime/testdata/testprogcgo/bigstack_windows.c
@@ -0,0 +1,46 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This test source is used by both TestBigStackCallbackCgo (linked
+// directly into the Go binary) and TestBigStackCallbackSyscall
+// (compiled into a DLL).
+
+#include <windows.h>
+#include <stdio.h>
+
+#ifndef STACK_SIZE_PARAM_IS_A_RESERVATION
+#define STACK_SIZE_PARAM_IS_A_RESERVATION 0x00010000
+#endif
+
+typedef void callback(char*);
+
+// Allocate a stack that's much larger than the default.
+static const int STACK_SIZE = 16<<20;
+
+static callback *bigStackCallback;
+
+static void useStack(int bytes) {
+ // Windows doesn't like huge frames, so we grow the stack 64k at a time.
+ char x[64<<10];
+ if (bytes < sizeof x) {
+ bigStackCallback(x);
+ } else {
+ useStack(bytes - sizeof x);
+ }
+}
+
+static DWORD WINAPI threadEntry(LPVOID lpParam) {
+ useStack(STACK_SIZE - (128<<10));
+ return 0;
+}
+
+void bigStack(callback *cb) {
+ bigStackCallback = cb;
+ HANDLE hThread = CreateThread(NULL, STACK_SIZE, threadEntry, NULL, STACK_SIZE_PARAM_IS_A_RESERVATION, NULL);
+ if (hThread == NULL) {
+ fprintf(stderr, "CreateThread failed\n");
+ exit(1);
+ }
+ WaitForSingleObject(hThread, INFINITE);
+}
diff --git a/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go b/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
index 68f1738..b7134a4 100644
--- a/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
+++ b/libgo/go/runtime/testdata/testprogcgo/numgoroutine.go
@@ -41,13 +41,6 @@ func NumGoroutine() {
// Test that there are just the expected number of goroutines
// running. Specifically, test that the spare M's goroutine
// doesn't show up.
- //
- // On non-Windows platforms there's a signal handling thread
- // started by os/signal.init in addition to the main
- // goroutine.
- if runtime.GOOS != "windows" {
- baseGoroutines = 1
- }
if _, ok := checkNumGoroutine("first", 1+baseGoroutines); !ok {
return
}
diff --git a/libgo/go/runtime/testdata/testprognet/signal.go b/libgo/go/runtime/testdata/testprognet/signal.go
index a1559fe..4d2de79 100644
--- a/libgo/go/runtime/testdata/testprognet/signal.go
+++ b/libgo/go/runtime/testdata/testprognet/signal.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !windows,!plan9,!nacl
+// +build !windows,!plan9
// This is in testprognet instead of testprog because testprog
// must not import anything (like net, but also like os/signal)
diff --git a/libgo/go/runtime/testdata/testwinlib/main.c b/libgo/go/runtime/testdata/testwinlib/main.c
new file mode 100644
index 0000000..e84a32f
--- /dev/null
+++ b/libgo/go/runtime/testdata/testwinlib/main.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include <windows.h>
+#include "testwinlib.h"
+
+int exceptionCount;
+int continueCount;
+LONG WINAPI customExceptionHandlder(struct _EXCEPTION_POINTERS *ExceptionInfo)
+{
+ if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_BREAKPOINT)
+ {
+ exceptionCount++;
+ // prepare context to resume execution
+ CONTEXT *c = ExceptionInfo->ContextRecord;
+ c->Rip = *(ULONG_PTR *)c->Rsp;
+ c->Rsp += 8;
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+LONG WINAPI customContinueHandlder(struct _EXCEPTION_POINTERS *ExceptionInfo)
+{
+ if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_BREAKPOINT)
+ {
+ continueCount++;
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+void throwFromC()
+{
+ DebugBreak();
+}
+int main()
+{
+ // Simulate a "lazily" attached debugger by calling some Go code before attaching the exception/continue handlers.
+ Dummy();
+ exceptionCount = 0;
+ continueCount = 0;
+ void *exceptionHandlerHandle = AddVectoredExceptionHandler(0, customExceptionHandlder);
+ if (NULL == exceptionHandlerHandle)
+ {
+ printf("cannot add vectored exception handler\n");
+ return 2;
+ }
+ void *continueHandlerHandle = AddVectoredContinueHandler(0, customContinueHandlder);
+ if (NULL == continueHandlerHandle)
+ {
+ printf("cannot add vectored continue handler\n");
+ return 2;
+ }
+ CallMeBack(throwFromC);
+ RemoveVectoredContinueHandler(continueHandlerHandle);
+ RemoveVectoredExceptionHandler(exceptionHandlerHandle);
+ printf("exceptionCount: %d\ncontinueCount: %d\n", exceptionCount, continueCount);
+ return 0;
+} \ No newline at end of file
diff --git a/libgo/go/runtime/testdata/testwinlib/main.go b/libgo/go/runtime/testdata/testwinlib/main.go
new file mode 100644
index 0000000..400eaa1
--- /dev/null
+++ b/libgo/go/runtime/testdata/testwinlib/main.go
@@ -0,0 +1,28 @@
+// +build windows,cgo
+
+package main
+
+// #include <windows.h>
+// typedef void(*callmeBackFunc)();
+// static void bridgeCallback(callmeBackFunc callback) {
+// callback();
+//}
+import "C"
+
+// CallMeBack calls back into C code.
+//export CallMeBack
+func CallMeBack(callback C.callmeBackFunc) {
+ C.bridgeCallback(callback)
+}
+
+// Dummy is called by the C code before registering the exception/continue handlers, simulating a debugger.
+// This makes sure that the Go runtime's lastcontinuehandler is reached before the C continue handler and thus
+// validates that it does not crash the program before another handler can take action.
+// The idea here is to reproduce what happens when you attach a debugger to a running program.
+// It also simulates the behavior of the .NET debugger, which registers its exception/continue handlers lazily.
+//export Dummy
+func Dummy() int {
+ return 42
+}
+
+func main() {}
diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go
index 71e7556..ded68ed 100644
--- a/libgo/go/runtime/time.go
+++ b/libgo/go/runtime/time.go
@@ -7,17 +7,17 @@
package runtime
import (
- "internal/cpu"
+ "runtime/internal/atomic"
"unsafe"
)
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
-// For GOOS=nacl, package syscall knows the layout of this structure.
-// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
type timer struct {
- tb *timersBucket // the bucket the timer lives in
- i int // heap index
+ // If this timer is on a heap, which P's heap it is on.
+ // puintptr rather than *p to match uintptr in the versions
+ // of this struct defined in other packages.
+ pp puintptr
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(arg, now) in the timer goroutine, so f must be
@@ -27,52 +27,146 @@ type timer struct {
f func(interface{}, uintptr)
arg interface{}
seq uintptr
+
+ // What to set the when field to in timerModifiedXX status.
+ nextwhen int64
+
+ // The status field holds one of the values below.
+ status uint32
}
-// timersLen is the length of timers array.
+// Code outside this file has to be careful in using a timer value.
//
-// Ideally, this would be set to GOMAXPROCS, but that would require
-// dynamic reallocation
+// The pp, status, and nextwhen fields may only be used by code in this file.
//
-// The current value is a compromise between memory usage and performance
-// that should cover the majority of GOMAXPROCS values used in the wild.
-const timersLen = 64
-
-// timers contains "per-P" timer heaps.
+// Code that creates a new timer value can set the when, period, f,
+// arg, and seq fields.
+// A new timer value may be passed to addtimer (called by time.startTimer).
+// After doing that no fields may be touched.
//
-// Timers are queued into timersBucket associated with the current P,
-// so each P may work with its own timers independently of other P instances.
+// An active timer (one that has been passed to addtimer) may be
+// passed to deltimer (time.stopTimer), after which it is no longer an
+// active timer. It is an inactive timer.
+// In an inactive timer the period, f, arg, and seq fields may be modified,
+// but not the when field.
+// It's OK to just drop an inactive timer and let the GC collect it.
+// It's not OK to pass an inactive timer to addtimer.
+// Only newly allocated timer values may be passed to addtimer.
//
-// Each timersBucket may be associated with multiple P
-// if GOMAXPROCS > timersLen.
-var timers [timersLen]struct {
- timersBucket
-
- // The padding should eliminate false sharing
- // between timersBucket values.
- pad [cpu.CacheLinePadSize - unsafe.Sizeof(timersBucket{})%cpu.CacheLinePadSize]byte
-}
+// An active timer may be passed to modtimer. No fields may be touched.
+// It remains an active timer.
+//
+// An inactive timer may be passed to resettimer to turn into an
+// active timer with an updated when field.
+// It's OK to pass a newly allocated timer value to resettimer.
+//
+// Timer operations are addtimer, deltimer, modtimer, resettimer,
+// cleantimers, adjusttimers, and runtimer.
+//
+// We don't permit calling addtimer/deltimer/modtimer/resettimer simultaneously,
+// but adjusttimers and runtimer can be called at the same time as any of those.
+//
+// Active timers live in heaps attached to P, in the timers field.
+// Inactive timers live there too temporarily, until they are removed.
+//
+// addtimer:
+// timerNoStatus -> timerWaiting
+// anything else -> panic: invalid value
+// deltimer:
+// timerWaiting -> timerDeleted
+// timerModifiedXX -> timerDeleted
+// timerNoStatus -> do nothing
+// timerDeleted -> do nothing
+// timerRemoving -> do nothing
+// timerRemoved -> do nothing
+// timerRunning -> wait until status changes
+// timerMoving -> wait until status changes
+// timerModifying -> panic: concurrent deltimer/modtimer calls
+// modtimer:
+// timerWaiting -> timerModifying -> timerModifiedXX
+// timerModifiedXX -> timerModifying -> timerModifiedYY
+// timerNoStatus -> timerWaiting
+// timerRemoved -> timerWaiting
+// timerRunning -> wait until status changes
+// timerMoving -> wait until status changes
+// timerRemoving -> wait until status changes
+// timerDeleted -> panic: concurrent modtimer/deltimer calls
+// timerModifying -> panic: concurrent modtimer calls
+// resettimer:
+// timerNoStatus -> timerWaiting
+// timerRemoved -> timerWaiting
+// timerDeleted -> timerModifying -> timerModifiedXX
+// timerRemoving -> wait until status changes
+// timerRunning -> wait until status changes
+// timerWaiting -> panic: resettimer called on active timer
+// timerMoving -> panic: resettimer called on active timer
+// timerModifiedXX -> panic: resettimer called on active timer
+// timerModifying -> panic: resettimer called on active timer
+// cleantimers (looks in P's timer heap):
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerModifiedXX -> timerMoving -> timerWaiting
+// adjusttimers (looks in P's timer heap):
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerModifiedXX -> timerMoving -> timerWaiting
+// runtimer (looks in P's timer heap):
+// timerNoStatus -> panic: uninitialized timer
+// timerWaiting -> timerWaiting or
+// timerWaiting -> timerRunning -> timerNoStatus or
+// timerWaiting -> timerRunning -> timerWaiting
+// timerModifying -> wait until status changes
+// timerModifiedXX -> timerMoving -> timerWaiting
+// timerDeleted -> timerRemoving -> timerRemoved
+// timerRunning -> panic: concurrent runtimer calls
+// timerRemoved -> panic: inconsistent timer heap
+// timerRemoving -> panic: inconsistent timer heap
+// timerMoving -> panic: inconsistent timer heap
-func (t *timer) assignBucket() *timersBucket {
- id := uint8(getg().m.p.ptr().id) % timersLen
- t.tb = &timers[id].timersBucket
- return t.tb
-}
+// Values for the timer status field.
+const (
+ // Timer has no status set yet.
+ timerNoStatus = iota
-//go:notinheap
-type timersBucket struct {
- lock mutex
- gp *g
- created bool
- sleeping bool
- rescheduling bool
- sleepUntil int64
- waitnote note
- t []*timer
-}
+ // Waiting for timer to fire.
+ // The timer is in some P's heap.
+ timerWaiting
+
+ // Running the timer function.
+ // A timer will only have this status briefly.
+ timerRunning
+
+ // The timer is deleted and should be removed.
+ // It should not be run, but it is still in some P's heap.
+ timerDeleted
-// nacl fake time support - time in nanoseconds since 1970
-var faketime int64
+ // The timer is being removed.
+ // The timer will only have this status briefly.
+ timerRemoving
+
+ // The timer has been stopped.
+ // It is not in any P's heap.
+ timerRemoved
+
+ // The timer is being modified.
+ // The timer will only have this status briefly.
+ timerModifying
+
+ // The timer has been modified to an earlier time.
+ // The new when value is in the nextwhen field.
+ // The timer is in some P's heap, possibly in the wrong place.
+ timerModifiedEarlier
+
+ // The timer has been modified to the same or a later time.
+ // The new when value is in the nextwhen field.
+ // The timer is in some P's heap, possibly in the wrong place.
+ timerModifiedLater
+
+ // The timer has been modified and is being moved.
+ // The timer will only have this status briefly.
+ timerMoving
+)
+
+// maxWhen is the maximum value for timer's when field.
+const maxWhen = 1<<63 - 1
// Package time APIs.
// Godoc uses the comments in package time, not these.
@@ -92,17 +186,20 @@ func timeSleep(ns int64) {
t = new(timer)
gp.timer = t
}
- *t = timer{}
- t.when = nanotime() + ns
t.f = goroutineReady
t.arg = gp
- tb := t.assignBucket()
- lock(&tb.lock)
- if !tb.addtimerLocked(t) {
- unlock(&tb.lock)
- badTimer()
- }
- goparkunlock(&tb.lock, waitReasonSleep, traceEvGoSleep, 2)
+ t.nextwhen = nanotime() + ns
+ gopark(resetForSleep, unsafe.Pointer(t), waitReasonSleep, traceEvGoSleep, 1)
+}
+
+// resetForSleep is called after the goroutine is parked for timeSleep.
+// We can't call resettimer in timeSleep itself because if this is a short
+// sleep and there are many goroutines then the P can wind up running the
+// timer function, goroutineReady, before the goroutine has been parked.
+func resetForSleep(gp *g, ut unsafe.Pointer) bool {
+ t := (*timer)(ut)
+ resettimer(t, t.nextwhen)
+ return true
}
// startTimer adds t to the timer heap.
@@ -114,13 +211,22 @@ func startTimer(t *timer) {
addtimer(t)
}
-// stopTimer removes t from the timer heap if it is there.
-// It returns true if t was removed, false if t wasn't even there.
+// stopTimer stops a timer.
+// It reports whether t was stopped before being run.
//go:linkname stopTimer time.stopTimer
func stopTimer(t *timer) bool {
return deltimer(t)
}
+// resetTimer resets an inactive timer, adding it to the heap.
+//go:linkname resetTimer time.resetTimer
+func resetTimer(t *timer, when int64) {
+ if raceenabled {
+ racerelease(unsafe.Pointer(t))
+ }
+ resettimer(t, when)
+}
+
// Go runtime.
// Ready the goroutine arg.
@@ -128,252 +234,714 @@ func goroutineReady(arg interface{}, seq uintptr) {
goready(arg.(*g), 0)
}
+// addtimer adds a timer to the current P.
+// This should only be called with a newly created timer.
+// That avoids the risk of changing the when field of a timer in some P's heap,
+// which could cause the heap to become unsorted.
func addtimer(t *timer) {
- tb := t.assignBucket()
- lock(&tb.lock)
- ok := tb.addtimerLocked(t)
- unlock(&tb.lock)
- if !ok {
- badTimer()
- }
-}
-
-// Add a timer to the heap and start or kick timerproc if the new timer is
-// earlier than any of the others.
-// Timers are locked.
-// Returns whether all is well: false if the data structure is corrupt
-// due to user-level races.
-func (tb *timersBucket) addtimerLocked(t *timer) bool {
- // when must never be negative; otherwise timerproc will overflow
+ // when must never be negative; otherwise runtimer will overflow
// during its delta calculation and never expire other runtime timers.
if t.when < 0 {
- t.when = 1<<63 - 1
+ t.when = maxWhen
}
- t.i = len(tb.t)
- tb.t = append(tb.t, t)
- if !siftupTimer(tb.t, t.i) {
- return false
+ if t.status != timerNoStatus {
+ badTimer()
}
- if t.i == 0 {
- // siftup moved to top: new earliest deadline.
- if tb.sleeping && tb.sleepUntil > t.when {
- tb.sleeping = false
- notewakeup(&tb.waitnote)
- }
- if tb.rescheduling {
- tb.rescheduling = false
- goready(tb.gp, 0)
- }
- if !tb.created {
- tb.created = true
- expectSystemGoroutine()
- go timerproc(tb)
- }
+ t.status = timerWaiting
+
+ addInitializedTimer(t)
+}
+
+// addInitializedTimer adds an initialized timer to the current P.
+func addInitializedTimer(t *timer) {
+ when := t.when
+
+ pp := getg().m.p.ptr()
+ lock(&pp.timersLock)
+ ok := cleantimers(pp) && doaddtimer(pp, t)
+ unlock(&pp.timersLock)
+ if !ok {
+ badTimer()
}
- return true
+
+ wakeNetPoller(when)
}
-// Delete timer t from the heap.
-// Do not need to update the timerproc: if it wakes up early, no big deal.
-func deltimer(t *timer) bool {
- if t.tb == nil {
- // t.tb can be nil if the user created a timer
- // directly, without invoking startTimer e.g
- // time.Ticker{C: c}
- // In this case, return early without any deletion.
- // See Issue 21874.
- return false
+// doaddtimer adds t to the current P's heap.
+// It reports whether it saw no problems due to races.
+// The caller must have locked the timers for pp.
+func doaddtimer(pp *p, t *timer) bool {
+ // Timers rely on the network poller, so make sure the poller
+ // has started.
+ if netpollInited == 0 {
+ netpollGenericInit()
}
- tb := t.tb
+ if t.pp != 0 {
+ throw("doaddtimer: P already set in timer")
+ }
+ t.pp.set(pp)
+ i := len(pp.timers)
+ pp.timers = append(pp.timers, t)
+ return siftupTimer(pp.timers, i)
+}
- lock(&tb.lock)
- removed, ok := tb.deltimerLocked(t)
- unlock(&tb.lock)
- if !ok {
- badTimer()
+// deltimer deletes the timer t. It may be on some other P, so we can't
+// actually remove it from the timers heap. We can only mark it as deleted.
+// It will be removed in due course by the P whose heap it is on.
+// Reports whether the timer was removed before it was run.
+func deltimer(t *timer) bool {
+ for {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting, timerModifiedLater:
+ if atomic.Cas(&t.status, s, timerDeleted) {
+ // Timer was not yet run.
+ return true
+ }
+ case timerModifiedEarlier:
+ tpp := t.pp.ptr()
+ if atomic.Cas(&t.status, s, timerModifying) {
+ atomic.Xadd(&tpp.adjustTimers, -1)
+ if !atomic.Cas(&t.status, timerModifying, timerDeleted) {
+ badTimer()
+ }
+ // Timer was not yet run.
+ return true
+ }
+ case timerDeleted, timerRemoving, timerRemoved:
+ // Timer was already run.
+ return false
+ case timerRunning, timerMoving:
+ // The timer is being run or moved, by a different P.
+ // Wait for it to complete.
+ osyield()
+ case timerNoStatus:
+ // Removing timer that was never added or
+ // has already been run. Also see issue 21874.
+ return false
+ case timerModifying:
+ // Simultaneous calls to deltimer and modtimer.
+ badTimer()
+ default:
+ badTimer()
+ }
}
- return removed
}
-func (tb *timersBucket) deltimerLocked(t *timer) (removed, ok bool) {
- // t may not be registered anymore and may have
- // a bogus i (typically 0, if generated by Go).
- // Verify it before proceeding.
- i := t.i
- last := len(tb.t) - 1
- if i < 0 || i > last || tb.t[i] != t {
- return false, true
+// dodeltimer removes timer i from the current P's heap.
+// We are locked on the P when this is called.
+// It reports whether it saw no problems due to races.
+// The caller must have locked the timers for pp.
+func dodeltimer(pp *p, i int) bool {
+ if t := pp.timers[i]; t.pp.ptr() != pp {
+ throw("dodeltimer: wrong P")
+ } else {
+ t.pp = 0
}
+ last := len(pp.timers) - 1
if i != last {
- tb.t[i] = tb.t[last]
- tb.t[i].i = i
+ pp.timers[i] = pp.timers[last]
}
- tb.t[last] = nil
- tb.t = tb.t[:last]
- ok = true
+ pp.timers[last] = nil
+ pp.timers = pp.timers[:last]
+ ok := true
if i != last {
- if !siftupTimer(tb.t, i) {
+ // Moving to i may have moved the last timer to a new parent,
+ // so sift up to preserve the heap guarantee.
+ if !siftupTimer(pp.timers, i) {
ok = false
}
- if !siftdownTimer(tb.t, i) {
+ if !siftdownTimer(pp.timers, i) {
ok = false
}
}
- return true, ok
+ return ok
}
+// dodeltimer0 removes timer 0 from the current P's heap.
+// We are locked on the P when this is called.
+// It reports whether it saw no problems due to races.
+// The caller must have locked the timers for pp.
+func dodeltimer0(pp *p) bool {
+ if t := pp.timers[0]; t.pp.ptr() != pp {
+ throw("dodeltimer0: wrong P")
+ } else {
+ t.pp = 0
+ }
+ last := len(pp.timers) - 1
+ if last > 0 {
+ pp.timers[0] = pp.timers[last]
+ }
+ pp.timers[last] = nil
+ pp.timers = pp.timers[:last]
+ ok := true
+ if last > 0 {
+ ok = siftdownTimer(pp.timers, 0)
+ }
+ return ok
+}
+
+// modtimer modifies an existing timer.
+// This is called by the netpoll code.
func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) {
- tb := t.tb
+ if when < 0 {
+ when = maxWhen
+ }
- lock(&tb.lock)
- _, ok := tb.deltimerLocked(t)
- if ok {
- t.when = when
- t.period = period
- t.f = f
- t.arg = arg
- t.seq = seq
- ok = tb.addtimerLocked(t)
+ status := uint32(timerNoStatus)
+ wasRemoved := false
+loop:
+ for {
+ switch status = atomic.Load(&t.status); status {
+ case timerWaiting, timerModifiedEarlier, timerModifiedLater:
+ if atomic.Cas(&t.status, status, timerModifying) {
+ break loop
+ }
+ case timerNoStatus, timerRemoved:
+ // Timer was already run and t is no longer in a heap.
+ // Act like addtimer.
+ if atomic.Cas(&t.status, status, timerWaiting) {
+ wasRemoved = true
+ break loop
+ }
+ case timerRunning, timerRemoving, timerMoving:
+ // The timer is being run or moved, by a different P.
+ // Wait for it to complete.
+ osyield()
+ case timerDeleted:
+ // Simultaneous calls to modtimer and deltimer.
+ badTimer()
+ case timerModifying:
+ // Multiple simultaneous calls to modtimer.
+ badTimer()
+ default:
+ badTimer()
+ }
}
- unlock(&tb.lock)
- if !ok {
- badTimer()
+
+ t.period = period
+ t.f = f
+ t.arg = arg
+ t.seq = seq
+
+ if wasRemoved {
+ t.when = when
+ addInitializedTimer(t)
+ } else {
+ // The timer is in some other P's heap, so we can't change
+ // the when field. If we did, the other P's heap would
+ // be out of order. So we put the new when value in the
+ // nextwhen field, and let the other P set the when field
+ // when it is prepared to resort the heap.
+ t.nextwhen = when
+
+ newStatus := uint32(timerModifiedLater)
+ if when < t.when {
+ newStatus = timerModifiedEarlier
+ }
+
+ // Update the adjustTimers field. Subtract one if we
+ // are removing a timerModifiedEarlier, add one if we
+ // are adding a timerModifiedEarlier.
+ tpp := t.pp.ptr()
+ adjust := int32(0)
+ if status == timerModifiedEarlier {
+ adjust--
+ }
+ if newStatus == timerModifiedEarlier {
+ adjust++
+ }
+ if adjust != 0 {
+ atomic.Xadd(&tpp.adjustTimers, adjust)
+ }
+
+ // Set the new status of the timer.
+ if !atomic.Cas(&t.status, timerModifying, newStatus) {
+ badTimer()
+ }
+
+ // If the new status is earlier, wake up the poller.
+ if newStatus == timerModifiedEarlier {
+ wakeNetPoller(when)
+ }
}
}
-// Timerproc runs the time-driven events.
-// It sleeps until the next event in the tb heap.
-// If addtimer inserts a new earlier event, it wakes timerproc early.
-func timerproc(tb *timersBucket) {
- setSystemGoroutine()
+// resettimer resets an existing inactive timer to turn it into an active timer,
+// with a new time for when the timer should fire.
+// This should be called instead of addtimer if the timer value has been,
+// or may have been, used previously.
+func resettimer(t *timer, when int64) {
+ if when < 0 {
+ when = maxWhen
+ }
- tb.gp = getg()
for {
- lock(&tb.lock)
- tb.sleeping = false
- now := nanotime()
- delta := int64(-1)
- for {
- if len(tb.t) == 0 {
- delta = -1
- break
+ switch s := atomic.Load(&t.status); s {
+ case timerNoStatus, timerRemoved:
+ if atomic.Cas(&t.status, s, timerWaiting) {
+ t.when = when
+ addInitializedTimer(t)
+ return
}
- t := tb.t[0]
- delta = t.when - now
- if delta > 0 {
- break
- }
- ok := true
- if t.period > 0 {
- // leave in heap but adjust next time to fire
- t.when += t.period * (1 + -delta/t.period)
- if !siftdownTimer(tb.t, 0) {
- ok = false
+ case timerDeleted:
+ if atomic.Cas(&t.status, s, timerModifying) {
+ t.nextwhen = when
+ newStatus := uint32(timerModifiedLater)
+ if when < t.when {
+ newStatus = timerModifiedEarlier
+ atomic.Xadd(&t.pp.ptr().adjustTimers, 1)
}
- } else {
- // remove from heap
- last := len(tb.t) - 1
- if last > 0 {
- tb.t[0] = tb.t[last]
- tb.t[0].i = 0
+ if !atomic.Cas(&t.status, timerModifying, newStatus) {
+ badTimer()
}
- tb.t[last] = nil
- tb.t = tb.t[:last]
- if last > 0 {
- if !siftdownTimer(tb.t, 0) {
- ok = false
- }
+ if newStatus == timerModifiedEarlier {
+ wakeNetPoller(when)
}
- t.i = -1 // mark as removed
+ return
+ }
+ case timerRemoving:
+ // Wait for the removal to complete.
+ osyield()
+ case timerRunning:
+ // Even though the timer should not be active,
+ // we can see timerRunning if the timer function
+ // permits some other goroutine to call resettimer.
+ // Wait until the run is complete.
+ osyield()
+ case timerWaiting, timerModifying, timerModifiedEarlier, timerModifiedLater, timerMoving:
+ // Called resettimer on active timer.
+ badTimer()
+ default:
+ badTimer()
+ }
+ }
+}
+
+// cleantimers cleans up the head of the timer queue. This speeds up
+// programs that create and delete timers; leaving them in the heap
+// slows down addtimer. Reports whether no timer problems were found.
+// The caller must have locked the timers for pp.
+func cleantimers(pp *p) bool {
+ for {
+ if len(pp.timers) == 0 {
+ return true
+ }
+ t := pp.timers[0]
+ if t.pp.ptr() != pp {
+ throw("cleantimers: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoving) {
+ continue
+ }
+ if !dodeltimer0(pp) {
+ return false
}
- f := t.f
- arg := t.arg
- seq := t.seq
- unlock(&tb.lock)
- if !ok {
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ return false
+ }
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
+ }
+ // Now we can change the when field.
+ t.when = t.nextwhen
+ // Move t to the right position.
+ if !dodeltimer0(pp) {
+ return false
+ }
+ if !doaddtimer(pp, t) {
+ return false
+ }
+ if s == timerModifiedEarlier {
+ atomic.Xadd(&pp.adjustTimers, -1)
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ return false
+ }
+ default:
+ // Head of timers does not need adjustment.
+ return true
+ }
+ }
+}
+
+// moveTimers moves a slice of timers to pp. The slice has been taken
+// from a different P.
+// This is currently called when the world is stopped, but the caller
+// is expected to have locked the timers for pp.
+func moveTimers(pp *p, timers []*timer) {
+ for _, t := range timers {
+ loop:
+ for {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ t.pp = 0
+ if !doaddtimer(pp, t) {
+ badTimer()
+ }
+ break loop
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
+ }
+ t.when = t.nextwhen
+ t.pp = 0
+ if !doaddtimer(pp, t) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+ break loop
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoved) {
+ continue
+ }
+ t.pp = 0
+ // We no longer need this timer in the heap.
+ break loop
+ case timerModifying:
+ // Loop until the modification is complete.
+ osyield()
+ case timerNoStatus, timerRemoved:
+ // We should not see these status values in a timers heap.
badTimer()
+ case timerRunning, timerRemoving, timerMoving:
+ // Some other P thinks it owns this timer,
+ // which should not happen.
+ badTimer()
+ default:
+ badTimer()
+ }
+ }
+ }
+}
+
+// adjusttimers looks through the timers in the current P's heap for
+// any timers that have been modified to run earlier, and puts them in
+// the correct place in the heap. While looking for those timers,
+// it also moves timers that have been modified to run later,
+// and removes deleted timers. The caller must have locked the timers for pp.
+func adjusttimers(pp *p) {
+ if len(pp.timers) == 0 {
+ return
+ }
+ if atomic.Load(&pp.adjustTimers) == 0 {
+ return
+ }
+ var moved []*timer
+ for i := 0; i < len(pp.timers); i++ {
+ t := pp.timers[i]
+ if t.pp.ptr() != pp {
+ throw("adjusttimers: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerDeleted:
+ if atomic.Cas(&t.status, s, timerRemoving) {
+ if !dodeltimer(pp, i) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ badTimer()
+ }
+ // Look at this heap position again.
+ i--
}
- if raceenabled {
- raceacquire(unsafe.Pointer(t))
+ case timerModifiedEarlier, timerModifiedLater:
+ if atomic.Cas(&t.status, s, timerMoving) {
+ // Now we can change the when field.
+ t.when = t.nextwhen
+ // Take t off the heap, and hold onto it.
+ // We don't add it back yet because the
+ // heap manipulation could cause our
+ // loop to skip some other timer.
+ if !dodeltimer(pp, i) {
+ badTimer()
+ }
+ moved = append(moved, t)
+ if s == timerModifiedEarlier {
+ if n := atomic.Xadd(&pp.adjustTimers, -1); int32(n) <= 0 {
+ addAdjustedTimers(pp, moved)
+ return
+ }
+ }
}
- f(arg, seq)
- lock(&tb.lock)
+ case timerNoStatus, timerRunning, timerRemoving, timerRemoved, timerMoving:
+ badTimer()
+ case timerWaiting:
+ // OK, nothing to do.
+ case timerModifying:
+ // Check again after modification is complete.
+ osyield()
+ i--
+ default:
+ badTimer()
}
- if delta < 0 || faketime > 0 {
- // No timers left - put goroutine to sleep.
- tb.rescheduling = true
- goparkunlock(&tb.lock, waitReasonTimerGoroutineIdle, traceEvGoBlock, 1)
- continue
+ }
+
+ if len(moved) > 0 {
+ addAdjustedTimers(pp, moved)
+ }
+}
+
+// addAdjustedTimers adds any timers we adjusted in adjusttimers
+// back to the timer heap.
+func addAdjustedTimers(pp *p, moved []*timer) {
+ for _, t := range moved {
+ if !doaddtimer(pp, t) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
}
- // At least one timer pending. Sleep until then.
- tb.sleeping = true
- tb.sleepUntil = now + delta
- noteclear(&tb.waitnote)
- unlock(&tb.lock)
- notetsleepg(&tb.waitnote, delta)
}
}
-func timejump() *g {
- if faketime == 0 {
- return nil
+// nobarrierWakeTime looks at P's timers and returns the time when we
+// should wake up the netpoller. It returns 0 if there are no timers.
+// This function is invoked when dropping a P, and must run without
+// any write barriers. Therefore, if there are any timers that need
+// to be moved earlier, it conservatively returns the current time.
+// The netpoller M will wake up and adjust timers before sleeping again.
+//go:nowritebarrierrec
+func nobarrierWakeTime(pp *p) int64 {
+ lock(&pp.timersLock)
+ ret := int64(0)
+ if len(pp.timers) > 0 {
+ if atomic.Load(&pp.adjustTimers) > 0 {
+ ret = nanotime()
+ } else {
+ ret = pp.timers[0].when
+ }
}
+ unlock(&pp.timersLock)
+ return ret
+}
+
+// runtimer examines the first timer in timers. If it is ready based on now,
+// it runs the timer and removes or updates it.
+// Returns 0 if it ran a timer, -1 if there are no more timers, or the time
+// when the first timer should run.
+// The caller must have locked the timers for pp.
+// If a timer is run, this will temporarily unlock the timers.
+//go:systemstack
+func runtimer(pp *p, now int64) int64 {
+ for {
+ t := pp.timers[0]
+ if t.pp.ptr() != pp {
+ throw("runtimer: bad p")
+ }
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if t.when > now {
+ // Not ready to run.
+ return t.when
+ }
+
+ if !atomic.Cas(&t.status, s, timerRunning) {
+ continue
+ }
+ // Note that runOneTimer may temporarily unlock
+ // pp.timersLock.
+ runOneTimer(pp, t, now)
+ return 0
- for i := range timers {
- lock(&timers[i].lock)
+ case timerDeleted:
+ if !atomic.Cas(&t.status, s, timerRemoving) {
+ continue
+ }
+ if !dodeltimer0(pp) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerRemoving, timerRemoved) {
+ badTimer()
+ }
+ if len(pp.timers) == 0 {
+ return -1
+ }
+
+ case timerModifiedEarlier, timerModifiedLater:
+ if !atomic.Cas(&t.status, s, timerMoving) {
+ continue
+ }
+ t.when = t.nextwhen
+ if !dodeltimer0(pp) {
+ badTimer()
+ }
+ if !doaddtimer(pp, t) {
+ badTimer()
+ }
+ if s == timerModifiedEarlier {
+ atomic.Xadd(&pp.adjustTimers, -1)
+ }
+ if !atomic.Cas(&t.status, timerMoving, timerWaiting) {
+ badTimer()
+ }
+
+ case timerModifying:
+ // Wait for modification to complete.
+ osyield()
+
+ case timerNoStatus, timerRemoved:
+ // Should not see a new or inactive timer on the heap.
+ badTimer()
+ case timerRunning, timerRemoving, timerMoving:
+ // These should only be set when timers are locked,
+ // and we didn't do it.
+ badTimer()
+ default:
+ badTimer()
+ }
}
- gp := timejumpLocked()
- for i := range timers {
- unlock(&timers[i].lock)
+}
+
+// runOneTimer runs a single timer.
+// The caller must have locked the timers for pp.
+// This will temporarily unlock the timers while running the timer function.
+//go:systemstack
+func runOneTimer(pp *p, t *timer, now int64) {
+ f := t.f
+ arg := t.arg
+ seq := t.seq
+
+ if t.period > 0 {
+ // Leave in heap but adjust next time to fire.
+ delta := t.when - now
+ t.when += t.period * (1 + -delta/t.period)
+ if !siftdownTimer(pp.timers, 0) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerRunning, timerWaiting) {
+ badTimer()
+ }
+ } else {
+ // Remove from heap.
+ if !dodeltimer0(pp) {
+ badTimer()
+ }
+ if !atomic.Cas(&t.status, timerRunning, timerNoStatus) {
+ badTimer()
+ }
}
- return gp
+ unlock(&pp.timersLock)
+
+ f(arg, seq)
+
+ lock(&pp.timersLock)
}
-func timejumpLocked() *g {
+func timejump() *p {
+ if faketime == 0 {
+ return nil
+ }
+
+ // Nothing is running, so we can look at all the P's.
// Determine a timer bucket with minimum when.
- var minT *timer
- for i := range timers {
- tb := &timers[i]
- if !tb.created || len(tb.t) == 0 {
+ var (
+ minT *timer
+ minWhen int64
+ minP *p
+ )
+ for _, pp := range allp {
+ if pp.status != _Pidle && pp.status != _Pdead {
+ throw("non-idle P in timejump")
+ }
+ if len(pp.timers) == 0 {
continue
}
- t := tb.t[0]
- if minT == nil || t.when < minT.when {
- minT = t
+ c := pp.adjustTimers
+ for _, t := range pp.timers {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if minT == nil || t.when < minWhen {
+ minT = t
+ minWhen = t.when
+ minP = pp
+ }
+ case timerModifiedEarlier, timerModifiedLater:
+ if minT == nil || t.nextwhen < minWhen {
+ minT = t
+ minWhen = t.nextwhen
+ minP = pp
+ }
+ if s == timerModifiedEarlier {
+ c--
+ }
+ case timerRunning, timerModifying, timerMoving:
+ badTimer()
+ }
+ // The timers are sorted, so we only have to check
+ // the first timer for each P, unless there are
+ // some timerModifiedEarlier timers. The number
+ // of timerModifiedEarlier timers is in the adjustTimers
+ // field, used to initialize c, above.
+ if c == 0 {
+ break
+ }
}
}
- if minT == nil || minT.when <= faketime {
- return nil
- }
- faketime = minT.when
- tb := minT.tb
- if !tb.rescheduling {
+ if minT == nil || minWhen <= faketime {
return nil
}
- tb.rescheduling = false
- return tb.gp
+
+ faketime = minWhen
+ return minP
}
+// timeSleepUntil returns the time when the next timer should fire.
+// This is only called by sysmon.
func timeSleepUntil() int64 {
- next := int64(1<<63 - 1)
+ next := int64(maxWhen)
- // Determine minimum sleepUntil across all the timer buckets.
- //
- // The function can not return a precise answer,
- // as another timer may pop in as soon as timers have been unlocked.
- // So lock the timers one by one instead of all at once.
- for i := range timers {
- tb := &timers[i]
+ // Prevent allp slice changes. This is like retake.
+ lock(&allpLock)
+ for _, pp := range allp {
+ if pp == nil {
+ // This can happen if procresize has grown
+ // allp but not yet created new Ps.
+ continue
+ }
- lock(&tb.lock)
- if tb.sleeping && tb.sleepUntil < next {
- next = tb.sleepUntil
+ lock(&pp.timersLock)
+ c := atomic.Load(&pp.adjustTimers)
+ for _, t := range pp.timers {
+ switch s := atomic.Load(&t.status); s {
+ case timerWaiting:
+ if t.when < next {
+ next = t.when
+ }
+ case timerModifiedEarlier, timerModifiedLater:
+ if t.nextwhen < next {
+ next = t.nextwhen
+ }
+ if s == timerModifiedEarlier {
+ c--
+ }
+ }
+ // The timers are sorted, so we only have to check
+ // the first timer for each P, unless there are
+ // some timerModifiedEarlier timers. The number
+ // of timerModifiedEarlier timers is in the adjustTimers
+ // field, used to initialize c, above.
+ //
+ // We don't worry about cases like timerModifying.
+ // New timers can show up at any time,
+ // so this function is necessarily imprecise.
+ // Do a signed check here since we aren't
+ // synchronizing the read of pp.adjustTimers
+ // with the check of a timer status.
+ if int32(c) <= 0 {
+ break
+ }
}
- unlock(&tb.lock)
+ unlock(&pp.timersLock)
}
+ unlock(&allpLock)
return next
}
@@ -385,9 +953,6 @@ func timeSleepUntil() int64 {
// it will cause the program to crash with a mysterious
// "panic holding locks" message. Instead, we panic while not
// holding a lock.
-// The races can occur despite the bucket locks because assignBucket
-// itself is called without locks, so racy calls can cause a timer to
-// change buckets while executing these functions.
func siftupTimer(t []*timer, i int) bool {
if i >= len(t) {
@@ -401,12 +966,10 @@ func siftupTimer(t []*timer, i int) bool {
break
}
t[i] = t[p]
- t[i].i = i
i = p
}
if tmp != t[i] {
t[i] = tmp
- t[i].i = i
}
return true
}
@@ -444,12 +1007,10 @@ func siftdownTimer(t []*timer, i int) bool {
break
}
t[i] = t[c]
- t[i].i = i
i = c
}
if tmp != t[i] {
t[i] = tmp
- t[i].i = i
}
return true
}
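
The state machine documented at the top of time.go is driven from package time through the linknamed entry points in this file: time.startTimer feeds addtimer, time.stopTimer feeds deltimer, and time.resetTimer feeds resettimer. A minimal user-level sketch that exercises those paths with the ordinary time API (nothing here is specific to this patch) looks like:

package main

import (
	"fmt"
	"time"
)

func main() {
	// NewTimer -> time.startTimer -> addtimer: timerNoStatus -> timerWaiting.
	t := time.NewTimer(50 * time.Millisecond)

	// Stop -> time.stopTimer -> deltimer: timerWaiting -> timerDeleted.
	// The timer is only marked deleted; the P that owns it removes it later
	// via cleantimers/adjusttimers/runtimer, as described above.
	if t.Stop() {
		fmt.Println("stopped before firing")
	}

	// Reset (which stops first) -> time.resetTimer -> resettimer: the
	// inactive timer becomes active again with an updated when field.
	t.Reset(10 * time.Millisecond)
	<-t.C
	fmt.Println("fired after reset")
}
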
diff --git a/libgo/go/runtime/time_fake.go b/libgo/go/runtime/time_fake.go
new file mode 100644
index 0000000..c64d299
--- /dev/null
+++ b/libgo/go/runtime/time_fake.go
@@ -0,0 +1,100 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build faketime
+// +build !windows
+
+// Faketime isn't currently supported on Windows. This would require:
+//
+// 1. Shadowing time_now, which is implemented in assembly on Windows.
+// Since that's exported directly to the time package from runtime
+// assembly, this would involve moving it from sys_windows_*.s into
+// its own assembly files build-tagged with !faketime and using the
+// implementation of time_now from timestub.go in faketime mode.
+//
+// 2. Modifying syscall.Write to call syscall.faketimeWrite,
+// translating the Stdout and Stderr handles into FDs 1 and 2.
+// (See CL 192739 PS 3.)
+
+package runtime
+
+import "unsafe"
+
+// faketime is the simulated time in nanoseconds since 1970 for the
+// playground.
+var faketime int64 = 1257894000000000000
+
+var faketimeState struct {
+ lock mutex
+
+ // lastfaketime is the last faketime value written to fd 1 or 2.
+ lastfaketime int64
+
+ // lastfd is the fd to which lastfaketime was written.
+ //
+ // Subsequent writes to the same fd may use the same
+ // timestamp, but the timestamp must increase if the fd
+ // changes.
+ lastfd uintptr
+}
+
+//go:nosplit
+func nanotime() int64 {
+ return faketime
+}
+
+func walltime() (sec int64, nsec int32) {
+ return faketime / 1000000000, int32(faketime % 1000000000)
+}
+
+func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+ if !(fd == 1 || fd == 2) {
+ // Do an ordinary write.
+ return write1(fd, p, n)
+ }
+
+ // Write with the playback header.
+
+ // First, lock to avoid interleaving writes.
+ lock(&faketimeState.lock)
+
+ // If the current fd doesn't match the fd of the previous write,
+ // ensure that the timestamp is strictly greater. That way, we can
+ // recover the original order even if we read the fds separately.
+ t := faketimeState.lastfaketime
+ if fd != faketimeState.lastfd {
+ t++
+ faketimeState.lastfd = fd
+ }
+ if faketime > t {
+ t = faketime
+ }
+ faketimeState.lastfaketime = t
+
+ // Playback header: 0 0 P B <8-byte time> <4-byte data length> (big endian)
+ var buf [4 + 8 + 4]byte
+ buf[2] = 'P'
+ buf[3] = 'B'
+ tu := uint64(t)
+ buf[4] = byte(tu >> (7 * 8))
+ buf[5] = byte(tu >> (6 * 8))
+ buf[6] = byte(tu >> (5 * 8))
+ buf[7] = byte(tu >> (4 * 8))
+ buf[8] = byte(tu >> (3 * 8))
+ buf[9] = byte(tu >> (2 * 8))
+ buf[10] = byte(tu >> (1 * 8))
+ buf[11] = byte(tu >> (0 * 8))
+ nu := uint32(n)
+ buf[12] = byte(nu >> (3 * 8))
+ buf[13] = byte(nu >> (2 * 8))
+ buf[14] = byte(nu >> (1 * 8))
+ buf[15] = byte(nu >> (0 * 8))
+ write1(fd, unsafe.Pointer(&buf[0]), int32(len(buf)))
+
+ // Write actual data.
+ res := write1(fd, p, n)
+
+ unlock(&faketimeState.lock)
+ return res
+}
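
The faketime write path frames every stdout/stderr write with a fixed 16-byte big-endian playback header. As a counterpart to parseFakeTime in time_test.go below, a hedged encoder sketch (the helper name and standalone program are illustrative, not part of the runtime) could look like:

package main

import (
	"encoding/binary"
	"fmt"
)

// appendPlaybackFrame appends one faketime playback frame to dst:
// "\x00\x00PB", an 8-byte big-endian timestamp, a 4-byte big-endian data
// length, then the data itself. This mirrors the byte-by-byte encoding in
// the write function above.
func appendPlaybackFrame(dst []byte, t uint64, data []byte) []byte {
	var hdr [16]byte
	hdr[2], hdr[3] = 'P', 'B'
	binary.BigEndian.PutUint64(hdr[4:12], t)
	binary.BigEndian.PutUint32(hdr[12:16], uint32(len(data)))
	dst = append(dst, hdr[:]...)
	return append(dst, data...)
}

func main() {
	frame := appendPlaybackFrame(nil, 1257894000000000000, []byte("line 1\n"))
	fmt.Printf("% x\n", frame)
}
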
diff --git a/libgo/go/runtime/time_nofake.go b/libgo/go/runtime/time_nofake.go
new file mode 100644
index 0000000..1912a94
--- /dev/null
+++ b/libgo/go/runtime/time_nofake.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !faketime
+
+package runtime
+
+import "unsafe"
+
+// faketime is the simulated time in nanoseconds since 1970 for the
+// playground.
+//
+// Zero means not to use faketime.
+var faketime int64
+
+//go:nosplit
+func nanotime() int64 {
+ return nanotime1()
+}
+
+func walltime() (sec int64, nsec int32) {
+ return walltime1()
+}
+
+// write must be nosplit on Windows (see write1)
+//
+//go:nosplit
+func write(fd uintptr, p unsafe.Pointer, n int32) int32 {
+ return write1(fd, p, n)
+}
diff --git a/libgo/go/runtime/time_test.go b/libgo/go/runtime/time_test.go
new file mode 100644
index 0000000..9b1922d
--- /dev/null
+++ b/libgo/go/runtime/time_test.go
@@ -0,0 +1,96 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "internal/testenv"
+ "os/exec"
+ "reflect"
+ "runtime"
+ "testing"
+)
+
+func TestFakeTime(t *testing.T) {
+ if runtime.GOOS == "windows" {
+ t.Skip("faketime not supported on windows")
+ }
+ if runtime.Compiler == "gccgo" {
+ t.Skip("faketime not supported for gccgo")
+ }
+
+ t.Parallel()
+
+ exe, err := buildTestProg(t, "testfaketime", "-tags=faketime")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ var stdout, stderr bytes.Buffer
+ cmd := exec.Command(exe)
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+
+ err = testenv.CleanCmdEnv(cmd).Run()
+ if err != nil {
+ t.Fatalf("exit status: %v\n%s", err, stderr.String())
+ }
+
+ t.Logf("raw stdout: %q", stdout.String())
+ t.Logf("raw stderr: %q", stdout.String())
+
+ f1, err1 := parseFakeTime(stdout.Bytes())
+ if err1 != nil {
+ t.Fatal(err1)
+ }
+ f2, err2 := parseFakeTime(stderr.Bytes())
+ if err2 != nil {
+ t.Fatal(err2)
+ }
+
+ const time0 = 1257894000000000000
+ got := [][]fakeTimeFrame{f1, f2}
+ var want = [][]fakeTimeFrame{{
+ {time0 + 1, "line 2\n"},
+ {time0 + 1, "line 3\n"},
+ {time0 + 1e9, "line 5\n"},
+ {time0 + 1e9, "2009-11-10T23:00:01Z"},
+ }, {
+ {time0, "line 1\n"},
+ {time0 + 2, "line 4\n"},
+ }}
+ if !reflect.DeepEqual(want, got) {
+ t.Fatalf("want %v, got %v", want, got)
+ }
+}
+
+type fakeTimeFrame struct {
+ time uint64
+ data string
+}
+
+func parseFakeTime(x []byte) ([]fakeTimeFrame, error) {
+ var frames []fakeTimeFrame
+ for len(x) != 0 {
+ if len(x) < 4+8+4 {
+ return nil, errors.New("truncated header")
+ }
+ const magic = "\x00\x00PB"
+ if string(x[:len(magic)]) != magic {
+ return nil, errors.New("bad magic")
+ }
+ x = x[len(magic):]
+ time := binary.BigEndian.Uint64(x)
+ x = x[8:]
+ dlen := binary.BigEndian.Uint32(x)
+ x = x[4:]
+ data := string(x[:dlen])
+ x = x[dlen:]
+ frames = append(frames, fakeTimeFrame{time, data})
+ }
+ return frames, nil
+}
diff --git a/libgo/go/runtime/timestub2.go b/libgo/go/runtime/timestub2.go
index 7a28603..38446fb 100644
--- a/libgo/go/runtime/timestub2.go
+++ b/libgo/go/runtime/timestub2.go
@@ -5,6 +5,7 @@
// +build !darwin
// +build !windows
// +build !freebsd
+
package runtime
-func walltime() (sec int64, nsec int32)
+func walltime1() (sec int64, nsec int32)
diff --git a/libgo/go/runtime/trace.go b/libgo/go/runtime/trace.go
index 6db5b62..81ff0ca 100644
--- a/libgo/go/runtime/trace.go
+++ b/libgo/go/runtime/trace.go
@@ -54,7 +54,7 @@ const (
traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
traceEvHeapAlloc = 33 // memstats.heap_live change [timestamp, heap_alloc]
traceEvNextGC = 34 // memstats.next_gc change [timestamp, next_gc]
- traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
+ traceEvTimerGoroutine = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
traceEvString = 37 // string dictionary entry [ID, length, string]
traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
@@ -84,7 +84,7 @@ const (
// and ppc64le.
// Tracing won't work reliably for architectures where cputicks is emulated
// by nanotime, so the value doesn't matter for those architectures.
- traceTickDiv = 16 + 48*(sys.Goarch386|sys.GoarchAmd64|sys.GoarchAmd64p32)
+ traceTickDiv = 16 + 48*(sys.Goarch386|sys.GoarchAmd64)
// Maximum number of PCs in a single stack trace.
// Since events contain only stack id rather than whole stack trace,
// we can allow quite large values here.
@@ -181,9 +181,12 @@ func traceBufPtrOf(b *traceBuf) traceBufPtr {
// Most clients should use the runtime/trace package or the testing package's
// -test.trace flag instead of calling StartTrace directly.
func StartTrace() error {
- // Stop the world, so that we can take a consistent snapshot
+ // Stop the world so that we can take a consistent snapshot
// of all goroutines at the beginning of the trace.
- stopTheWorld("start tracing")
+ // Do not stop the world during GC so we ensure we always see
+ // a consistent view of GC-related events (e.g. a start is always
+ // paired with an end).
+ stopTheWorldGC("start tracing")
// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
// Exitsyscall could check trace.enabled long before and then suddenly wake up
@@ -194,7 +197,7 @@ func StartTrace() error {
if trace.enabled || trace.shutdown {
unlock(&trace.bufLock)
- startTheWorld()
+ startTheWorldGC()
return errorString("tracing is already enabled")
}
@@ -265,7 +268,7 @@ func StartTrace() error {
unlock(&trace.bufLock)
- startTheWorld()
+ startTheWorldGC()
return nil
}
@@ -274,14 +277,14 @@ func StartTrace() error {
func StopTrace() {
// Stop the world so that we can collect the trace buffers from all p's below,
// and also to avoid races with traceEvent.
- stopTheWorld("stop tracing")
+ stopTheWorldGC("stop tracing")
// See the comment in StartTrace.
lock(&trace.bufLock)
if !trace.enabled {
unlock(&trace.bufLock)
- startTheWorld()
+ startTheWorldGC()
return
}
@@ -318,7 +321,7 @@ func StopTrace() {
trace.shutdown = true
unlock(&trace.bufLock)
- startTheWorld()
+ startTheWorldGC()
// The world is started but we've set trace.shutdown, so new tracing can't start.
// Wait for the trace reader to flush pending buffers and stop.
@@ -414,13 +417,6 @@ func ReadTrace() []byte {
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
- for i := range timers {
- tb := &timers[i]
- if tb.gp != nil {
- data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
- data = traceAppend(data, uint64(tb.gp.goid))
- }
- }
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
trace.stackTab.dump()
@@ -922,7 +918,7 @@ func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(u
// alloc allocates n-byte block.
func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
- n = round(n, sys.PtrSize)
+ n = alignUp(n, sys.PtrSize)
if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
if n > uintptr(len(a.head.ptr().data)) {
throw("trace: alloc too large")
diff --git a/libgo/go/runtime/trace/trace_stack_test.go b/libgo/go/runtime/trace/trace_stack_test.go
index 62c06e6..e3608c6 100644
--- a/libgo/go/runtime/trace/trace_stack_test.go
+++ b/libgo/go/runtime/trace/trace_stack_test.go
@@ -233,6 +233,7 @@ func TestTraceSymbolize(t *testing.T) {
}},
{trace.EvGomaxprocs, []frame{
{"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
+ {"runtime.startTheWorldGC", 0},
{"runtime.GOMAXPROCS", 0},
{"runtime/trace_test.TestTraceSymbolize", 0},
{"testing.tRunner", 0},
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 4134d28..1ba91af 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -20,7 +20,7 @@ func printcreatedby(gp *g) {
if entry != 0 && tracepc > entry {
tracepc -= sys.PCQuantum
}
- function, file, line, _ := funcfileline(tracepc, -1)
+ function, file, line, _ := funcfileline(tracepc, -1, false)
if function != "" && showframe(function, gp, false) && gp.goid != 1 {
printcreatedby1(function, file, line, entry, pc)
}
@@ -93,7 +93,7 @@ func traceback(skip int32) {
func printAncestorTraceback(ancestor ancestorInfo) {
print("[originating from goroutine ", ancestor.goid, "]:\n")
for fidx, pc := range ancestor.pcs {
- function, file, line, _ := funcfileline(pc, -1)
+ function, file, line, _ := funcfileline(pc, -1, false)
if showfuncinfo(function, fidx == 0) {
printAncestorTracebackFuncInfo(function, file, line, pc)
}
@@ -102,7 +102,7 @@ func printAncestorTraceback(ancestor ancestorInfo) {
print("...additional frames elided...\n")
}
// Show what created goroutine, except main goroutine (goid 1).
- function, file, line, _ := funcfileline(ancestor.gopc, -1)
+ function, file, line, _ := funcfileline(ancestor.gopc, -1, false)
if function != "" && showfuncinfo(function, false) && ancestor.goid != 1 {
printcreatedby1(function, file, line, funcentry(ancestor.gopc), ancestor.gopc)
}
diff --git a/libgo/go/runtime/treap_test.go b/libgo/go/runtime/treap_test.go
deleted file mode 100644
index 110f51c..0000000
--- a/libgo/go/runtime/treap_test.go
+++ /dev/null
@@ -1,270 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime_test
-
-import (
- "fmt"
- "runtime"
- "testing"
-)
-
-var spanDesc = map[uintptr]struct {
- pages uintptr
- scav bool
-}{
- 0xc0000000: {2, false},
- 0xc0006000: {1, false},
- 0xc0010000: {8, false},
- 0xc0022000: {7, false},
- 0xc0034000: {4, true},
- 0xc0040000: {5, false},
- 0xc0050000: {5, true},
- 0xc0060000: {5000, false},
-}
-
-// Wrap the Treap one more time because go:notinheap doesn't
-// actually follow a structure across package boundaries.
-//
-//go:notinheap
-type treap struct {
- runtime.Treap
-}
-
-func maskMatchName(mask, match runtime.TreapIterType) string {
- return fmt.Sprintf("%0*b-%0*b", runtime.TreapIterBits, uint8(mask), runtime.TreapIterBits, uint8(match))
-}
-
-func TestTreapFilter(t *testing.T) {
- var iterTypes = [...]struct {
- mask, match runtime.TreapIterType
- filter runtime.TreapIterFilter // expected filter
- }{
- {0, 0, 0xf},
- {runtime.TreapIterScav, 0, 0x5},
- {runtime.TreapIterScav, runtime.TreapIterScav, 0xa},
- {runtime.TreapIterScav | runtime.TreapIterHuge, runtime.TreapIterHuge, 0x4},
- {runtime.TreapIterScav | runtime.TreapIterHuge, 0, 0x1},
- {0, runtime.TreapIterScav, 0x0},
- }
- for _, it := range iterTypes {
- t.Run(maskMatchName(it.mask, it.match), func(t *testing.T) {
- if f := runtime.TreapFilter(it.mask, it.match); f != it.filter {
- t.Fatalf("got %#x, want %#x", f, it.filter)
- }
- })
- }
-}
-
-// This test ensures that the treap implementation in the runtime
-// maintains all stated invariants after different sequences of
-// insert, removeSpan, find, and erase. Invariants specific to the
-// treap data structure are checked implicitly: after each mutating
-// operation, treap-related invariants are checked for the entire
-// treap.
-func TestTreap(t *testing.T) {
- // Set up a bunch of spans allocated into mheap_.
- // Also, derive a set of typeCounts of each type of span
- // according to runtime.TreapIterType so we can verify against
- // them later.
- spans := make([]runtime.Span, 0, len(spanDesc))
- typeCounts := [1 << runtime.TreapIterBits][1 << runtime.TreapIterBits]int{}
- for base, de := range spanDesc {
- s := runtime.AllocSpan(base, de.pages, de.scav)
- defer s.Free()
- spans = append(spans, s)
-
- for i := runtime.TreapIterType(0); i < 1<<runtime.TreapIterBits; i++ {
- for j := runtime.TreapIterType(0); j < 1<<runtime.TreapIterBits; j++ {
- if s.MatchesIter(i, j) {
- typeCounts[i][j]++
- }
- }
- }
- }
- t.Run("TypeCountsSanity", func(t *testing.T) {
- // Just sanity check type counts for a few values.
- check := func(mask, match runtime.TreapIterType, count int) {
- tc := typeCounts[mask][match]
- if tc != count {
- name := maskMatchName(mask, match)
- t.Fatalf("failed a sanity check for mask/match %s counts: got %d, wanted %d", name, tc, count)
- }
- }
- check(0, 0, len(spanDesc))
- check(runtime.TreapIterScav, 0, 6)
- check(runtime.TreapIterScav, runtime.TreapIterScav, 2)
- })
- t.Run("Insert", func(t *testing.T) {
- tr := treap{}
- // Test just a very basic insert/remove for sanity.
- tr.Insert(spans[0])
- tr.RemoveSpan(spans[0])
- })
- t.Run("FindTrivial", func(t *testing.T) {
- tr := treap{}
- // Test just a very basic find operation for sanity.
- tr.Insert(spans[0])
- i := tr.Find(1)
- if i.Span() != spans[0] {
- t.Fatal("found unknown span in treap")
- }
- tr.RemoveSpan(spans[0])
- })
- t.Run("FindFirstFit", func(t *testing.T) {
- // Run this 10 times, recreating the treap each time.
- // Because of the non-deterministic structure of a treap,
- // we'll be able to test different structures this way.
- for i := 0; i < 10; i++ {
- tr := runtime.Treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Find(5)
- if i.Span().Base() != 0xc0010000 {
- t.Fatalf("expected span at lowest address which could fit 5 pages, instead found span at %x", i.Span().Base())
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- }
- })
- t.Run("Iterate", func(t *testing.T) {
- for mask := runtime.TreapIterType(0); mask < 1<<runtime.TreapIterBits; mask++ {
- for match := runtime.TreapIterType(0); match < 1<<runtime.TreapIterBits; match++ {
- iterName := maskMatchName(mask, match)
- t.Run(iterName, func(t *testing.T) {
- t.Run("StartToEnd", func(t *testing.T) {
- // Ensure progressing an iterator actually goes over the whole treap
- // from the start and that it iterates over the elements in order.
- // Furthermore, ensure that it only iterates over the relevant parts
- // of the treap.
- // Finally, ensures that Start returns a valid iterator.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- nspans := 0
- lastBase := uintptr(0)
- for i := tr.Start(mask, match); i.Valid(); i = i.Next() {
- nspans++
- if lastBase > i.Span().Base() {
- t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base())
- }
- lastBase = i.Span().Base()
- if !i.Span().MatchesIter(mask, match) {
- t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base())
- }
- }
- if nspans != typeCounts[mask][match] {
- t.Fatal("failed to iterate forwards over full treap")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("EndToStart", func(t *testing.T) {
- // See StartToEnd tests.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- nspans := 0
- lastBase := ^uintptr(0)
- for i := tr.End(mask, match); i.Valid(); i = i.Prev() {
- nspans++
- if lastBase < i.Span().Base() {
- t.Fatalf("not iterating in correct order: encountered base %x before %x", lastBase, i.Span().Base())
- }
- lastBase = i.Span().Base()
- if !i.Span().MatchesIter(mask, match) {
- t.Fatalf("found non-matching span while iteration over mask/match %s: base %x", iterName, i.Span().Base())
- }
- }
- if nspans != typeCounts[mask][match] {
- t.Fatal("failed to iterate backwards over full treap")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- })
- }
- }
- t.Run("Prev", func(t *testing.T) {
- // Test the iterator invariant that i.prev().next() == i.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next()
- p := i.Prev()
- if !p.Valid() {
- t.Fatal("i.prev() is invalid")
- }
- if p.Next().Span() != i.Span() {
- t.Fatal("i.prev().next() != i")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("Next", func(t *testing.T) {
- // Test the iterator invariant that i.next().prev() == i.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next()
- n := i.Next()
- if !n.Valid() {
- t.Fatal("i.next() is invalid")
- }
- if n.Prev().Span() != i.Span() {
- t.Fatal("i.next().prev() != i")
- }
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- })
- t.Run("EraseOne", func(t *testing.T) {
- // Test that erasing one iterator correctly retains
- // all relationships between elements.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- i := tr.Start(0, 0).Next().Next().Next()
- s := i.Span()
- n := i.Next()
- p := i.Prev()
- tr.Erase(i)
- if n.Prev().Span() != p.Span() {
- t.Fatal("p, n := i.Prev(), i.Next(); n.prev() != p after i was erased")
- }
- if p.Next().Span() != n.Span() {
- t.Fatal("p, n := i.Prev(), i.Next(); p.next() != n after i was erased")
- }
- tr.Insert(s)
- for _, s := range spans {
- tr.RemoveSpan(s)
- }
- })
- t.Run("EraseAll", func(t *testing.T) {
- // Test that erasing iterators actually removes nodes from the treap.
- tr := treap{}
- for _, s := range spans {
- tr.Insert(s)
- }
- for i := tr.Start(0, 0); i.Valid(); {
- n := i.Next()
- tr.Erase(i)
- i = n
- }
- if size := tr.Size(); size != 0 {
- t.Fatalf("should have emptied out treap, %d spans left", size)
- }
- })
-}