diff options
Diffstat (limited to 'libgo/go')
152 files changed, 14893 insertions, 909 deletions
diff --git a/libgo/go/bytes/bytes_test.go b/libgo/go/bytes/bytes_test.go index 26eac5e..ad01952 100644 --- a/libgo/go/bytes/bytes_test.go +++ b/libgo/go/bytes/bytes_test.go @@ -10,6 +10,7 @@ import ( "internal/testenv" "math/rand" "reflect" + "runtime" "strings" "testing" "unicode" @@ -392,7 +393,11 @@ func TestIndexRune(t *testing.T) { } }) if allocs != 0 { - t.Errorf("expected no allocations, got %f", allocs) + if runtime.Compiler == "gccgo" { + t.Log("does not work on gccgo without better escape analysis") + } else { + t.Errorf("expected no allocations, got %f", allocs) + } } } diff --git a/libgo/go/crypto/rand/eagain.go b/libgo/go/crypto/rand/eagain.go index 7ed2f47..045d037 100644 --- a/libgo/go/crypto/rand/eagain.go +++ b/libgo/go/crypto/rand/eagain.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package rand diff --git a/libgo/go/crypto/rand/rand_unix.go b/libgo/go/crypto/rand/rand_unix.go index 631972b..ec474d3 100644 --- a/libgo/go/crypto/rand/rand_unix.go +++ b/libgo/go/crypto/rand/rand_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd plan9 solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd plan9 solaris // Unix cryptographically secure pseudorandom number // generator. diff --git a/libgo/go/crypto/x509/root_aix.go b/libgo/go/crypto/x509/root_aix.go new file mode 100644 index 0000000..de5702d --- /dev/null +++ b/libgo/go/crypto/x509/root_aix.go @@ -0,0 +1,8 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package x509 + +// Possible certificate files; stop after finding one. +var certFiles []string diff --git a/libgo/go/crypto/x509/root_unix.go b/libgo/go/crypto/x509/root_unix.go index 7bcb3d6..c44a524 100644 --- a/libgo/go/crypto/x509/root_unix.go +++ b/libgo/go/crypto/x509/root_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix dragonfly freebsd linux nacl netbsd openbsd solaris package x509 @@ -16,6 +16,7 @@ import ( var certDirectories = []string{ "/etc/ssl/certs", // SLES10/SLES11, https://golang.org/issue/12139 "/system/etc/security/cacerts", // Android + "/var/ssl/certs", // AIX } func (c *Certificate) systemVerify(opts *VerifyOptions) (chains [][]*Certificate, err error) { diff --git a/libgo/go/go/build/syslist.go b/libgo/go/go/build/syslist.go index ea316ea..fe9820e 100644 --- a/libgo/go/go/build/syslist.go +++ b/libgo/go/go/build/syslist.go @@ -4,5 +4,5 @@ package build -const goosList = "android darwin dragonfly freebsd linux nacl netbsd openbsd plan9 solaris windows zos " +const goosList = "aix android darwin dragonfly freebsd linux nacl netbsd openbsd plan9 solaris windows zos " const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be alpha m68k ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le mipso32 mipsn32 mipsn64 mipso64 ppc s390 s390x sparc sparc64 " diff --git a/libgo/go/math/atan.go b/libgo/go/math/atan.go index d942bce..4c9eda4 100644 --- a/libgo/go/math/atan.go +++ b/libgo/go/math/atan.go @@ -97,6 +97,9 @@ func satan(x float64) float64 { func libc_atan(float64) float64 func Atan(x float64) float64 { + if x == 0 { + return x + } return libc_atan(x) } diff --git a/libgo/go/math/expm1.go b/libgo/go/math/expm1.go index a0a62d1..7494043 100644 --- a/libgo/go/math/expm1.go +++ b/libgo/go/math/expm1.go @@ -126,6 +126,9 @@ package math func libc_expm1(float64) float64 func Expm1(x float64) float64 { + if x == 0 { + return x + } return libc_expm1(x) } diff --git a/libgo/go/math/log1p.go b/libgo/go/math/log1p.go index ef1c7de..044495a 100644 --- a/libgo/go/math/log1p.go +++ b/libgo/go/math/log1p.go @@ -97,6 +97,9 @@ package math func libc_log1p(float64) float64 func Log1p(x float64) float64 { + if x == 0 { + return x + } return libc_log1p(x) } diff --git a/libgo/go/mime/type_unix.go b/libgo/go/mime/type_unix.go index bb06a77..8e177ca 100644 --- a/libgo/go/mime/type_unix.go +++ b/libgo/go/mime/type_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package mime diff --git a/libgo/go/net/addrselect.go b/libgo/go/net/addrselect.go index 1ab9fc53..7c0dfe2 100644 --- a/libgo/go/net/addrselect.go +++ b/libgo/go/net/addrselect.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Minimal RFC 6724 address selection. diff --git a/libgo/go/net/cgo_aix.go b/libgo/go/net/cgo_aix.go new file mode 100644 index 0000000..4f23d9b --- /dev/null +++ b/libgo/go/net/cgo_aix.go @@ -0,0 +1,13 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build cgo,!netgo + +package net + +import ( + "syscall" +) + +const cgoAddrInfoFlags = syscall.AI_CANONNAME diff --git a/libgo/go/net/cgo_resnew.go b/libgo/go/net/cgo_resnew.go index ebca1bd..81b39c9 100644 --- a/libgo/go/net/cgo_resnew.go +++ b/libgo/go/net/cgo_resnew.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // +build cgo,!netgo -// +build darwin linux,!android netbsd solaris +// +build aix darwin linux,!android netbsd solaris package net diff --git a/libgo/go/net/cgo_sockold.go b/libgo/go/net/cgo_sockold.go index 432634b..25d4f67 100644 --- a/libgo/go/net/cgo_sockold.go +++ b/libgo/go/net/cgo_sockold.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // +build cgo,!netgo -// +build darwin dragonfly freebsd netbsd openbsd +// +build aix darwin dragonfly freebsd netbsd openbsd package net diff --git a/libgo/go/net/cgo_unix.go b/libgo/go/net/cgo_unix.go index a90aaa9..09cfb2a 100644 --- a/libgo/go/net/cgo_unix.go +++ b/libgo/go/net/cgo_unix.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // +build cgo,!netgo -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/cgo_unix_test.go b/libgo/go/net/cgo_unix_test.go index e861c7a..e579198 100644 --- a/libgo/go/net/cgo_unix_test.go +++ b/libgo/go/net/cgo_unix_test.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // +build cgo,!netgo -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/conf.go b/libgo/go/net/conf.go index c10aafe..a798699 100644 --- a/libgo/go/net/conf.go +++ b/libgo/go/net/conf.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/dial_unix_test.go b/libgo/go/net/dial_unix_test.go index 4705254..d5c6dde2 100644 --- a/libgo/go/net/dial_unix_test.go +++ b/libgo/go/net/dial_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/dnsclient_unix.go b/libgo/go/net/dnsclient_unix.go index 4dd4e16..0647b9c 100644 --- a/libgo/go/net/dnsclient_unix.go +++ b/libgo/go/net/dnsclient_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // DNS client: see RFC 1035. // Has to be linked into package net for Dial. diff --git a/libgo/go/net/dnsclient_unix_test.go b/libgo/go/net/dnsclient_unix_test.go index 85267bb..c66d2d1 100644 --- a/libgo/go/net/dnsclient_unix_test.go +++ b/libgo/go/net/dnsclient_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/dnsconfig_unix.go b/libgo/go/net/dnsconfig_unix.go index 9c8108d..24487af 100644 --- a/libgo/go/net/dnsconfig_unix.go +++ b/libgo/go/net/dnsconfig_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Read system DNS config from /etc/resolv.conf diff --git a/libgo/go/net/dnsconfig_unix_test.go b/libgo/go/net/dnsconfig_unix_test.go index 37bdeb0..0797559 100644 --- a/libgo/go/net/dnsconfig_unix_test.go +++ b/libgo/go/net/dnsconfig_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/fd_poll_runtime.go b/libgo/go/net/fd_poll_runtime.go index 62b69fc..4ea92cb 100644 --- a/libgo/go/net/fd_poll_runtime.go +++ b/libgo/go/net/fd_poll_runtime.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd windows solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd windows solaris package net diff --git a/libgo/go/net/fd_posix.go b/libgo/go/net/fd_posix.go index b4b908a..7230479 100644 --- a/libgo/go/net/fd_posix.go +++ b/libgo/go/net/fd_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/fd_unix.go b/libgo/go/net/fd_unix.go index 9bc5ebc..b6ee059 100644 --- a/libgo/go/net/fd_unix.go +++ b/libgo/go/net/fd_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package net diff --git a/libgo/go/net/file_unix.go b/libgo/go/net/file_unix.go index 9e581fc..b47a614 100644 --- a/libgo/go/net/file_unix.go +++ b/libgo/go/net/file_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/hook_unix.go b/libgo/go/net/hook_unix.go index cf52567..b2522a2 100644 --- a/libgo/go/net/hook_unix.go +++ b/libgo/go/net/hook_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package net diff --git a/libgo/go/net/interface_stub.go b/libgo/go/net/interface_stub.go index 3b0a1ae..6d7147e 100644 --- a/libgo/go/net/interface_stub.go +++ b/libgo/go/net/interface_stub.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build nacl +// +build aix nacl package net diff --git a/libgo/go/net/internal/socktest/switch_unix.go b/libgo/go/net/internal/socktest/switch_unix.go index 14c0c22..8fb15f3 100644 --- a/libgo/go/net/internal/socktest/switch_unix.go +++ b/libgo/go/net/internal/socktest/switch_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package socktest diff --git a/libgo/go/net/iprawsock_posix.go b/libgo/go/net/iprawsock_posix.go index 8f4b702..16e65dc 100644 --- a/libgo/go/net/iprawsock_posix.go +++ b/libgo/go/net/iprawsock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/ipsock_posix.go b/libgo/go/net/ipsock_posix.go index ff280c3..05bf939 100644 --- a/libgo/go/net/ipsock_posix.go +++ b/libgo/go/net/ipsock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/lookup_unix.go b/libgo/go/net/lookup_unix.go index be2ced9..f96c8be 100644 --- a/libgo/go/net/lookup_unix.go +++ b/libgo/go/net/lookup_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/main_unix_test.go b/libgo/go/net/main_unix_test.go index 0cc129f..8c8f944 100644 --- a/libgo/go/net/main_unix_test.go +++ b/libgo/go/net/main_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package net diff --git a/libgo/go/net/nss.go b/libgo/go/net/nss.go index 08c3e6a..1650f5e 100644 --- a/libgo/go/net/nss.go +++ b/libgo/go/net/nss.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package net diff --git a/libgo/go/net/port_unix.go b/libgo/go/net/port_unix.go index 868d1e4..3120ba1 100644 --- a/libgo/go/net/port_unix.go +++ b/libgo/go/net/port_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris nacl +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris nacl // Read system port mappings from /etc/services diff --git a/libgo/go/net/sendfile_stub.go b/libgo/go/net/sendfile_stub.go index 905f1d6..f043062 100644 --- a/libgo/go/net/sendfile_stub.go +++ b/libgo/go/net/sendfile_stub.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin nacl netbsd openbsd +// +build aix darwin nacl netbsd openbsd package net diff --git a/libgo/go/net/sock_posix.go b/libgo/go/net/sock_posix.go index 16351e1..6bbfd12 100644 --- a/libgo/go/net/sock_posix.go +++ b/libgo/go/net/sock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/sock_stub.go b/libgo/go/net/sock_stub.go index 5ac1e86..d1ec029 100644 --- a/libgo/go/net/sock_stub.go +++ b/libgo/go/net/sock_stub.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build nacl solaris +// +build aix nacl solaris package net diff --git a/libgo/go/net/sockopt_aix.go b/libgo/go/net/sockopt_aix.go new file mode 100644 index 0000000..7aef64b --- /dev/null +++ b/libgo/go/net/sockopt_aix.go @@ -0,0 +1,34 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package net + +import ( + "os" + "syscall" +) + +// This was copied from sockopt_linux.go + +func setDefaultSockopts(s, family, sotype int, ipv6only bool) error { + if family == syscall.AF_INET6 && sotype != syscall.SOCK_RAW { + // Allow both IP versions even if the OS default + // is otherwise. Note that some operating systems + // never admit this option. + syscall.SetsockoptInt(s, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, boolint(ipv6only)) + } + // Allow broadcast. + return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_BROADCAST, 1)) +} + +func setDefaultListenerSockopts(s int) error { + // Allow reuse of recently-used addresses. + return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1)) +} + +func setDefaultMulticastSockopts(s int) error { + // Allow multicast UDP and raw IP datagram sockets to listen + // concurrently across multiple listeners. + return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1)) +} diff --git a/libgo/go/net/sockopt_posix.go b/libgo/go/net/sockopt_posix.go index cd3d562..cacd048 100644 --- a/libgo/go/net/sockopt_posix.go +++ b/libgo/go/net/sockopt_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows package net diff --git a/libgo/go/net/sockoptip_aix.go b/libgo/go/net/sockoptip_aix.go new file mode 100644 index 0000000..1e28fe6 --- /dev/null +++ b/libgo/go/net/sockoptip_aix.go @@ -0,0 +1,15 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package net + +import "syscall" + +func setIPv4MulticastInterface(fd *netFD, ifi *Interface) error { + return syscall.ENOPROTOOPT +} + +func setIPv4MulticastLoopback(fd *netFD, v bool) error { + return syscall.ENOPROTOOPT +} diff --git a/libgo/go/net/sockoptip_posix.go b/libgo/go/net/sockoptip_posix.go index d508860..4afd4c8 100644 --- a/libgo/go/net/sockoptip_posix.go +++ b/libgo/go/net/sockoptip_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd windows +// +build aix darwin dragonfly freebsd linux netbsd openbsd windows package net diff --git a/libgo/go/net/sys_cloexec.go b/libgo/go/net/sys_cloexec.go index ba266e6..f2ea842 100644 --- a/libgo/go/net/sys_cloexec.go +++ b/libgo/go/net/sys_cloexec.go @@ -5,7 +5,7 @@ // This file implements sysSocket and accept for platforms that do not // provide a fast path for setting SetNonblock and CloseOnExec. -// +build darwin dragonfly nacl netbsd openbsd solaris +// +build aix darwin dragonfly nacl netbsd openbsd solaris package net diff --git a/libgo/go/net/tcpsock_posix.go b/libgo/go/net/tcpsock_posix.go index 9641e5c..7533c24 100644 --- a/libgo/go/net/tcpsock_posix.go +++ b/libgo/go/net/tcpsock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/tcpsockopt_posix.go b/libgo/go/net/tcpsockopt_posix.go index 805b56b..36866ac 100644 --- a/libgo/go/net/tcpsockopt_posix.go +++ b/libgo/go/net/tcpsockopt_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows package net diff --git a/libgo/go/net/tcpsockopt_unix.go b/libgo/go/net/tcpsockopt_unix.go index 8d44fb2..46e5e6d 100644 --- a/libgo/go/net/tcpsockopt_unix.go +++ b/libgo/go/net/tcpsockopt_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build freebsd linux netbsd +// +build aix freebsd linux netbsd package net diff --git a/libgo/go/net/udpsock_posix.go b/libgo/go/net/udpsock_posix.go index 72aadca..0c905af 100644 --- a/libgo/go/net/udpsock_posix.go +++ b/libgo/go/net/udpsock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/net/unixsock_posix.go b/libgo/go/net/unixsock_posix.go index a8f892e..945aa03 100644 --- a/libgo/go/net/unixsock_posix.go +++ b/libgo/go/net/unixsock_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package net diff --git a/libgo/go/os/dir_gccgo.go b/libgo/go/os/dir_gccgo.go index d811c9f..8923f0e 100644 --- a/libgo/go/os/dir_gccgo.go +++ b/libgo/go/os/dir_gccgo.go @@ -6,6 +6,7 @@ package os import ( "io" + "runtime" "sync/atomic" "syscall" "unsafe" @@ -81,6 +82,11 @@ func (file *File) readdirnames(n int) (names []string, err error) { syscall.Entersyscall() i := libc_readdir_r(file.dirinfo.dir, entryDirent, pr) syscall.Exitsyscall() + // On AIX when readdir_r hits EOF it sets dirent to nil and returns 9. + // https://www.ibm.com/support/knowledgecenter/ssw_aix_71/com.ibm.aix.basetrf2/readdir_r.htm + if runtime.GOOS == "aix" && i == 9 && dirent == nil { + break + } if i != 0 { return names, NewSyscallError("readdir_r", i) } diff --git a/libgo/go/os/dir_largefile.go b/libgo/go/os/dir_largefile.go index 2873342..75df6a4 100644 --- a/libgo/go/os/dir_largefile.go +++ b/libgo/go/os/dir_largefile.go @@ -5,7 +5,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux solaris,386 solaris,sparc +// +build aix linux solaris,386 solaris,sparc package os diff --git a/libgo/go/os/dir_regfile.go b/libgo/go/os/dir_regfile.go index 8b17f38..02ddd7b 100644 --- a/libgo/go/os/dir_regfile.go +++ b/libgo/go/os/dir_regfile.go @@ -5,6 +5,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !aix // +build !linux // +build !solaris !386 // +build !solaris !sparc diff --git a/libgo/go/os/dir_unix.go b/libgo/go/os/dir_unix.go index cd42f59..2dc6a89 100644 --- a/libgo/go/os/dir_unix.go +++ b/libgo/go/os/dir_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/error_unix.go b/libgo/go/os/error_unix.go index be1440c..2349851 100644 --- a/libgo/go/os/error_unix.go +++ b/libgo/go/os/error_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/exec/lp_unix.go b/libgo/go/os/exec/lp_unix.go index 7a30275..20ce7a4 100644 --- a/libgo/go/os/exec/lp_unix.go +++ b/libgo/go/os/exec/lp_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package exec diff --git a/libgo/go/os/exec_posix.go b/libgo/go/os/exec_posix.go index 3cf38b68..9e792b4 100644 --- a/libgo/go/os/exec_posix.go +++ b/libgo/go/os/exec_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package os diff --git a/libgo/go/os/exec_unix.go b/libgo/go/os/exec_unix.go index c4999db..d6433bf 100644 --- a/libgo/go/os/exec_unix.go +++ b/libgo/go/os/exec_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/executable_path.go b/libgo/go/os/executable_path.go new file mode 100644 index 0000000..117320d --- /dev/null +++ b/libgo/go/os/executable_path.go @@ -0,0 +1,104 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build aix + +package os + +// We query the working directory at init, to use it later to search for the +// executable file +// errWd will be checked later, if we need to use initWd +var initWd, errWd = Getwd() + +func executable() (string, error) { + var err error + var exePath string + if len(Args) == 0 || Args[0] == "" { + return "", ErrNotExist + } + // Args[0] is an absolute path : this is the executable + if IsPathSeparator(Args[0][0]) { + exePath = Args[0] + } else { + for i := 1; i < len(Args[0]); i++ { + // Args[0] is a relative path : append current directory + if IsPathSeparator(Args[0][i]) { + if errWd != nil { + return "", errWd + } + exePath = initWd + string(PathSeparator) + Args[0] + break + } + } + } + if exePath != "" { + err = isExecutable(exePath) + if err == nil { + return exePath, nil + } + // File does not exist or is not executable, + // this is an unexpected situation ! + return "", err + } + // Search for executable in $PATH + for _, dir := range splitPathList(Getenv("PATH")) { + if len(dir) == 0 { + continue + } + if !IsPathSeparator(dir[0]) { + if errWd != nil { + return "", errWd + } + dir = initWd + string(PathSeparator) + dir + } + exePath = dir + string(PathSeparator) + Args[0] + err = isExecutable(exePath) + if err == nil { + return exePath, nil + } + if err == ErrPermission { + return "", err + } + } + return "", ErrNotExist +} + +// isExecutable returns an error if a given file is not an executable. +func isExecutable(path string) error { + stat, err := Stat(path) + if err != nil { + return err + } + mode := stat.Mode() + if !mode.IsRegular() { + return ErrPermission + } + if (mode & 0111) != 0 { + return nil + } + return ErrPermission +} + +// splitPathList splits a path list. +// This is based on genSplit from strings/strings.go +func splitPathList(pathList string) []string { + n := 1 + for i := 0; i < len(pathList); i++ { + if pathList[i] == PathListSeparator { + n++ + } + } + start := 0 + a := make([]string, n) + na := 0 + for i := 0; i+1 <= len(pathList) && na+1 < n; i++ { + if pathList[i] == PathListSeparator { + a[na] = pathList[start:i] + na++ + start = i + 1 + } + } + a[na] = pathList[start:] + return a[:na+1] +} diff --git a/libgo/go/os/file_posix.go b/libgo/go/os/file_posix.go index d817f34..6634112 100644 --- a/libgo/go/os/file_posix.go +++ b/libgo/go/os/file_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package os @@ -19,6 +19,10 @@ func Readlink(name string) (string, error) { for len := 128; ; len *= 2 { b := make([]byte, len) n, e := fixCount(syscall.Readlink(fixLongPath(name), b)) + // buffer too small + if e == syscall.ERANGE { + continue + } if e != nil { return "", &PathError{"readlink", name, e} } diff --git a/libgo/go/os/file_unix.go b/libgo/go/os/file_unix.go index 54b5dfd..1bba4ed 100644 --- a/libgo/go/os/file_unix.go +++ b/libgo/go/os/file_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/path_unix.go b/libgo/go/os/path_unix.go index ecf098c..bc0f239 100644 --- a/libgo/go/os/path_unix.go +++ b/libgo/go/os/path_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/pipe_bsd.go b/libgo/go/os/pipe_bsd.go index 3b81ed2..ebe198b 100644 --- a/libgo/go/os/pipe_bsd.go +++ b/libgo/go/os/pipe_bsd.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/signal/signal_test.go b/libgo/go/os/signal/signal_test.go index 406102c..c8409e7 100644 --- a/libgo/go/os/signal/signal_test.go +++ b/libgo/go/os/signal/signal_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package signal diff --git a/libgo/go/os/signal/signal_unix.go b/libgo/go/os/signal/signal_unix.go index 01b1b14..5ec7e97 100644 --- a/libgo/go/os/signal/signal_unix.go +++ b/libgo/go/os/signal/signal_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package signal diff --git a/libgo/go/os/stat.go b/libgo/go/os/stat.go index 59cac9c..564215b 100644 --- a/libgo/go/os/stat.go +++ b/libgo/go/os/stat.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !aix // +build !darwin // +build !freebsd // +build !linux diff --git a/libgo/go/os/stat_atim.go b/libgo/go/os/stat_atim.go index ef8a574..82481c0 100644 --- a/libgo/go/os/stat_atim.go +++ b/libgo/go/os/stat_atim.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux openbsd solaristag +// +build aix linux openbsd solaristag package os diff --git a/libgo/go/os/stat_unix.go b/libgo/go/os/stat_unix.go index 1733d3f..043aefe 100644 --- a/libgo/go/os/stat_unix.go +++ b/libgo/go/os/stat_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package os diff --git a/libgo/go/os/sys_uname.go b/libgo/go/os/sys_uname.go index 71fa867..89fbff8 100644 --- a/libgo/go/os/sys_uname.go +++ b/libgo/go/os/sys_uname.go @@ -4,7 +4,7 @@ // For systems which only store the hostname in uname (Solaris). -// +build solaris irix rtems +// +build aix solaris irix rtems package os diff --git a/libgo/go/os/sys_unix.go b/libgo/go/os/sys_unix.go index 39c20dc..4caf8bd 100644 --- a/libgo/go/os/sys_unix.go +++ b/libgo/go/os/sys_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build dragonfly linux netbsd openbsd solaris +// +build aix dragonfly linux netbsd openbsd solaris package os diff --git a/libgo/go/os/user/decls_aix.go b/libgo/go/os/user/decls_aix.go new file mode 100644 index 0000000..64455b5 --- /dev/null +++ b/libgo/go/os/user/decls_aix.go @@ -0,0 +1,24 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package user + +import "syscall" + +// Declarations for the libc functions on AIX. + +//extern _posix_getpwnam_r +func libc_getpwnam_r(name *byte, pwd *syscall.Passwd, buf *byte, buflen syscall.Size_t, result **syscall.Passwd) int + +//extern _posix_getpwuid_r +func libc_getpwuid_r(uid syscall.Uid_t, pwd *syscall.Passwd, buf *byte, buflen syscall.Size_t, result **syscall.Passwd) int + +//extern _posix_getgrnam_r +func libc_getgrnam_r(name *byte, grp *syscall.Group, buf *byte, buflen syscall.Size_t, result **syscall.Group) int + +//extern _posix_getgrgid_r +func libc_getgrgid_r(gid syscall.Gid_t, grp *syscall.Group, buf *byte, buflen syscall.Size_t, result **syscall.Group) int + +//extern getgrset +func libc_getgrset(user *byte) *byte diff --git a/libgo/go/os/user/listgroups_aix.go b/libgo/go/os/user/listgroups_aix.go new file mode 100644 index 0000000..5b9f3f9 --- /dev/null +++ b/libgo/go/os/user/listgroups_aix.go @@ -0,0 +1,11 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package user + +import "fmt" + +func listGroups(u *User) ([]string, error) { + return nil, fmt.Errorf("user: list groups for %s: not supported on AIX", u.Username) +} diff --git a/libgo/go/os/user/lookup_unix.go b/libgo/go/os/user/lookup_unix.go index 8881366..9670ada 100644 --- a/libgo/go/os/user/lookup_unix.go +++ b/libgo/go/os/user/lookup_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd !android,linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd !android,linux netbsd openbsd solaris // +build cgo package user diff --git a/libgo/go/os/wait_unimp.go b/libgo/go/os/wait_unimp.go index 7059e59..0378b83 100644 --- a/libgo/go/os/wait_unimp.go +++ b/libgo/go/os/wait_unimp.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build dragonfly nacl netbsd openbsd solaris +// +build aix dragonfly nacl netbsd openbsd solaris package os diff --git a/libgo/go/path/filepath/path_unix.go b/libgo/go/path/filepath/path_unix.go index d77ff24..2d407a8 100644 --- a/libgo/go/path/filepath/path_unix.go +++ b/libgo/go/path/filepath/path_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package filepath diff --git a/libgo/go/reflect/type.go b/libgo/go/reflect/type.go index 0325260..3ae0f18 100644 --- a/libgo/go/reflect/type.go +++ b/libgo/go/reflect/type.go @@ -259,20 +259,21 @@ const ( // with a unique tag like `reflect:"array"` or `reflect:"ptr"` // so that code cannot convert from, say, *arrayType to *ptrType. type rtype struct { - kind uint8 // enumeration for C - align int8 // alignment of variable with this type - fieldAlign uint8 // alignment of struct field with this type - _ uint8 // unused/padding size uintptr - hash uint32 // hash of type; avoids computation in hash tables + ptrdata uintptr // size of memory prefix holding all pointers + hash uint32 // hash of type; avoids computation in hash tables + kind uint8 // enumeration for C + align int8 // alignment of variable with this type + fieldAlign uint8 // alignment of struct field with this type + _ uint8 // unused/padding hashfn func(unsafe.Pointer, uintptr) uintptr // hash function equalfn func(unsafe.Pointer, unsafe.Pointer) bool // equality function - gc unsafe.Pointer // garbage collection data - string *string // string form; unnecessary but undeniably useful - *uncommonType // (relatively) uncommon fields - ptrToThis *rtype // type for pointer to this type, if used in binary or has methods + gcdata *byte // garbage collection data + string *string // string form; unnecessary but undeniably useful + *uncommonType // (relatively) uncommon fields + ptrToThis *rtype // type for pointer to this type, if used in binary or has methods } // Method on non-interface type @@ -382,24 +383,6 @@ type structType struct { fields []structField // sorted by offset } -// NOTE: These are copied from ../runtime/mgc0.h. -// They must be kept in sync. -const ( - _GC_END = iota - _GC_PTR - _GC_APTR - _GC_ARRAY_START - _GC_ARRAY_NEXT - _GC_CALL - _GC_CHAN_PTR - _GC_STRING - _GC_EFACE - _GC_IFACE - _GC_SLICE - _GC_REGION - _GC_NUM_INSTR -) - /* * The compiler knows the exact layout of all the data structures above. * The compiler does not know about the data structures and methods below. @@ -1098,32 +1081,6 @@ var ptrMap struct { m map[*rtype]*ptrType } -// garbage collection bytecode program for pointer to memory without pointers. -// See ../../cmd/gc/reflect.c:/^dgcsym1 and :/^dgcsym. -type ptrDataGC struct { - width uintptr // sizeof(ptr) - op uintptr // _GC_APTR - off uintptr // 0 - end uintptr // _GC_END -} - -var ptrDataGCProg = ptrDataGC{ - width: unsafe.Sizeof((*byte)(nil)), - op: _GC_APTR, - off: 0, - end: _GC_END, -} - -// garbage collection bytecode program for pointer to memory with pointers. -// See ../../cmd/gc/reflect.c:/^dgcsym1 and :/^dgcsym. -type ptrGC struct { - width uintptr // sizeof(ptr) - op uintptr // _GC_PTR - off uintptr // 0 - elemgc unsafe.Pointer // element gc type - end uintptr // _GC_END -} - // PtrTo returns the pointer type with element t. // For example, if t represents type Foo, PtrTo(t) represents *Foo. func PtrTo(t Type) Type { @@ -1189,18 +1146,6 @@ func (t *rtype) ptrTo() *rtype { pp.ptrToThis = nil pp.elem = t - if t.kind&kindNoPointers != 0 { - pp.gc = unsafe.Pointer(&ptrDataGCProg) - } else { - pp.gc = unsafe.Pointer(&ptrGC{ - width: pp.size, - op: _GC_PTR, - off: 0, - elemgc: t.gc, - end: _GC_END, - }) - } - q := canonicalize(&pp.rtype) p = (*ptrType)(unsafe.Pointer(q.(*rtype))) @@ -1507,16 +1452,6 @@ func cachePut(k cacheKey, t *rtype) Type { return t } -// garbage collection bytecode program for chan. -// See ../../cmd/gc/reflect.c:/^dgcsym1 and :/^dgcsym. -type chanGC struct { - width uintptr // sizeof(map) - op uintptr // _GC_CHAN_PTR - off uintptr // 0 - typ *rtype // map type - end uintptr // _GC_END -} - // The funcLookupCache caches FuncOf lookups. // FuncOf does not share the common lookupCache since cacheKey is not // sufficient to represent functions unambiguously. @@ -1584,17 +1519,6 @@ func ChanOf(dir ChanDir, t Type) Type { ch.uncommonType = nil ch.ptrToThis = nil - ch.gc = unsafe.Pointer(&chanGC{ - width: ch.size, - op: _GC_CHAN_PTR, - off: 0, - typ: &ch.rtype, - end: _GC_END, - }) - - // INCORRECT. Uncomment to check that TestChanOfGC fails when ch.gc is wrong. - // ch.gc = unsafe.Pointer(&badGC{width: ch.size, end: _GC_END}) - return cachePut(ckey, &ch.rtype) } @@ -1733,9 +1657,6 @@ func FuncOf(in, out []Type, variadic bool) Type { ft.uncommonType = nil ft.ptrToThis = nil - // TODO(cmang): Generate GC data for funcs. - ft.gc = unsafe.Pointer(&ptrDataGCProg) - funcLookupCache.m[hash] = append(funcLookupCache.m[hash], &ft.rtype) return toType(&ft.rtype) @@ -1859,8 +1780,8 @@ func bucketOf(ktyp, etyp *rtype) *rtype { // and it's easier to generate a pointer bitmap than a GC program. // Note that since the key and value are known to be <= 128 bytes, // they're guaranteed to have bitmaps instead of GC programs. - // var gcdata *byte - // var ptrdata uintptr + var gcdata *byte + var ptrdata uintptr size := bucketSize size = align(size, uintptr(ktyp.fieldAlign)) @@ -1875,37 +1796,63 @@ func bucketOf(ktyp, etyp *rtype) *rtype { if maxAlign > ptrSize { size = align(size, maxAlign) size += align(ptrSize, maxAlign) - ptrSize + } else if maxAlign < ptrSize { + size = align(size, ptrSize) + maxAlign = ptrSize } ovoff := size size += ptrSize - if maxAlign < ptrSize { - maxAlign = ptrSize - } - var gcPtr unsafe.Pointer if kind != kindNoPointers { - gc := []uintptr{size} - base := bucketSize - base = align(base, uintptr(ktyp.fieldAlign)) + nptr := size / ptrSize + mask := make([]byte, (nptr+7)/8) + psize := bucketSize + psize = align(psize, uintptr(ktyp.fieldAlign)) + base := psize / ptrSize + if ktyp.kind&kindNoPointers == 0 { - gc = append(gc, _GC_ARRAY_START, base, bucketSize, ktyp.size) - gc = appendGCProgram(gc, ktyp, 0) - gc = append(gc, _GC_ARRAY_NEXT) + if ktyp.kind&kindGCProg != 0 { + panic("reflect: unexpected GC program in MapOf") + } + kmask := (*[16]byte)(unsafe.Pointer(ktyp.gcdata)) + for i := uintptr(0); i < ktyp.size/ptrSize; i++ { + if (kmask[i/8]>>(i%8))&1 != 0 { + for j := uintptr(0); j < bucketSize; j++ { + word := base + j*ktyp.size/ptrSize + i + mask[word/8] |= 1 << (word % 8) + } + } + } } - base += ktyp.size * bucketSize - base = align(base, uintptr(etyp.fieldAlign)) + psize += bucketSize * ktyp.size + psize = align(psize, uintptr(etyp.fieldAlign)) + base = psize / ptrSize + if etyp.kind&kindNoPointers == 0 { - gc = append(gc, _GC_ARRAY_START, base, bucketSize, etyp.size) - gc = appendGCProgram(gc, etyp, 0) - gc = append(gc, _GC_ARRAY_NEXT) + if etyp.kind&kindGCProg != 0 { + panic("reflect: unexpected GC program in MapOf") + } + emask := (*[16]byte)(unsafe.Pointer(etyp.gcdata)) + for i := uintptr(0); i < etyp.size/ptrSize; i++ { + if (emask[i/8]>>(i%8))&1 != 0 { + for j := uintptr(0); j < bucketSize; j++ { + word := base + j*etyp.size/ptrSize + i + mask[word/8] |= 1 << (word % 8) + } + } + } + } + + word := ovoff / ptrSize + mask[word/8] |= 1 << (word % 8) + gcdata = &mask[0] + ptrdata = (word + 1) * ptrSize + + // overflow word must be last + if ptrdata != size { + panic("reflect: bad layout computation in MapOf") } - gc = append(gc, _GC_APTR, ovoff, _GC_END) - gcPtr = unsafe.Pointer(&gc[0]) - } else { - // No pointers in bucket. - gc := [...]uintptr{size, _GC_END} - gcPtr = unsafe.Pointer(&gc[0]) } b := &rtype{ @@ -1913,102 +1860,14 @@ func bucketOf(ktyp, etyp *rtype) *rtype { fieldAlign: uint8(maxAlign), size: size, kind: kind, - gc: gcPtr, + ptrdata: ptrdata, + gcdata: gcdata, } s := "bucket(" + *ktyp.string + "," + *etyp.string + ")" b.string = &s return b } -// Take the GC program for "t" and append it to the GC program "gc". -func appendGCProgram(gc []uintptr, t *rtype, offset uintptr) []uintptr { - p := t.gc - p = unsafe.Pointer(uintptr(p) + unsafe.Sizeof(uintptr(0))) // skip size -loop: - for { - var argcnt int - switch *(*uintptr)(p) { - case _GC_END: - // Note: _GC_END not included in append - break loop - case _GC_ARRAY_NEXT: - argcnt = 0 - case _GC_APTR, _GC_STRING, _GC_EFACE, _GC_IFACE: - argcnt = 1 - case _GC_PTR, _GC_CALL, _GC_CHAN_PTR, _GC_SLICE: - argcnt = 2 - case _GC_ARRAY_START, _GC_REGION: - argcnt = 3 - default: - panic("unknown GC program op for " + *t.string + ": " + strconv.FormatUint(*(*uint64)(p), 10)) - } - for i := 0; i < argcnt+1; i++ { - v := *(*uintptr)(p) - if i == 1 { - v += offset - } - gc = append(gc, v) - p = unsafe.Pointer(uintptr(p) + unsafe.Sizeof(uintptr(0))) - } - } - return gc -} -func hMapOf(bucket *rtype) *rtype { - ptrsize := unsafe.Sizeof(uintptr(0)) - - // make gc program & compute hmap size - gc := make([]uintptr, 1) // first entry is size, filled in at the end - offset := unsafe.Sizeof(uint(0)) // count - offset += unsafe.Sizeof(uint32(0)) // flags - offset += unsafe.Sizeof(uint32(0)) // hash0 - offset += unsafe.Sizeof(uint8(0)) // B - offset += unsafe.Sizeof(uint8(0)) // keysize - offset += unsafe.Sizeof(uint8(0)) // valuesize - offset = (offset + 1) / 2 * 2 - offset += unsafe.Sizeof(uint16(0)) // bucketsize - offset = (offset + ptrsize - 1) / ptrsize * ptrsize - // gc = append(gc, _GC_PTR, offset, uintptr(bucket.gc)) // buckets - offset += ptrsize - // gc = append(gc, _GC_PTR, offset, uintptr(bucket.gc)) // oldbuckets - offset += ptrsize - offset += ptrsize // nevacuate - gc = append(gc, _GC_END) - gc[0] = offset - - h := new(rtype) - h.size = offset - // h.gc = unsafe.Pointer(&gc[0]) - s := "hmap(" + *bucket.string + ")" - h.string = &s - return h -} - -// garbage collection bytecode program for slice of non-zero-length values. -// See ../../cmd/gc/reflect.c:/^dgcsym1 and :/^dgcsym. -type sliceGC struct { - width uintptr // sizeof(slice) - op uintptr // _GC_SLICE - off uintptr // 0 - elemgc unsafe.Pointer // element gc program - end uintptr // _GC_END -} - -// garbage collection bytecode program for slice of zero-length values. -// See ../../cmd/gc/reflect.c:/^dgcsym1 and :/^dgcsym. -type sliceEmptyGC struct { - width uintptr // sizeof(slice) - op uintptr // _GC_APTR - off uintptr // 0 - end uintptr // _GC_END -} - -var sliceEmptyGCProg = sliceEmptyGC{ - width: unsafe.Sizeof([]byte(nil)), - op: _GC_APTR, - off: 0, - end: _GC_END, -} - // SliceOf returns the slice type with element type t. // For example, if t represents int, SliceOf(t) represents []int. func SliceOf(t Type) Type { @@ -2037,21 +1896,6 @@ func SliceOf(t Type) Type { slice.uncommonType = nil slice.ptrToThis = nil - if typ.size == 0 { - slice.gc = unsafe.Pointer(&sliceEmptyGCProg) - } else { - slice.gc = unsafe.Pointer(&sliceGC{ - width: slice.size, - op: _GC_SLICE, - off: 0, - elemgc: typ.gc, - end: _GC_END, - }) - } - - // INCORRECT. Uncomment to check that TestSliceOfOfGC fails when slice.gc is wrong. - // slice.gc = unsafe.Pointer(&badGC{width: slice.size, end: _GC_END}) - return cachePut(ckey, &slice.rtype) } @@ -2073,15 +1917,18 @@ var structLookupCache struct { // This limitation may be lifted in a future version. func StructOf(fields []StructField) Type { var ( - hash = uint32(0) - size uintptr - typalign int8 + hash = uint32(0) + size uintptr + typalign int8 + comparable = true + hashable = true fs = make([]structField, len(fields)) repr = make([]byte, 0, 64) fset = map[string]struct{}{} // fields' names - hasPtr = false // records whether at least one struct-field is a pointer + hasPtr = false // records whether at least one struct-field is a pointer + hasGCProg = false // records whether a struct-field type has a GCProg ) lastzero := uintptr(0) @@ -2092,6 +1939,9 @@ func StructOf(fields []StructField) Type { } f := runtimeStructField(field) ft := f.typ + if ft.kind&kindGCProg != 0 { + hasGCProg = true + } if ft.pointers() { hasPtr = true } @@ -2156,6 +2006,9 @@ func StructOf(fields []StructField) Type { repr = append(repr, ';') } + comparable = comparable && (ft.equalfn != nil) + hashable = hashable && (ft.hashfn != nil) + f.offset = align(size, uintptr(ft.fieldAlign)) if int8(ft.fieldAlign) > typalign { typalign = int8(ft.fieldAlign) @@ -2228,36 +2081,95 @@ func StructOf(fields []StructField) Type { typ.fieldAlign = uint8(typalign) if !hasPtr { typ.kind |= kindNoPointers - gc := [...]uintptr{size, _GC_END} - typ.gc = unsafe.Pointer(&gc[0]) } else { typ.kind &^= kindNoPointers - gc := []uintptr{size} - for _, ft := range fs { - gc = appendGCProgram(gc, ft.typ, ft.offset) + } + + if hasGCProg { + lastPtrField := 0 + for i, ft := range fs { + if ft.typ.pointers() { + lastPtrField = i + } + } + prog := []byte{0, 0, 0, 0} // will be length of prog + for i, ft := range fs { + if i > lastPtrField { + // gcprog should not include anything for any field after + // the last field that contains pointer data + break + } + // FIXME(sbinet) handle padding, fields smaller than a word + elemGC := (*[1 << 30]byte)(unsafe.Pointer(ft.typ.gcdata))[:] + elemPtrs := ft.typ.ptrdata / ptrSize + switch { + case ft.typ.kind&kindGCProg == 0 && ft.typ.ptrdata != 0: + // Element is small with pointer mask; use as literal bits. + mask := elemGC + // Emit 120-bit chunks of full bytes (max is 127 but we avoid using partial bytes). + var n uintptr + for n := elemPtrs; n > 120; n -= 120 { + prog = append(prog, 120) + prog = append(prog, mask[:15]...) + mask = mask[15:] + } + prog = append(prog, byte(n)) + prog = append(prog, mask[:(n+7)/8]...) + case ft.typ.kind&kindGCProg != 0: + // Element has GC program; emit one element. + elemProg := elemGC[4 : 4+*(*uint32)(unsafe.Pointer(&elemGC[0]))-1] + prog = append(prog, elemProg...) + } + // Pad from ptrdata to size. + elemWords := ft.typ.size / ptrSize + if elemPtrs < elemWords { + // Emit literal 0 bit, then repeat as needed. + prog = append(prog, 0x01, 0x00) + if elemPtrs+1 < elemWords { + prog = append(prog, 0x81) + prog = appendVarint(prog, elemWords-elemPtrs-1) + } + } + } + *(*uint32)(unsafe.Pointer(&prog[0])) = uint32(len(prog) - 4) + typ.kind |= kindGCProg + typ.gcdata = &prog[0] + } else { + typ.kind &^= kindGCProg + bv := new(bitVector) + addTypeBits(bv, 0, typ.common()) + if len(bv.data) > 0 { + typ.gcdata = &bv.data[0] } - gc = append(gc, _GC_END) - typ.gc = unsafe.Pointer(&gc[0]) } + typ.ptrdata = typeptrdata(typ.common()) - typ.hashfn = func(p unsafe.Pointer, seed uintptr) uintptr { - ret := seed - for _, ft := range typ.fields { - o := unsafe.Pointer(uintptr(p) + ft.offset) - ret = ft.typ.hashfn(o, ret) + if hashable { + typ.hashfn = func(p unsafe.Pointer, seed uintptr) uintptr { + o := seed + for _, ft := range typ.fields { + pi := unsafe.Pointer(uintptr(p) + ft.offset) + o = ft.typ.hashfn(pi, o) + } + return o } - return ret + } else { + typ.hashfn = nil } - typ.equalfn = func(p, q unsafe.Pointer) bool { - for _, ft := range typ.fields { - pi := unsafe.Pointer(uintptr(p) + ft.offset) - qi := unsafe.Pointer(uintptr(q) + ft.offset) - if !ft.typ.equalfn(pi, qi) { - return false + if comparable { + typ.equalfn = func(p, q unsafe.Pointer) bool { + for _, ft := range typ.fields { + pi := unsafe.Pointer(uintptr(p) + ft.offset) + qi := unsafe.Pointer(uintptr(q) + ft.offset) + if !ft.typ.equalfn(pi, qi) { + return false + } } + return true } - return true + } else { + typ.equalfn = nil } typ.kind &^= kindDirectIface @@ -2308,6 +2220,35 @@ func runtimeStructField(field StructField) structField { } } +// typeptrdata returns the length in bytes of the prefix of t +// containing pointer data. Anything after this offset is scalar data. +// keep in sync with ../cmd/compile/internal/gc/reflect.go +func typeptrdata(t *rtype) uintptr { + if !t.pointers() { + return 0 + } + switch t.Kind() { + case Struct: + st := (*structType)(unsafe.Pointer(t)) + // find the last field that has pointers. + field := 0 + for i := range st.fields { + ft := st.fields[i].typ + if ft.pointers() { + field = i + } + } + f := st.fields[field] + return f.offset + f.typ.ptrdata + + default: + panic("reflect.typeptrdata: unexpected type, " + t.String()) + } +} + +// See cmd/compile/internal/gc/reflect.go for derivation of constant. +const maxPtrmaskBytes = 2048 + // ArrayOf returns the array type with the given count and element type. // For example, if t represents int, ArrayOf(5, t) represents [5]int. // @@ -2350,9 +2291,9 @@ func ArrayOf(count int, elem Type) Type { panic("reflect.ArrayOf: array size would exceed virtual address space") } array.size = typ.size * uintptr(count) - // if count > 0 && typ.ptrdata != 0 { - // array.ptrdata = typ.size*uintptr(count-1) + typ.ptrdata - // } + if count > 0 && typ.ptrdata != 0 { + array.ptrdata = typ.size*uintptr(count-1) + typ.ptrdata + } array.align = typ.align array.fieldAlign = typ.fieldAlign array.uncommonType = nil @@ -2364,41 +2305,111 @@ func ArrayOf(count int, elem Type) Type { case typ.kind&kindNoPointers != 0 || array.size == 0: // No pointers. array.kind |= kindNoPointers - gc := [...]uintptr{array.size, _GC_END} - array.gc = unsafe.Pointer(&gc[0]) + array.gcdata = nil + array.ptrdata = 0 case count == 1: // In memory, 1-element array looks just like the element. array.kind |= typ.kind & kindGCProg - array.gc = typ.gc + array.gcdata = typ.gcdata + array.ptrdata = typ.ptrdata + + case typ.kind&kindGCProg == 0 && array.size <= maxPtrmaskBytes*8*ptrSize: + // Element is small with pointer mask; array is still small. + // Create direct pointer mask by turning each 1 bit in elem + // into count 1 bits in larger mask. + mask := make([]byte, (array.ptrdata/ptrSize+7)/8) + elemMask := (*[1 << 30]byte)(unsafe.Pointer(typ.gcdata))[:] + elemWords := typ.size / ptrSize + for j := uintptr(0); j < typ.ptrdata/ptrSize; j++ { + if (elemMask[j/8]>>(j%8))&1 != 0 { + for i := uintptr(0); i < array.len; i++ { + k := i*elemWords + j + mask[k/8] |= 1 << (k % 8) + } + } + } + array.gcdata = &mask[0] default: - gc := []uintptr{array.size, _GC_ARRAY_START, 0, uintptr(count), typ.size} - gc = appendGCProgram(gc, typ, 0) - gc = append(gc, _GC_ARRAY_NEXT, _GC_END) - array.gc = unsafe.Pointer(&gc[0]) + // Create program that emits one element + // and then repeats to make the array. + prog := []byte{0, 0, 0, 0} // will be length of prog + elemGC := (*[1 << 30]byte)(unsafe.Pointer(typ.gcdata))[:] + elemPtrs := typ.ptrdata / ptrSize + if typ.kind&kindGCProg == 0 { + // Element is small with pointer mask; use as literal bits. + mask := elemGC + // Emit 120-bit chunks of full bytes (max is 127 but we avoid using partial bytes). + var n uintptr + for n = elemPtrs; n > 120; n -= 120 { + prog = append(prog, 120) + prog = append(prog, mask[:15]...) + mask = mask[15:] + } + prog = append(prog, byte(n)) + prog = append(prog, mask[:(n+7)/8]...) + } else { + // Element has GC program; emit one element. + elemProg := elemGC[4 : 4+*(*uint32)(unsafe.Pointer(&elemGC[0]))-1] + prog = append(prog, elemProg...) + } + // Pad from ptrdata to size. + elemWords := typ.size / ptrSize + if elemPtrs < elemWords { + // Emit literal 0 bit, then repeat as needed. + prog = append(prog, 0x01, 0x00) + if elemPtrs+1 < elemWords { + prog = append(prog, 0x81) + prog = appendVarint(prog, elemWords-elemPtrs-1) + } + } + // Repeat count-1 times. + if elemWords < 0x80 { + prog = append(prog, byte(elemWords|0x80)) + } else { + prog = append(prog, 0x80) + prog = appendVarint(prog, elemWords) + } + prog = appendVarint(prog, uintptr(count)-1) + prog = append(prog, 0) + *(*uint32)(unsafe.Pointer(&prog[0])) = uint32(len(prog) - 4) + array.kind |= kindGCProg + array.gcdata = &prog[0] + array.ptrdata = array.size // overestimate but ok; must match program } array.kind &^= kindDirectIface - array.hashfn = func(p unsafe.Pointer, seed uintptr) uintptr { - ret := seed - for i := 0; i < count; i++ { - ret = typ.hashfn(p, ret) - p = unsafe.Pointer(uintptr(p) + typ.size) + esize := typ.size + + if typ.equalfn == nil { + array.equalfn = nil + } else { + eequal := typ.equalfn + array.equalfn = func(p, q unsafe.Pointer) bool { + for i := 0; i < count; i++ { + pi := arrayAt(p, i, esize) + qi := arrayAt(q, i, esize) + if !eequal(pi, qi) { + return false + } + } + return true } - return ret } - array.equalfn = func(p1, p2 unsafe.Pointer) bool { - for i := 0; i < count; i++ { - if !typ.equalfn(p1, p2) { - return false + if typ.hashfn == nil { + array.hashfn = nil + } else { + ehash := typ.hashfn + array.hashfn = func(ptr unsafe.Pointer, seed uintptr) uintptr { + o := seed + for i := 0; i < count; i++ { + o = ehash(arrayAt(ptr, i, esize), o) } - p1 = unsafe.Pointer(uintptr(p1) + typ.size) - p2 = unsafe.Pointer(uintptr(p2) + typ.size) + return o } - return true } return cachePut(ckey, &array.rtype) diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go index 4946269..174320f 100644 --- a/libgo/go/runtime/alg.go +++ b/libgo/go/runtime/alg.go @@ -131,7 +131,7 @@ func c128hash(p unsafe.Pointer, h uintptr) uintptr { return f64hash(unsafe.Pointer(&x[1]), f64hash(unsafe.Pointer(&x[0]), h)) } -func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr { +func interhash(p unsafe.Pointer, h uintptr) uintptr { a := (*iface)(p) tab := a.tab if tab == nil { @@ -199,10 +199,10 @@ func c128equal(p, q unsafe.Pointer) bool { func strequal(p, q unsafe.Pointer) bool { return *(*string)(p) == *(*string)(q) } -func interequal(p, q unsafe.Pointer, size uintptr) bool { +func interequal(p, q unsafe.Pointer) bool { return ifaceeq(*(*iface)(p), *(*iface)(q)) } -func nilinterequal(p, q unsafe.Pointer, size uintptr) bool { +func nilinterequal(p, q unsafe.Pointer) bool { return efaceeq(*(*eface)(p), *(*eface)(q)) } func efaceeq(x, y eface) bool { @@ -361,6 +361,34 @@ var _ = nilinterequal var _ = pointerhash var _ = pointerequal +// Testing adapters for hash quality tests (see hash_test.go) +func stringHash(s string, seed uintptr) uintptr { + return strhash(noescape(unsafe.Pointer(&s)), seed) +} + +func bytesHash(b []byte, seed uintptr) uintptr { + s := (*slice)(unsafe.Pointer(&b)) + return memhash(s.array, seed, uintptr(s.len)) +} + +func int32Hash(i uint32, seed uintptr) uintptr { + return memhash32(noescape(unsafe.Pointer(&i)), seed) +} + +func int64Hash(i uint64, seed uintptr) uintptr { + return memhash64(noescape(unsafe.Pointer(&i)), seed) +} + +func efaceHash(i interface{}, seed uintptr) uintptr { + return nilinterhash(noescape(unsafe.Pointer(&i)), seed) +} + +func ifaceHash(i interface { + F() +}, seed uintptr) uintptr { + return interhash(noescape(unsafe.Pointer(&i)), seed) +} + const hashRandomBytes = sys.PtrSize / 4 * 64 // used in asm_{386,amd64}.s to seed the hash function diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go new file mode 100644 index 0000000..2e0e591 --- /dev/null +++ b/libgo/go/runtime/cgocall.go @@ -0,0 +1,307 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Cgo call and callback support. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// Pointer checking for cgo code. + +// We want to detect all cases where a program that does not use +// unsafe makes a cgo call passing a Go pointer to memory that +// contains a Go pointer. Here a Go pointer is defined as a pointer +// to memory allocated by the Go runtime. Programs that use unsafe +// can evade this restriction easily, so we don't try to catch them. +// The cgo program will rewrite all possibly bad pointer arguments to +// call cgoCheckPointer, where we can catch cases of a Go pointer +// pointing to a Go pointer. + +// Complicating matters, taking the address of a slice or array +// element permits the C program to access all elements of the slice +// or array. In that case we will see a pointer to a single element, +// but we need to check the entire data structure. + +// The cgoCheckPointer call takes additional arguments indicating that +// it was called on an address expression. An additional argument of +// true means that it only needs to check a single element. An +// additional argument of a slice or array means that it needs to +// check the entire slice/array, but nothing else. Otherwise, the +// pointer could be anything, and we check the entire heap object, +// which is conservative but safe. + +// When and if we implement a moving garbage collector, +// cgoCheckPointer will pin the pointer for the duration of the cgo +// call. (This is necessary but not sufficient; the cgo program will +// also have to change to pin Go pointers that cannot point to Go +// pointers.) + +// cgoCheckPointer checks if the argument contains a Go pointer that +// points to a Go pointer, and panics if it does. +func cgoCheckPointer(ptr interface{}, args ...interface{}) { + if debug.cgocheck == 0 { + return + } + + ep := (*eface)(unsafe.Pointer(&ptr)) + t := ep._type + + top := true + if len(args) > 0 && (t.kind&kindMask == kindPtr || t.kind&kindMask == kindUnsafePointer) { + p := ep.data + if t.kind&kindDirectIface == 0 { + p = *(*unsafe.Pointer)(p) + } + if !cgoIsGoPointer(p) { + return + } + aep := (*eface)(unsafe.Pointer(&args[0])) + switch aep._type.kind & kindMask { + case kindBool: + if t.kind&kindMask == kindUnsafePointer { + // We don't know the type of the element. + break + } + pt := (*ptrtype)(unsafe.Pointer(t)) + cgoCheckArg(pt.elem, p, true, false, cgoCheckPointerFail) + return + case kindSlice: + // Check the slice rather than the pointer. + ep = aep + t = ep._type + case kindArray: + // Check the array rather than the pointer. + // Pass top as false since we have a pointer + // to the array. + ep = aep + t = ep._type + top = false + default: + throw("can't happen") + } + } + + cgoCheckArg(t, ep.data, t.kind&kindDirectIface == 0, top, cgoCheckPointerFail) +} + +const cgoCheckPointerFail = "cgo argument has Go pointer to Go pointer" +const cgoResultFail = "cgo result has Go pointer" + +// cgoCheckArg is the real work of cgoCheckPointer. The argument p +// is either a pointer to the value (of type t), or the value itself, +// depending on indir. The top parameter is whether we are at the top +// level, where Go pointers are allowed. +func cgoCheckArg(t *_type, p unsafe.Pointer, indir, top bool, msg string) { + if t.kind&kindNoPointers != 0 { + // If the type has no pointers there is nothing to do. + return + } + + switch t.kind & kindMask { + default: + throw("can't happen") + case kindArray: + at := (*arraytype)(unsafe.Pointer(t)) + if !indir { + if at.len != 1 { + throw("can't happen") + } + cgoCheckArg(at.elem, p, at.elem.kind&kindDirectIface == 0, top, msg) + return + } + for i := uintptr(0); i < at.len; i++ { + cgoCheckArg(at.elem, p, true, top, msg) + p = add(p, at.elem.size) + } + case kindChan, kindMap: + // These types contain internal pointers that will + // always be allocated in the Go heap. It's never OK + // to pass them to C. + panic(errorString(msg)) + case kindFunc: + if indir { + p = *(*unsafe.Pointer)(p) + } + if !cgoIsGoPointer(p) { + return + } + panic(errorString(msg)) + case kindInterface: + it := *(**_type)(p) + if it == nil { + return + } + // A type known at compile time is OK since it's + // constant. A type not known at compile time will be + // in the heap and will not be OK. + if inheap(uintptr(unsafe.Pointer(it))) { + panic(errorString(msg)) + } + p = *(*unsafe.Pointer)(add(p, sys.PtrSize)) + if !cgoIsGoPointer(p) { + return + } + if !top { + panic(errorString(msg)) + } + cgoCheckArg(it, p, it.kind&kindDirectIface == 0, false, msg) + case kindSlice: + st := (*slicetype)(unsafe.Pointer(t)) + s := (*slice)(p) + p = s.array + if !cgoIsGoPointer(p) { + return + } + if !top { + panic(errorString(msg)) + } + if st.elem.kind&kindNoPointers != 0 { + return + } + for i := 0; i < s.cap; i++ { + cgoCheckArg(st.elem, p, true, false, msg) + p = add(p, st.elem.size) + } + case kindString: + ss := (*stringStruct)(p) + if !cgoIsGoPointer(ss.str) { + return + } + if !top { + panic(errorString(msg)) + } + case kindStruct: + st := (*structtype)(unsafe.Pointer(t)) + if !indir { + if len(st.fields) != 1 { + throw("can't happen") + } + cgoCheckArg(st.fields[0].typ, p, st.fields[0].typ.kind&kindDirectIface == 0, top, msg) + return + } + for _, f := range st.fields { + cgoCheckArg(f.typ, add(p, f.offset), true, top, msg) + } + case kindPtr, kindUnsafePointer: + if indir { + p = *(*unsafe.Pointer)(p) + } + + if !cgoIsGoPointer(p) { + return + } + if !top { + panic(errorString(msg)) + } + + cgoCheckUnknownPointer(p, msg) + } +} + +// cgoCheckUnknownPointer is called for an arbitrary pointer into Go +// memory. It checks whether that Go memory contains any other +// pointer into Go memory. If it does, we panic. +// The return values are unused but useful to see in panic tracebacks. +func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { + if cgoInRange(p, mheap_.arena_start, mheap_.arena_used) { + if !inheap(uintptr(p)) { + // On 32-bit systems it is possible for C's allocated memory + // to have addresses between arena_start and arena_used. + // Either this pointer is a stack or an unused span or it's + // a C allocation. Escape analysis should prevent the first, + // garbage collection should prevent the second, + // and the third is completely OK. + return + } + + b, hbits, span, _ := heapBitsForObject(uintptr(p), 0, 0, false) + base = b + if base == 0 { + return + } + n := span.elemsize + for i = uintptr(0); i < n; i += sys.PtrSize { + if i != 1*sys.PtrSize && !hbits.morePointers() { + // No more possible pointers. + break + } + if hbits.isPointer() { + if cgoIsGoPointer(*(*unsafe.Pointer)(unsafe.Pointer(base + i))) { + panic(errorString(msg)) + } + } + hbits = hbits.next() + } + + return + } + + roots := gcRoots + for roots != nil { + for j := 0; j < roots.count; j++ { + pr := roots.roots[j] + addr := uintptr(pr.decl) + if cgoInRange(p, addr, addr+pr.size) { + cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata) + return + } + } + roots = roots.next + } + + return +} + +// cgoIsGoPointer returns whether the pointer is a Go pointer--a +// pointer to Go memory. We only care about Go memory that might +// contain pointers. +//go:nosplit +//go:nowritebarrierrec +func cgoIsGoPointer(p unsafe.Pointer) bool { + if p == nil { + return false + } + + if inHeapOrStack(uintptr(p)) { + return true + } + + roots := gcRoots + for roots != nil { + for i := 0; i < roots.count; i++ { + pr := roots.roots[i] + addr := uintptr(pr.decl) + if cgoInRange(p, addr, addr+pr.size) { + return true + } + } + roots = roots.next + } + + return false +} + +// cgoInRange returns whether p is between start and end. +//go:nosplit +//go:nowritebarrierrec +func cgoInRange(p unsafe.Pointer, start, end uintptr) bool { + return start <= uintptr(p) && uintptr(p) < end +} + +// cgoCheckResult is called to check the result parameter of an +// exported Go function. It panics if the result is or contains a Go +// pointer. +func cgoCheckResult(val interface{}) { + if debug.cgocheck == 0 { + return + } + + ep := (*eface)(unsafe.Pointer(&val)) + t := ep._type + cgoCheckArg(t, ep.data, t.kind&kindDirectIface == 0, false, cgoResultFail) +} diff --git a/libgo/go/runtime/cgocheck.go b/libgo/go/runtime/cgocheck.go index fec3646..09d444d 100644 --- a/libgo/go/runtime/cgocheck.go +++ b/libgo/go/runtime/cgocheck.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - // Code to check that pointer writes follow the cgo rules. // These functions are invoked via the write barrier when debug.cgocheck > 1. @@ -110,17 +108,18 @@ func cgoCheckTypedBlock(typ *_type, src unsafe.Pointer, off, size uintptr) { } // The type has a GC program. Try to find GC bits somewhere else. - for _, datap := range activeModules() { - if cgoInRange(src, datap.data, datap.edata) { - doff := uintptr(src) - datap.data - cgoCheckBits(add(src, -doff), datap.gcdatamask.bytedata, off+doff, size) - return - } - if cgoInRange(src, datap.bss, datap.ebss) { - boff := uintptr(src) - datap.bss - cgoCheckBits(add(src, -boff), datap.gcbssmask.bytedata, off+boff, size) - return + roots := gcRoots + for roots != nil { + for i := 0; i < roots.count; i++ { + pr := roots.roots[i] + addr := uintptr(pr.decl) + if cgoInRange(src, addr, addr+pr.size) { + doff := uintptr(src) - addr + cgoCheckBits(add(src, -doff), pr.gcdata, off+doff, size) + return + } } + roots = roots.next } aoff := uintptr(src) - mheap_.arena_start diff --git a/libgo/go/runtime/crash_unix_test.go b/libgo/go/runtime/crash_unix_test.go index 182c84b..67ef334 100644 --- a/libgo/go/runtime/crash_unix_test.go +++ b/libgo/go/runtime/crash_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package runtime_test diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go index a8827f2..6a9efcd 100644 --- a/libgo/go/runtime/debug.go +++ b/libgo/go/runtime/debug.go @@ -39,7 +39,9 @@ func GOMAXPROCS(n int) int { // The set of available CPUs is checked by querying the operating system // at process startup. Changes to operating system CPU allocation after // process startup are not reflected. -func NumCPU() int +func NumCPU() int { + return int(ncpu) +} // NumCgoCall returns the number of cgo calls made by the current process. func NumCgoCall() int64 { diff --git a/libgo/go/runtime/env_posix.go b/libgo/go/runtime/env_posix.go index e076edb..9bf7ddc 100644 --- a/libgo/go/runtime/env_posix.go +++ b/libgo/go/runtime/env_posix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package runtime diff --git a/libgo/go/runtime/export_test.go b/libgo/go/runtime/export_test.go index cc4b188..bf435f4 100644 --- a/libgo/go/runtime/export_test.go +++ b/libgo/go/runtime/export_test.go @@ -26,10 +26,11 @@ import ( var Entersyscall = entersyscall var Exitsyscall = exitsyscall var LockedOSThread = lockedOSThread +var Xadduintptr = atomic.Xadduintptr -// var Xadduintptr = xadduintptr +var FuncPC = funcPC -// var FuncPC = funcPC +var Fastlog2 = fastlog2 var Atoi = atoi var Atoi32 = atoi32 @@ -148,12 +149,12 @@ func RunSchedLocalQueueEmptyTest(iters int) { } } -//var StringHash = stringHash -//var BytesHash = bytesHash -//var Int32Hash = int32Hash -//var Int64Hash = int64Hash -//var EfaceHash = efaceHash -//var IfaceHash = ifaceHash +var StringHash = stringHash +var BytesHash = bytesHash +var Int32Hash = int32Hash +var Int64Hash = int64Hash +var EfaceHash = efaceHash +var IfaceHash = ifaceHash func MemclrBytes(b []byte) { s := (*slice)(unsafe.Pointer(&b)) @@ -182,7 +183,6 @@ func SetEnvs(e []string) { envs = e } // For benchmarking. -/* func BenchSetType(n int, x interface{}) { e := *efaceOf(&x) t := e._type @@ -213,7 +213,6 @@ func BenchSetType(n int, x interface{}) { const PtrSize = sys.PtrSize var ForceGCPeriod = &forcegcperiod -*/ // SetTracebackEnv is like runtime/debug.SetTraceback, but it raises // the "environment" traceback level, so later calls to @@ -223,7 +222,6 @@ func SetTracebackEnv(level string) { traceback_env = traceback_cache } -/* var ReadUnaligned32 = readUnaligned32 var ReadUnaligned64 = readUnaligned64 @@ -242,7 +240,6 @@ func CountPagesInUse() (pagesInUse, counted uintptr) { return } -*/ // BlockOnSystemStack switches to the system stack, prints "x\n" to // stderr, and blocks in a stack containing diff --git a/libgo/go/runtime/extern.go b/libgo/go/runtime/extern.go index 5aa76ac..5c50760 100644 --- a/libgo/go/runtime/extern.go +++ b/libgo/go/runtime/extern.go @@ -183,93 +183,6 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) // It returns the number of entries written to pc. func Callers(skip int, pc []uintptr) int -// SetFinalizer sets the finalizer associated with obj to the provided -// finalizer function. When the garbage collector finds an unreachable block -// with an associated finalizer, it clears the association and runs -// finalizer(obj) in a separate goroutine. This makes obj reachable again, -// but now without an associated finalizer. Assuming that SetFinalizer -// is not called again, the next time the garbage collector sees -// that obj is unreachable, it will free obj. -// -// SetFinalizer(obj, nil) clears any finalizer associated with obj. -// -// The argument obj must be a pointer to an object allocated by -// calling new or by taking the address of a composite literal. -// The argument finalizer must be a function that takes a single argument -// to which obj's type can be assigned, and can have arbitrary ignored return -// values. If either of these is not true, SetFinalizer aborts the -// program. -// -// Finalizers are run in dependency order: if A points at B, both have -// finalizers, and they are otherwise unreachable, only the finalizer -// for A runs; once A is freed, the finalizer for B can run. -// If a cyclic structure includes a block with a finalizer, that -// cycle is not guaranteed to be garbage collected and the finalizer -// is not guaranteed to run, because there is no ordering that -// respects the dependencies. -// -// The finalizer for obj is scheduled to run at some arbitrary time after -// obj becomes unreachable. -// There is no guarantee that finalizers will run before a program exits, -// so typically they are useful only for releasing non-memory resources -// associated with an object during a long-running program. -// For example, an os.File object could use a finalizer to close the -// associated operating system file descriptor when a program discards -// an os.File without calling Close, but it would be a mistake -// to depend on a finalizer to flush an in-memory I/O buffer such as a -// bufio.Writer, because the buffer would not be flushed at program exit. -// -// It is not guaranteed that a finalizer will run if the size of *obj is -// zero bytes. -// -// It is not guaranteed that a finalizer will run for objects allocated -// in initializers for package-level variables. Such objects may be -// linker-allocated, not heap-allocated. -// -// A finalizer may run as soon as an object becomes unreachable. -// In order to use finalizers correctly, the program must ensure that -// the object is reachable until it is no longer required. -// Objects stored in global variables, or that can be found by tracing -// pointers from a global variable, are reachable. For other objects, -// pass the object to a call of the KeepAlive function to mark the -// last point in the function where the object must be reachable. -// -// For example, if p points to a struct that contains a file descriptor d, -// and p has a finalizer that closes that file descriptor, and if the last -// use of p in a function is a call to syscall.Write(p.d, buf, size), then -// p may be unreachable as soon as the program enters syscall.Write. The -// finalizer may run at that moment, closing p.d, causing syscall.Write -// to fail because it is writing to a closed file descriptor (or, worse, -// to an entirely different file descriptor opened by a different goroutine). -// To avoid this problem, call runtime.KeepAlive(p) after the call to -// syscall.Write. -// -// A single goroutine runs all finalizers for a program, sequentially. -// If a finalizer must run for a long time, it should do so by starting -// a new goroutine. -func SetFinalizer(obj interface{}, finalizer interface{}) - -// KeepAlive marks its argument as currently reachable. -// This ensures that the object is not freed, and its finalizer is not run, -// before the point in the program where KeepAlive is called. -// -// A very simplified example showing where KeepAlive is required: -// type File struct { d int } -// d, err := syscall.Open("/file/path", syscall.O_RDONLY, 0) -// // ... do something if err != nil ... -// p := &File{d} -// runtime.SetFinalizer(p, func(p *File) { syscall.Close(p.d) }) -// var buf [10]byte -// n, err := syscall.Read(p.d, buf[:]) -// // Ensure p is not finalized until Read returns. -// runtime.KeepAlive(p) -// // No more uses of p after this point. -// -// Without the KeepAlive call, the finalizer could run at the start of -// syscall.Read, closing the file descriptor before syscall.Read makes -// the actual system call. -func KeepAlive(interface{}) - // GOROOT returns the root of the Go tree. // It uses the GOROOT environment variable, if set, // or else the root used during the Go build. diff --git a/libgo/go/runtime/fastlog2.go b/libgo/go/runtime/fastlog2.go index 6fbe572f4..5f3fb53 100644 --- a/libgo/go/runtime/fastlog2.go +++ b/libgo/go/runtime/fastlog2.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - package runtime import "unsafe" diff --git a/libgo/go/runtime/fastlog2_test.go b/libgo/go/runtime/fastlog2_test.go index 6e9fcd4..ae0f40b 100644 --- a/libgo/go/runtime/fastlog2_test.go +++ b/libgo/go/runtime/fastlog2_test.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore - package runtime_test import ( diff --git a/libgo/go/runtime/fastlog2table.go b/libgo/go/runtime/fastlog2table.go index 47ae5e8..c36d583 100644 --- a/libgo/go/runtime/fastlog2table.go +++ b/libgo/go/runtime/fastlog2table.go @@ -2,8 +2,6 @@ // Run go generate from src/runtime to update. // See mkfastlog2table.go for comments. -// +build ignore - package runtime const fastlogNumBits = 5 diff --git a/libgo/go/runtime/gc_test.go b/libgo/go/runtime/gc_test.go index 2a6acf0..ec043ed 100644 --- a/libgo/go/runtime/gc_test.go +++ b/libgo/go/runtime/gc_test.go @@ -434,8 +434,6 @@ func testIfaceEqual(x interface{}) { } } -/* - func TestPageAccounting(t *testing.T) { // Grow the heap in small increments. This used to drop the // pages-in-use count below zero because of a rounding @@ -452,5 +450,3 @@ func TestPageAccounting(t *testing.T) { t.Fatalf("mheap_.pagesInUse is %d, but direct count is %d", pagesInUse, counted) } } - -*/ diff --git a/libgo/go/runtime/hash_test.go b/libgo/go/runtime/hash_test.go new file mode 100644 index 0000000..167c49e --- /dev/null +++ b/libgo/go/runtime/hash_test.go @@ -0,0 +1,710 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "fmt" + "math" + "math/rand" + . "runtime" + "strings" + "testing" + "unsafe" +) + +// Smhasher is a torture test for hash functions. +// https://code.google.com/p/smhasher/ +// This code is a port of some of the Smhasher tests to Go. +// +// The current AES hash function passes Smhasher. Our fallback +// hash functions don't, so we only enable the difficult tests when +// we know the AES implementation is available. + +// Sanity checks. +// hash should not depend on values outside key. +// hash should not depend on alignment. +func TestSmhasherSanity(t *testing.T) { + r := rand.New(rand.NewSource(1234)) + const REP = 10 + const KEYMAX = 128 + const PAD = 16 + const OFFMAX = 16 + for k := 0; k < REP; k++ { + for n := 0; n < KEYMAX; n++ { + for i := 0; i < OFFMAX; i++ { + var b [KEYMAX + OFFMAX + 2*PAD]byte + var c [KEYMAX + OFFMAX + 2*PAD]byte + randBytes(r, b[:]) + randBytes(r, c[:]) + copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) + if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { + t.Errorf("hash depends on bytes outside key") + } + } + } + } +} + +type HashSet struct { + m map[uintptr]struct{} // set of hashes added + n int // number of hashes added +} + +func newHashSet() *HashSet { + return &HashSet{make(map[uintptr]struct{}), 0} +} +func (s *HashSet) add(h uintptr) { + s.m[h] = struct{}{} + s.n++ +} +func (s *HashSet) addS(x string) { + s.add(StringHash(x, 0)) +} +func (s *HashSet) addB(x []byte) { + s.add(BytesHash(x, 0)) +} +func (s *HashSet) addS_seed(x string, seed uintptr) { + s.add(StringHash(x, seed)) +} +func (s *HashSet) check(t *testing.T) { + const SLOP = 10.0 + collisions := s.n - len(s.m) + //fmt.Printf("%d/%d\n", len(s.m), s.n) + pairs := int64(s.n) * int64(s.n-1) / 2 + expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) + stddev := math.Sqrt(expected) + if float64(collisions) > expected+SLOP*(3*stddev+1) { + t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) + } +} + +// a string plus adding zeros must make distinct hashes +func TestSmhasherAppendedZeros(t *testing.T) { + s := "hello" + strings.Repeat("\x00", 256) + h := newHashSet() + for i := 0; i <= len(s); i++ { + h.addS(s[:i]) + } + h.check(t) +} + +// All 0-3 byte strings have distinct hashes. +func TestSmhasherSmallKeys(t *testing.T) { + h := newHashSet() + var b [3]byte + for i := 0; i < 256; i++ { + b[0] = byte(i) + h.addB(b[:1]) + for j := 0; j < 256; j++ { + b[1] = byte(j) + h.addB(b[:2]) + if !testing.Short() { + for k := 0; k < 256; k++ { + b[2] = byte(k) + h.addB(b[:3]) + } + } + } + } + h.check(t) +} + +// Different length strings of all zeros have distinct hashes. +func TestSmhasherZeros(t *testing.T) { + N := 256 * 1024 + if testing.Short() { + N = 1024 + } + h := newHashSet() + b := make([]byte, N) + for i := 0; i <= N; i++ { + h.addB(b[:i]) + } + h.check(t) +} + +// Strings with up to two nonzero bytes all have distinct hashes. +func TestSmhasherTwoNonzero(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + h := newHashSet() + for n := 2; n <= 16; n++ { + twoNonZero(h, n) + } + h.check(t) +} +func twoNonZero(h *HashSet, n int) { + b := make([]byte, n) + + // all zero + h.addB(b[:]) + + // one non-zero byte + for i := 0; i < n; i++ { + for x := 1; x < 256; x++ { + b[i] = byte(x) + h.addB(b[:]) + b[i] = 0 + } + } + + // two non-zero bytes + for i := 0; i < n; i++ { + for x := 1; x < 256; x++ { + b[i] = byte(x) + for j := i + 1; j < n; j++ { + for y := 1; y < 256; y++ { + b[j] = byte(y) + h.addB(b[:]) + b[j] = 0 + } + } + b[i] = 0 + } + } +} + +// Test strings with repeats, like "abcdabcdabcdabcd..." +func TestSmhasherCyclic(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + r := rand.New(rand.NewSource(1234)) + const REPEAT = 8 + const N = 1000000 + for n := 4; n <= 12; n++ { + h := newHashSet() + b := make([]byte, REPEAT*n) + for i := 0; i < N; i++ { + b[0] = byte(i * 79 % 97) + b[1] = byte(i * 43 % 137) + b[2] = byte(i * 151 % 197) + b[3] = byte(i * 199 % 251) + randBytes(r, b[4:n]) + for j := n; j < n*REPEAT; j++ { + b[j] = b[j-n] + } + h.addB(b) + } + h.check(t) + } +} + +// Test strings with only a few bits set +func TestSmhasherSparse(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + sparse(t, 32, 6) + sparse(t, 40, 6) + sparse(t, 48, 5) + sparse(t, 56, 5) + sparse(t, 64, 5) + sparse(t, 96, 4) + sparse(t, 256, 3) + sparse(t, 2048, 2) +} +func sparse(t *testing.T, n int, k int) { + b := make([]byte, n/8) + h := newHashSet() + setbits(h, b, 0, k) + h.check(t) +} + +// set up to k bits at index i and greater +func setbits(h *HashSet, b []byte, i int, k int) { + h.addB(b) + if k == 0 { + return + } + for j := i; j < len(b)*8; j++ { + b[j/8] |= byte(1 << uint(j&7)) + setbits(h, b, j+1, k-1) + b[j/8] &= byte(^(1 << uint(j&7))) + } +} + +// Test all possible combinations of n blocks from the set s. +// "permutation" is a bad name here, but it is what Smhasher uses. +func TestSmhasherPermutation(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) + permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) + permutation(t, []uint32{0, 1}, 20) + permutation(t, []uint32{0, 1 << 31}, 20) + permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) +} +func permutation(t *testing.T, s []uint32, n int) { + b := make([]byte, n*4) + h := newHashSet() + genPerm(h, b, s, 0) + h.check(t) +} +func genPerm(h *HashSet, b []byte, s []uint32, n int) { + h.addB(b[:n]) + if n == len(b) { + return + } + for _, v := range s { + b[n] = byte(v) + b[n+1] = byte(v >> 8) + b[n+2] = byte(v >> 16) + b[n+3] = byte(v >> 24) + genPerm(h, b, s, n+4) + } +} + +type Key interface { + clear() // set bits all to 0 + random(r *rand.Rand) // set key to something random + bits() int // how many bits key has + flipBit(i int) // flip bit i of the key + hash() uintptr // hash the key + name() string // for error reporting +} + +type BytesKey struct { + b []byte +} + +func (k *BytesKey) clear() { + for i := range k.b { + k.b[i] = 0 + } +} +func (k *BytesKey) random(r *rand.Rand) { + randBytes(r, k.b) +} +func (k *BytesKey) bits() int { + return len(k.b) * 8 +} +func (k *BytesKey) flipBit(i int) { + k.b[i>>3] ^= byte(1 << uint(i&7)) +} +func (k *BytesKey) hash() uintptr { + return BytesHash(k.b, 0) +} +func (k *BytesKey) name() string { + return fmt.Sprintf("bytes%d", len(k.b)) +} + +type Int32Key struct { + i uint32 +} + +func (k *Int32Key) clear() { + k.i = 0 +} +func (k *Int32Key) random(r *rand.Rand) { + k.i = r.Uint32() +} +func (k *Int32Key) bits() int { + return 32 +} +func (k *Int32Key) flipBit(i int) { + k.i ^= 1 << uint(i) +} +func (k *Int32Key) hash() uintptr { + return Int32Hash(k.i, 0) +} +func (k *Int32Key) name() string { + return "int32" +} + +type Int64Key struct { + i uint64 +} + +func (k *Int64Key) clear() { + k.i = 0 +} +func (k *Int64Key) random(r *rand.Rand) { + k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 +} +func (k *Int64Key) bits() int { + return 64 +} +func (k *Int64Key) flipBit(i int) { + k.i ^= 1 << uint(i) +} +func (k *Int64Key) hash() uintptr { + return Int64Hash(k.i, 0) +} +func (k *Int64Key) name() string { + return "int64" +} + +type EfaceKey struct { + i interface{} +} + +func (k *EfaceKey) clear() { + k.i = nil +} +func (k *EfaceKey) random(r *rand.Rand) { + k.i = uint64(r.Int63()) +} +func (k *EfaceKey) bits() int { + // use 64 bits. This tests inlined interfaces + // on 64-bit targets and indirect interfaces on + // 32-bit targets. + return 64 +} +func (k *EfaceKey) flipBit(i int) { + k.i = k.i.(uint64) ^ uint64(1)<<uint(i) +} +func (k *EfaceKey) hash() uintptr { + return EfaceHash(k.i, 0) +} +func (k *EfaceKey) name() string { + return "Eface" +} + +type IfaceKey struct { + i interface { + F() + } +} +type fInter uint64 + +func (x fInter) F() { +} + +func (k *IfaceKey) clear() { + k.i = nil +} +func (k *IfaceKey) random(r *rand.Rand) { + k.i = fInter(r.Int63()) +} +func (k *IfaceKey) bits() int { + // use 64 bits. This tests inlined interfaces + // on 64-bit targets and indirect interfaces on + // 32-bit targets. + return 64 +} +func (k *IfaceKey) flipBit(i int) { + k.i = k.i.(fInter) ^ fInter(1)<<uint(i) +} +func (k *IfaceKey) hash() uintptr { + return IfaceHash(k.i, 0) +} +func (k *IfaceKey) name() string { + return "Iface" +} + +// Flipping a single bit of a key should flip each output bit with 50% probability. +func TestSmhasherAvalanche(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + avalancheTest1(t, &BytesKey{make([]byte, 2)}) + avalancheTest1(t, &BytesKey{make([]byte, 4)}) + avalancheTest1(t, &BytesKey{make([]byte, 8)}) + avalancheTest1(t, &BytesKey{make([]byte, 16)}) + avalancheTest1(t, &BytesKey{make([]byte, 32)}) + avalancheTest1(t, &BytesKey{make([]byte, 200)}) + avalancheTest1(t, &Int32Key{}) + avalancheTest1(t, &Int64Key{}) + avalancheTest1(t, &EfaceKey{}) + avalancheTest1(t, &IfaceKey{}) +} +func avalancheTest1(t *testing.T, k Key) { + const REP = 100000 + r := rand.New(rand.NewSource(1234)) + n := k.bits() + + // grid[i][j] is a count of whether flipping + // input bit i affects output bit j. + grid := make([][hashSize]int, n) + + for z := 0; z < REP; z++ { + // pick a random key, hash it + k.random(r) + h := k.hash() + + // flip each bit, hash & compare the results + for i := 0; i < n; i++ { + k.flipBit(i) + d := h ^ k.hash() + k.flipBit(i) + + // record the effects of that bit flip + g := &grid[i] + for j := 0; j < hashSize; j++ { + g[j] += int(d & 1) + d >>= 1 + } + } + } + + // Each entry in the grid should be about REP/2. + // More precisely, we did N = k.bits() * hashSize experiments where + // each is the sum of REP coin flips. We want to find bounds on the + // sum of coin flips such that a truly random experiment would have + // all sums inside those bounds with 99% probability. + N := n * hashSize + var c float64 + // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 + for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { + } + c *= 4.0 // allowed slack - we don't need to be perfectly random + mean := .5 * REP + stddev := .5 * math.Sqrt(REP) + low := int(mean - c*stddev) + high := int(mean + c*stddev) + for i := 0; i < n; i++ { + for j := 0; j < hashSize; j++ { + x := grid[i][j] + if x < low || x > high { + t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) + } + } + } +} + +// All bit rotations of a set of distinct keys +func TestSmhasherWindowed(t *testing.T) { + windowed(t, &Int32Key{}) + windowed(t, &Int64Key{}) + windowed(t, &BytesKey{make([]byte, 128)}) +} +func windowed(t *testing.T, k Key) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + const BITS = 16 + + for r := 0; r < k.bits(); r++ { + h := newHashSet() + for i := 0; i < 1<<BITS; i++ { + k.clear() + for j := 0; j < BITS; j++ { + if i>>uint(j)&1 != 0 { + k.flipBit((j + r) % k.bits()) + } + } + h.add(k.hash()) + } + h.check(t) + } +} + +// All keys of the form prefix + [A-Za-z0-9]*N + suffix. +func TestSmhasherText(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + text(t, "Foo", "Bar") + text(t, "FooBar", "") + text(t, "", "FooBar") +} +func text(t *testing.T, prefix, suffix string) { + const N = 4 + const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" + const L = len(S) + b := make([]byte, len(prefix)+N+len(suffix)) + copy(b, prefix) + copy(b[len(prefix)+N:], suffix) + h := newHashSet() + c := b[len(prefix):] + for i := 0; i < L; i++ { + c[0] = S[i] + for j := 0; j < L; j++ { + c[1] = S[j] + for k := 0; k < L; k++ { + c[2] = S[k] + for x := 0; x < L; x++ { + c[3] = S[x] + h.addB(b) + } + } + } + } + h.check(t) +} + +// Make sure different seed values generate different hashes. +func TestSmhasherSeed(t *testing.T) { + h := newHashSet() + const N = 100000 + s := "hello" + for i := 0; i < N; i++ { + h.addS_seed(s, uintptr(i)) + } + h.check(t) +} + +// size of the hash output (32 or 64 bits) +const hashSize = 32 + int(^uintptr(0)>>63<<5) + +func randBytes(r *rand.Rand, b []byte) { + for i := range b { + b[i] = byte(r.Uint32()) + } +} + +func benchmarkHash(b *testing.B, n int) { + s := strings.Repeat("A", n) + + for i := 0; i < b.N; i++ { + StringHash(s, 0) + } + b.SetBytes(int64(n)) +} + +func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } +func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } +func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } +func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } +func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } + +func TestArrayHash(t *testing.T) { + if Compiler == "gccgo" { + t.Skip("does not work on gccgo without better escape analysis") + } + + // Make sure that "" in arrays hash correctly. The hash + // should at least scramble the input seed so that, e.g., + // {"","foo"} and {"foo",""} have different hashes. + + // If the hash is bad, then all (8 choose 4) = 70 keys + // have the same hash. If so, we allocate 70/8 = 8 + // overflow buckets. If the hash is good we don't + // normally allocate any overflow buckets, and the + // probability of even one or two overflows goes down rapidly. + // (There is always 1 allocation of the bucket array. The map + // header is allocated on the stack.) + f := func() { + // Make the key type at most 128 bytes. Otherwise, + // we get an allocation per key. + type key [8]string + m := make(map[key]bool, 70) + + // fill m with keys that have 4 "foo"s and 4 ""s. + for i := 0; i < 256; i++ { + var k key + cnt := 0 + for j := uint(0); j < 8; j++ { + if i>>j&1 != 0 { + k[j] = "foo" + cnt++ + } + } + if cnt == 4 { + m[k] = true + } + } + if len(m) != 70 { + t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) + } + } + if n := testing.AllocsPerRun(10, f); n > 6 { + t.Errorf("too many allocs %f - hash not balanced", n) + } +} +func TestStructHash(t *testing.T) { + // See the comment in TestArrayHash. + f := func() { + type key struct { + a, b, c, d, e, f, g, h string + } + m := make(map[key]bool, 70) + + // fill m with keys that have 4 "foo"s and 4 ""s. + for i := 0; i < 256; i++ { + var k key + cnt := 0 + if i&1 != 0 { + k.a = "foo" + cnt++ + } + if i&2 != 0 { + k.b = "foo" + cnt++ + } + if i&4 != 0 { + k.c = "foo" + cnt++ + } + if i&8 != 0 { + k.d = "foo" + cnt++ + } + if i&16 != 0 { + k.e = "foo" + cnt++ + } + if i&32 != 0 { + k.f = "foo" + cnt++ + } + if i&64 != 0 { + k.g = "foo" + cnt++ + } + if i&128 != 0 { + k.h = "foo" + cnt++ + } + if cnt == 4 { + m[k] = true + } + } + if len(m) != 70 { + t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) + } + } + if n := testing.AllocsPerRun(10, f); n > 6 { + t.Errorf("too many allocs %f - hash not balanced", n) + } +} + +var sink uint64 + +func BenchmarkAlignedLoad(b *testing.B) { + var buf [16]byte + p := unsafe.Pointer(&buf[0]) + var s uint64 + for i := 0; i < b.N; i++ { + s += ReadUnaligned64(p) + } + sink = s +} + +func BenchmarkUnalignedLoad(b *testing.B) { + var buf [16]byte + p := unsafe.Pointer(&buf[1]) + var s uint64 + for i := 0; i < b.N; i++ { + s += ReadUnaligned64(p) + } + sink = s +} + +func TestCollisions(t *testing.T) { + if testing.Short() { + t.Skip("Skipping in short mode") + } + for i := 0; i < 16; i++ { + for j := 0; j < 16; j++ { + if j == i { + continue + } + var a [16]byte + m := make(map[uint16]struct{}, 1<<16) + for n := 0; n < 1<<16; n++ { + a[i] = byte(n) + a[j] = byte(n >> 8) + m[uint16(BytesHash(a[:], 0))] = struct{}{} + } + if len(m) <= 1<<15 { + t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m)) + } + } + } +} diff --git a/libgo/go/runtime/heapdump.go b/libgo/go/runtime/heapdump.go new file mode 100644 index 0000000..0db53f5 --- /dev/null +++ b/libgo/go/runtime/heapdump.go @@ -0,0 +1,594 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Implementation of runtime/debug.WriteHeapDump. Writes all +// objects in the heap plus additional info (roots, threads, +// finalizers, etc.) to a file. + +// The format of the dumped file is described at +// https://golang.org/s/go15heapdump. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +//go:linkname runtime_debug_WriteHeapDump runtime_debug.WriteHeapDump +func runtime_debug_WriteHeapDump(fd uintptr) { + stopTheWorld("write heap dump") + + systemstack(func() { + writeheapdump_m(fd) + }) + + startTheWorld() +} + +const ( + fieldKindEol = 0 + fieldKindPtr = 1 + fieldKindIface = 2 + fieldKindEface = 3 + tagEOF = 0 + tagObject = 1 + tagOtherRoot = 2 + tagType = 3 + tagGoroutine = 4 + tagStackFrame = 5 + tagParams = 6 + tagFinalizer = 7 + tagItab = 8 + tagOSThread = 9 + tagMemStats = 10 + tagQueuedFinalizer = 11 + tagData = 12 + tagBSS = 13 + tagDefer = 14 + tagPanic = 15 + tagMemProf = 16 + tagAllocSample = 17 +) + +var dumpfd uintptr // fd to write the dump to. +var tmpbuf []byte + +// buffer of pending write data +const ( + bufSize = 4096 +) + +var buf [bufSize]byte +var nbuf uintptr + +func dwrite(data unsafe.Pointer, len uintptr) { + if len == 0 { + return + } + if nbuf+len <= bufSize { + copy(buf[nbuf:], (*[bufSize]byte)(data)[:len]) + nbuf += len + return + } + + write(dumpfd, unsafe.Pointer(&buf), int32(nbuf)) + if len >= bufSize { + write(dumpfd, data, int32(len)) + nbuf = 0 + } else { + copy(buf[:], (*[bufSize]byte)(data)[:len]) + nbuf = len + } +} + +func dwritebyte(b byte) { + dwrite(unsafe.Pointer(&b), 1) +} + +func flush() { + write(dumpfd, unsafe.Pointer(&buf), int32(nbuf)) + nbuf = 0 +} + +// Cache of types that have been serialized already. +// We use a type's hash field to pick a bucket. +// Inside a bucket, we keep a list of types that +// have been serialized so far, most recently used first. +// Note: when a bucket overflows we may end up +// serializing a type more than once. That's ok. +const ( + typeCacheBuckets = 256 + typeCacheAssoc = 4 +) + +type typeCacheBucket struct { + t [typeCacheAssoc]*_type +} + +var typecache [typeCacheBuckets]typeCacheBucket + +// dump a uint64 in a varint format parseable by encoding/binary +func dumpint(v uint64) { + var buf [10]byte + var n int + for v >= 0x80 { + buf[n] = byte(v | 0x80) + n++ + v >>= 7 + } + buf[n] = byte(v) + n++ + dwrite(unsafe.Pointer(&buf), uintptr(n)) +} + +func dumpbool(b bool) { + if b { + dumpint(1) + } else { + dumpint(0) + } +} + +// dump varint uint64 length followed by memory contents +func dumpmemrange(data unsafe.Pointer, len uintptr) { + dumpint(uint64(len)) + dwrite(data, len) +} + +func dumpslice(b []byte) { + dumpint(uint64(len(b))) + if len(b) > 0 { + dwrite(unsafe.Pointer(&b[0]), uintptr(len(b))) + } +} + +func dumpstr(s string) { + sp := stringStructOf(&s) + dumpmemrange(sp.str, uintptr(sp.len)) +} + +// dump information for a type +func dumptype(t *_type) { + if t == nil { + return + } + + // If we've definitely serialized the type before, + // no need to do it again. + b := &typecache[t.hash&(typeCacheBuckets-1)] + if t == b.t[0] { + return + } + for i := 1; i < typeCacheAssoc; i++ { + if t == b.t[i] { + // Move-to-front + for j := i; j > 0; j-- { + b.t[j] = b.t[j-1] + } + b.t[0] = t + return + } + } + + // Might not have been dumped yet. Dump it and + // remember we did so. + for j := typeCacheAssoc - 1; j > 0; j-- { + b.t[j] = b.t[j-1] + } + b.t[0] = t + + // dump the type + dumpint(tagType) + dumpint(uint64(uintptr(unsafe.Pointer(t)))) + dumpint(uint64(t.size)) + if x := t.uncommontype; x == nil || t.pkgPath == nil || *t.pkgPath == "" { + dumpstr(*t.string) + } else { + pkgpathstr := *t.pkgPath + pkgpath := stringStructOf(&pkgpathstr) + namestr := *t.name + name := stringStructOf(&namestr) + dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len))) + dwrite(pkgpath.str, uintptr(pkgpath.len)) + dwritebyte('.') + dwrite(name.str, uintptr(name.len)) + } + dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0) +} + +// dump an object +func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) { + dumpbvtypes(&bv, obj) + dumpint(tagObject) + dumpint(uint64(uintptr(obj))) + dumpmemrange(obj, size) + dumpfields(bv) +} + +func dumpotherroot(description string, to unsafe.Pointer) { + dumpint(tagOtherRoot) + dumpstr(description) + dumpint(uint64(uintptr(to))) +} + +func dumpfinalizer(obj unsafe.Pointer, fn *funcval, ft *functype, ot *ptrtype) { + dumpint(tagFinalizer) + dumpint(uint64(uintptr(obj))) + dumpint(uint64(uintptr(unsafe.Pointer(fn)))) + dumpint(uint64(uintptr(unsafe.Pointer(fn.fn)))) + dumpint(uint64(uintptr(unsafe.Pointer(ft)))) + dumpint(uint64(uintptr(unsafe.Pointer(ot)))) +} + +type childInfo struct { + // Information passed up from the callee frame about + // the layout of the outargs region. + argoff uintptr // where the arguments start in the frame + arglen uintptr // size of args region + args bitvector // if args.n >= 0, pointer map of args region + sp *uint8 // callee sp + depth uintptr // depth in call stack (0 == most recent) +} + +// dump kinds & offsets of interesting fields in bv +func dumpbv(cbv *bitvector, offset uintptr) { + bv := gobv(*cbv) + for i := uintptr(0); i < bv.n; i++ { + if bv.bytedata[i/8]>>(i%8)&1 == 1 { + dumpint(fieldKindPtr) + dumpint(uint64(offset + i*sys.PtrSize)) + } + } +} + +func dumpgoroutine(gp *g) { + sp := gp.syscallsp + + dumpint(tagGoroutine) + dumpint(uint64(uintptr(unsafe.Pointer(gp)))) + dumpint(uint64(sp)) + dumpint(uint64(gp.goid)) + dumpint(uint64(gp.gopc)) + dumpint(uint64(readgstatus(gp))) + dumpbool(isSystemGoroutine(gp)) + dumpbool(false) // isbackground + dumpint(uint64(gp.waitsince)) + dumpstr(gp.waitreason) + dumpint(0) + dumpint(uint64(uintptr(unsafe.Pointer(gp.m)))) + dumpint(uint64(uintptr(unsafe.Pointer(gp._defer)))) + dumpint(uint64(uintptr(unsafe.Pointer(gp._panic)))) + + // dump defer & panic records + for d := gp._defer; d != nil; d = d.link { + dumpint(tagDefer) + dumpint(uint64(uintptr(unsafe.Pointer(d)))) + dumpint(uint64(uintptr(unsafe.Pointer(gp)))) + dumpint(0) + dumpint(0) + dumpint(uint64(uintptr(unsafe.Pointer(d.pfn)))) + dumpint(0) + dumpint(uint64(uintptr(unsafe.Pointer(d.link)))) + } + for p := gp._panic; p != nil; p = p.link { + dumpint(tagPanic) + dumpint(uint64(uintptr(unsafe.Pointer(p)))) + dumpint(uint64(uintptr(unsafe.Pointer(gp)))) + eface := efaceOf(&p.arg) + dumpint(uint64(uintptr(unsafe.Pointer(eface._type)))) + dumpint(uint64(uintptr(unsafe.Pointer(eface.data)))) + dumpint(0) // was p->defer, no longer recorded + dumpint(uint64(uintptr(unsafe.Pointer(p.link)))) + } +} + +func dumpgs() { + // goroutines & stacks + for i := 0; uintptr(i) < allglen; i++ { + gp := allgs[i] + status := readgstatus(gp) // The world is stopped so gp will not be in a scan state. + switch status { + default: + print("runtime: unexpected G.status ", hex(status), "\n") + throw("dumpgs in STW - bad status") + case _Gdead: + // ok + case _Grunnable, + _Gsyscall, + _Gwaiting: + dumpgoroutine(gp) + } + } +} + +func finq_callback(fn *funcval, obj unsafe.Pointer, ft *functype, ot *ptrtype) { + dumpint(tagQueuedFinalizer) + dumpint(uint64(uintptr(obj))) + dumpint(uint64(uintptr(unsafe.Pointer(fn)))) + dumpint(uint64(uintptr(unsafe.Pointer(fn.fn)))) + dumpint(uint64(uintptr(unsafe.Pointer(ft)))) + dumpint(uint64(uintptr(unsafe.Pointer(ot)))) +} + +func dumproots() { + // MSpan.types + for _, s := range mheap_.allspans { + if s.state == _MSpanInUse { + // Finalizers + for sp := s.specials; sp != nil; sp = sp.next { + if sp.kind != _KindSpecialFinalizer { + continue + } + spf := (*specialfinalizer)(unsafe.Pointer(sp)) + p := unsafe.Pointer(s.base() + uintptr(spf.special.offset)) + dumpfinalizer(p, spf.fn, spf.ft, spf.ot) + } + } + } + + // Finalizer queue + iterate_finq(finq_callback) +} + +// Bit vector of free marks. +// Needs to be as big as the largest number of objects per span. +var freemark [_PageSize / 8]bool + +func dumpobjs() { + for _, s := range mheap_.allspans { + if s.state != _MSpanInUse { + continue + } + p := s.base() + size := s.elemsize + n := (s.npages << _PageShift) / size + if n > uintptr(len(freemark)) { + throw("freemark array doesn't have enough entries") + } + + for freeIndex := uintptr(0); freeIndex < s.nelems; freeIndex++ { + if s.isFree(freeIndex) { + freemark[freeIndex] = true + } + } + + for j := uintptr(0); j < n; j, p = j+1, p+size { + if freemark[j] { + freemark[j] = false + continue + } + dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size)) + } + } +} + +func dumpparams() { + dumpint(tagParams) + x := uintptr(1) + if *(*byte)(unsafe.Pointer(&x)) == 1 { + dumpbool(false) // little-endian ptrs + } else { + dumpbool(true) // big-endian ptrs + } + dumpint(sys.PtrSize) + dumpint(uint64(mheap_.arena_start)) + dumpint(uint64(mheap_.arena_used)) + dumpstr(sys.GOARCH) + dumpstr(sys.Goexperiment) + dumpint(uint64(ncpu)) +} + +func dumpms() { + for mp := allm; mp != nil; mp = mp.alllink { + dumpint(tagOSThread) + dumpint(uint64(uintptr(unsafe.Pointer(mp)))) + dumpint(uint64(mp.id)) + dumpint(mp.procid) + } +} + +func dumpmemstats() { + dumpint(tagMemStats) + dumpint(memstats.alloc) + dumpint(memstats.total_alloc) + dumpint(memstats.sys) + dumpint(memstats.nlookup) + dumpint(memstats.nmalloc) + dumpint(memstats.nfree) + dumpint(memstats.heap_alloc) + dumpint(memstats.heap_sys) + dumpint(memstats.heap_idle) + dumpint(memstats.heap_inuse) + dumpint(memstats.heap_released) + dumpint(memstats.heap_objects) + dumpint(memstats.stacks_inuse) + dumpint(memstats.stacks_sys) + dumpint(memstats.mspan_inuse) + dumpint(memstats.mspan_sys) + dumpint(memstats.mcache_inuse) + dumpint(memstats.mcache_sys) + dumpint(memstats.buckhash_sys) + dumpint(memstats.gc_sys) + dumpint(memstats.other_sys) + dumpint(memstats.next_gc) + dumpint(memstats.last_gc) + dumpint(memstats.pause_total_ns) + for i := 0; i < 256; i++ { + dumpint(memstats.pause_ns[i]) + } + dumpint(uint64(memstats.numgc)) +} + +func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *location, size, allocs, frees uintptr) { + stk := (*[100000]location)(unsafe.Pointer(pstk)) + dumpint(tagMemProf) + dumpint(uint64(uintptr(unsafe.Pointer(b)))) + dumpint(uint64(size)) + dumpint(uint64(nstk)) + for i := uintptr(0); i < nstk; i++ { + pc := stk[i].pc + fn := stk[i].function + file := stk[i].filename + line := stk[i].lineno + if fn == "" { + var buf [64]byte + n := len(buf) + n-- + buf[n] = ')' + if pc == 0 { + n-- + buf[n] = '0' + } else { + for pc > 0 { + n-- + buf[n] = "0123456789abcdef"[pc&15] + pc >>= 4 + } + } + n-- + buf[n] = 'x' + n-- + buf[n] = '0' + n-- + buf[n] = '(' + dumpslice(buf[n:]) + dumpstr("?") + dumpint(0) + } else { + dumpstr(fn) + dumpstr(file) + dumpint(uint64(line)) + } + } + dumpint(uint64(allocs)) + dumpint(uint64(frees)) +} + +func dumpmemprof() { + iterate_memprof(dumpmemprof_callback) + for _, s := range mheap_.allspans { + if s.state != _MSpanInUse { + continue + } + for sp := s.specials; sp != nil; sp = sp.next { + if sp.kind != _KindSpecialProfile { + continue + } + spp := (*specialprofile)(unsafe.Pointer(sp)) + p := s.base() + uintptr(spp.special.offset) + dumpint(tagAllocSample) + dumpint(uint64(p)) + dumpint(uint64(uintptr(unsafe.Pointer(spp.b)))) + } + } +} + +var dumphdr = []byte("go1.7 heap dump\n") + +func mdump() { + // make sure we're done sweeping + for _, s := range mheap_.allspans { + if s.state == _MSpanInUse { + s.ensureSwept() + } + } + memclrNoHeapPointers(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache)) + dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr))) + dumpparams() + dumpobjs() + dumpgs() + dumpms() + dumproots() + dumpmemstats() + dumpmemprof() + dumpint(tagEOF) + flush() +} + +func writeheapdump_m(fd uintptr) { + _g_ := getg() + casgstatus(_g_.m.curg, _Grunning, _Gwaiting) + _g_.waitreason = "dumping heap" + + // Update stats so we can dump them. + // As a side effect, flushes all the MCaches so the MSpan.freelist + // lists contain all the free objects. + updatememstats(nil) + + // Set dump file. + dumpfd = fd + + // Call dump routine. + mdump() + + // Reset dump file. + dumpfd = 0 + if tmpbuf != nil { + sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys) + tmpbuf = nil + } + + casgstatus(_g_.m.curg, _Gwaiting, _Grunning) +} + +// dumpint() the kind & offset of each field in an object. +func dumpfields(bv bitvector) { + dumpbv(&bv, 0) + dumpint(fieldKindEol) +} + +// The heap dump reader needs to be able to disambiguate +// Eface entries. So it needs to know every type that might +// appear in such an entry. The following routine accomplishes that. +// TODO(rsc, khr): Delete - no longer possible. + +// Dump all the types that appear in the type field of +// any Eface described by this bit vector. +func dumpbvtypes(bv *bitvector, base unsafe.Pointer) { +} + +func makeheapobjbv(p uintptr, size uintptr) bitvector { + // Extend the temp buffer if necessary. + nptr := size / sys.PtrSize + if uintptr(len(tmpbuf)) < nptr/8+1 { + if tmpbuf != nil { + sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys) + } + n := nptr/8 + 1 + p := sysAlloc(n, &memstats.other_sys) + if p == nil { + throw("heapdump: out of memory") + } + tmpbuf = (*[1 << 30]byte)(p)[:n] + } + // Convert heap bitmap to pointer bitmap. + for i := uintptr(0); i < nptr/8+1; i++ { + tmpbuf[i] = 0 + } + i := uintptr(0) + hbits := heapBitsForAddr(p) + for ; i < nptr; i++ { + if i != 1 && !hbits.morePointers() { + break // end of object + } + if hbits.isPointer() { + tmpbuf[i/8] |= 1 << (i % 8) + } + hbits = hbits.next() + } + return bitvector{int32(i), &tmpbuf[0]} +} + +type gobitvector struct { + n uintptr + bytedata []uint8 +} + +func gobv(bv bitvector) gobitvector { + return gobitvector{ + uintptr(bv.n), + (*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8], + } +} diff --git a/libgo/go/runtime/iface_test.go b/libgo/go/runtime/iface_test.go index 7f27baa..3744a4f 100644 --- a/libgo/go/runtime/iface_test.go +++ b/libgo/go/runtime/iface_test.go @@ -223,6 +223,10 @@ func BenchmarkAssertE2E2Blank(b *testing.B) { } func TestNonEscapingConvT2E(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("does not work on gccgo without better escape analysis") + } + m := make(map[interface{}]bool) m[42] = true if !m[42] { @@ -243,6 +247,10 @@ func TestNonEscapingConvT2E(t *testing.T) { } func TestNonEscapingConvT2I(t *testing.T) { + if runtime.Compiler == "gccgo" { + t.Skip("does not work on gccgo without better escape analysis") + } + m := make(map[I1]bool) m[TM(42)] = true if !m[TM(42)] { diff --git a/libgo/go/runtime/lock_futex.go b/libgo/go/runtime/lock_futex.go index 4d914b2..9877bc3 100644 --- a/libgo/go/runtime/lock_futex.go +++ b/libgo/go/runtime/lock_futex.go @@ -198,13 +198,10 @@ func notetsleep_internal(n *note, ns int64) bool { } func notetsleep(n *note, ns int64) bool { - // Currently OK to sleep in non-g0 for gccgo. It happens in - // stoptheworld because our version of systemstack does not - // change to g0. - // gp := getg() - // if gp != gp.m.g0 && gp.m.preemptoff != "" { - // throw("notetsleep not on g0") - // } + gp := getg() + if gp != gp.m.g0 && gp.m.preemptoff != "" { + throw("notetsleep not on g0") + } return notetsleep_internal(n, ns) } diff --git a/libgo/go/runtime/lock_sema.go b/libgo/go/runtime/lock_sema.go index 5c70a74..57fee19 100644 --- a/libgo/go/runtime/lock_sema.go +++ b/libgo/go/runtime/lock_sema.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin nacl netbsd openbsd plan9 solaris windows +// +build aix darwin nacl netbsd openbsd plan9 solaris windows package runtime @@ -251,14 +251,9 @@ func notetsleep_internal(n *note, ns int64, gp *g, deadline int64) bool { func notetsleep(n *note, ns int64) bool { gp := getg() - - // Currently OK to sleep in non-g0 for gccgo. It happens in - // stoptheworld because our version of systemstack does not - // change to g0. - // if gp != gp.m.g0 && gp.m.preemptoff != "" { - // throw("notetsleep not on g0") - // } - + if gp != gp.m.g0 && gp.m.preemptoff != "" { + throw("notetsleep not on g0") + } semacreate(gp.m) return notetsleep_internal(n, ns, nil, 0) } diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go new file mode 100644 index 0000000..ed25782 --- /dev/null +++ b/libgo/go/runtime/malloc.go @@ -0,0 +1,998 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Memory allocator. +// +// This was originally based on tcmalloc, but has diverged quite a bit. +// http://goog-perftools.sourceforge.net/doc/tcmalloc.html + +// The main allocator works in runs of pages. +// Small allocation sizes (up to and including 32 kB) are +// rounded to one of about 70 size classes, each of which +// has its own free set of objects of exactly that size. +// Any free page of memory can be split into a set of objects +// of one size class, which are then managed using a free bitmap. +// +// The allocator's data structures are: +// +// fixalloc: a free-list allocator for fixed-size off-heap objects, +// used to manage storage used by the allocator. +// mheap: the malloc heap, managed at page (8192-byte) granularity. +// mspan: a run of pages managed by the mheap. +// mcentral: collects all spans of a given size class. +// mcache: a per-P cache of mspans with free space. +// mstats: allocation statistics. +// +// Allocating a small object proceeds up a hierarchy of caches: +// +// 1. Round the size up to one of the small size classes +// and look in the corresponding mspan in this P's mcache. +// Scan the mspan's free bitmap to find a free slot. +// If there is a free slot, allocate it. +// This can all be done without acquiring a lock. +// +// 2. If the mspan has no free slots, obtain a new mspan +// from the mcentral's list of mspans of the required size +// class that have free space. +// Obtaining a whole span amortizes the cost of locking +// the mcentral. +// +// 3. If the mcentral's mspan list is empty, obtain a run +// of pages from the mheap to use for the mspan. +// +// 4. If the mheap is empty or has no page runs large enough, +// allocate a new group of pages (at least 1MB) from the +// operating system. Allocating a large run of pages +// amortizes the cost of talking to the operating system. +// +// Sweeping an mspan and freeing objects on it proceeds up a similar +// hierarchy: +// +// 1. If the mspan is being swept in response to allocation, it +// is returned to the mcache to satisfy the allocation. +// +// 2. Otherwise, if the mspan still has allocated objects in it, +// it is placed on the mcentral free list for the mspan's size +// class. +// +// 3. Otherwise, if all objects in the mspan are free, the mspan +// is now "idle", so it is returned to the mheap and no longer +// has a size class. +// This may coalesce it with adjacent idle mspans. +// +// 4. If an mspan remains idle for long enough, return its pages +// to the operating system. +// +// Allocating and freeing a large object uses the mheap +// directly, bypassing the mcache and mcentral. +// +// Free object slots in an mspan are zeroed only if mspan.needzero is +// false. If needzero is true, objects are zeroed as they are +// allocated. There are various benefits to delaying zeroing this way: +// +// 1. Stack frame allocation can avoid zeroing altogether. +// +// 2. It exhibits better temporal locality, since the program is +// probably about to write to the memory. +// +// 3. We don't zero pages that never get reused. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// C function to get the end of the program's memory. +func getEnd() uintptr + +// For gccgo, use go:linkname to rename compiler-called functions to +// themselves, so that the compiler will export them. +// +//go:linkname newobject runtime.newobject + +// Functions called by C code. +//go:linkname mallocgc runtime.mallocgc + +const ( + debugMalloc = false + + maxTinySize = _TinySize + tinySizeClass = _TinySizeClass + maxSmallSize = _MaxSmallSize + + pageShift = _PageShift + pageSize = _PageSize + pageMask = _PageMask + // By construction, single page spans of the smallest object class + // have the most objects per span. + maxObjsPerSpan = pageSize / 8 + + mSpanInUse = _MSpanInUse + + concurrentSweep = _ConcurrentSweep + + _PageSize = 1 << _PageShift + _PageMask = _PageSize - 1 + + // _64bit = 1 on 64-bit systems, 0 on 32-bit systems + _64bit = 1 << (^uintptr(0) >> 63) / 2 + + // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go. + _TinySize = 16 + _TinySizeClass = 2 + + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. + _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + + // Per-P, per order stack segment cache size. + _StackCacheSize = 32 * 1024 + + // Number of orders that get caching. Order 0 is FixedStack + // and each successive order is twice as large. + // We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks + // will be allocated directly. + // Since FixedStack is different on different systems, we + // must vary NumStackOrders to keep the same maximum cached size. + // OS | FixedStack | NumStackOrders + // -----------------+------------+--------------- + // linux/darwin/bsd | 2KB | 4 + // windows/32 | 4KB | 3 + // windows/64 | 8KB | 2 + // plan9 | 4KB | 3 + _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 + + // Number of bits in page to span calculations (4k pages). + // On Windows 64-bit we limit the arena to 32GB or 35 bits. + // Windows counts memory used by page table into committed memory + // of the process, so we can't reserve too much memory. + // See https://golang.org/issue/5402 and https://golang.org/issue/5236. + // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. + // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. + // The only exception is mips32 which only has access to low 2GB of virtual memory. + // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, + // but as most devices have less than 4GB of physical memory anyway, we + // try to be conservative here, and only ask for a 2GB heap. + _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift + + _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1) + + // Max number of threads to run garbage collection. + // 2, 3, and 4 are all plausible maximums depending + // on the hardware details of the machine. The garbage + // collector scales well to 32 cpus. + _MaxGcproc = 32 + + _MaxArena32 = 1<<32 - 1 + + // minLegalPointer is the smallest possible legal pointer. + // This is the smallest possible architectural page size, + // since we assume that the first page is never mapped. + // + // This should agree with minZeroPage in the compiler. + minLegalPointer uintptr = 4096 +) + +// physPageSize is the size in bytes of the OS's physical pages. +// Mapping and unmapping operations must be done at multiples of +// physPageSize. +// +// This must be set by the OS init code (typically in osinit) before +// mallocinit. +var physPageSize uintptr + +// OS-defined helpers: +// +// sysAlloc obtains a large chunk of zeroed memory from the +// operating system, typically on the order of a hundred kilobytes +// or a megabyte. +// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator +// may use larger alignment, so the caller must be careful to realign the +// memory obtained by sysAlloc. +// +// SysUnused notifies the operating system that the contents +// of the memory region are no longer needed and can be reused +// for other purposes. +// SysUsed notifies the operating system that the contents +// of the memory region are needed again. +// +// SysFree returns it unconditionally; this is only used if +// an out-of-memory error has been detected midway through +// an allocation. It is okay if SysFree is a no-op. +// +// SysReserve reserves address space without allocating memory. +// If the pointer passed to it is non-nil, the caller wants the +// reservation there, but SysReserve can still choose another +// location if that one is unavailable. On some systems and in some +// cases SysReserve will simply check that the address space is +// available and not actually reserve it. If SysReserve returns +// non-nil, it sets *reserved to true if the address space is +// reserved, false if it has merely been checked. +// NOTE: SysReserve returns OS-aligned memory, but the heap allocator +// may use larger alignment, so the caller must be careful to realign the +// memory obtained by sysAlloc. +// +// SysMap maps previously reserved address space for use. +// The reserved argument is true if the address space was really +// reserved, not merely checked. +// +// SysFault marks a (already sysAlloc'd) region to fault +// if accessed. Used only for debugging the runtime. + +func mallocinit() { + if class_to_size[_TinySizeClass] != _TinySize { + throw("bad TinySizeClass") + } + + // Not used for gccgo. + // testdefersizes() + + // Copy class sizes out for statistics table. + for i := range class_to_size { + memstats.by_size[i].size = uint32(class_to_size[i]) + } + + // Check physPageSize. + if physPageSize == 0 { + // The OS init code failed to fetch the physical page size. + throw("failed to get system page size") + } + if physPageSize < minPhysPageSize { + print("system page size (", physPageSize, ") is smaller than minimum page size (", minPhysPageSize, ")\n") + throw("bad system page size") + } + if physPageSize&(physPageSize-1) != 0 { + print("system page size (", physPageSize, ") must be a power of 2\n") + throw("bad system page size") + } + + var p, bitmapSize, spansSize, pSize, limit uintptr + var reserved bool + + // limit = runtime.memlimit(); + // See https://golang.org/issue/5049 + // TODO(rsc): Fix after 1.1. + limit = 0 + + // Set up the allocation arena, a contiguous area of memory where + // allocated data will be found. The arena begins with a bitmap large + // enough to hold 2 bits per allocated word. + if sys.PtrSize == 8 && (limit == 0 || limit > 1<<30) { + // On a 64-bit machine, allocate from a single contiguous reservation. + // 512 GB (MaxMem) should be big enough for now. + // + // The code will work with the reservation at any address, but ask + // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f). + // Allocating a 512 GB region takes away 39 bits, and the amd64 + // doesn't let us choose the top 17 bits, so that leaves the 9 bits + // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means + // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df. + // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid + // UTF-8 sequences, and they are otherwise as far away from + // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 + // addresses. An earlier attempt to use 0x11f8 caused out of memory errors + // on OS X during thread allocations. 0x00c0 causes conflicts with + // AddressSanitizer which reserves all memory up to 0x0100. + // These choices are both for debuggability and to reduce the + // odds of a conservative garbage collector (as is still used in gccgo) + // not collecting memory because some non-pointer block of memory + // had a bit pattern that matched a memory address. + // + // Actually we reserve 544 GB (because the bitmap ends up being 32 GB) + // but it hardly matters: e0 00 is not valid UTF-8 either. + // + // If this fails we fall back to the 32 bit memory mechanism + // + // However, on arm64, we ignore all this advice above and slam the + // allocation at 0x40 << 32 because when using 4k pages with 3-level + // translation buffers, the user address space is limited to 39 bits + // On darwin/arm64, the address space is even smaller. + arenaSize := round(_MaxMem, _PageSize) + bitmapSize = arenaSize / (sys.PtrSize * 8 / 2) + spansSize = arenaSize / _PageSize * sys.PtrSize + spansSize = round(spansSize, _PageSize) + for i := 0; i <= 0x7f; i++ { + switch { + case GOARCH == "arm64" && GOOS == "darwin": + p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) + case GOARCH == "arm64": + p = uintptr(i)<<40 | uintptrMask&(0x0040<<32) + default: + p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) + } + pSize = bitmapSize + spansSize + arenaSize + _PageSize + p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) + if p != 0 { + break + } + } + } + + if p == 0 { + // On a 32-bit machine, we can't typically get away + // with a giant virtual address space reservation. + // Instead we map the memory information bitmap + // immediately after the data segment, large enough + // to handle the entire 4GB address space (256 MB), + // along with a reservation for an initial arena. + // When that gets used up, we'll start asking the kernel + // for any memory anywhere. + + // If we fail to allocate, try again with a smaller arena. + // This is necessary on Android L where we share a process + // with ART, which reserves virtual memory aggressively. + // In the worst case, fall back to a 0-sized initial arena, + // in the hope that subsequent reservations will succeed. + arenaSizes := [...]uintptr{ + 512 << 20, + 256 << 20, + 128 << 20, + 0, + } + + for _, arenaSize := range &arenaSizes { + bitmapSize = (_MaxArena32 + 1) / (sys.PtrSize * 8 / 2) + spansSize = (_MaxArena32 + 1) / _PageSize * sys.PtrSize + if limit > 0 && arenaSize+bitmapSize+spansSize > limit { + bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1) + arenaSize = bitmapSize * 8 + spansSize = arenaSize / _PageSize * sys.PtrSize + } + spansSize = round(spansSize, _PageSize) + + // SysReserve treats the address we ask for, end, as a hint, + // not as an absolute requirement. If we ask for the end + // of the data segment but the operating system requires + // a little more space before we can start allocating, it will + // give out a slightly higher pointer. Except QEMU, which + // is buggy, as usual: it won't adjust the pointer upward. + // So adjust it upward a little bit ourselves: 1/4 MB to get + // away from the running binary image and then round up + // to a MB boundary. + p = round(getEnd()+(1<<18), 1<<20) + pSize = bitmapSize + spansSize + arenaSize + _PageSize + p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) + if p != 0 { + break + } + } + if p == 0 { + throw("runtime: cannot reserve arena virtual address space") + } + } + + // PageSize can be larger than OS definition of page size, + // so SysReserve can give us a PageSize-unaligned pointer. + // To overcome this we ask for PageSize more and round up the pointer. + p1 := round(p, _PageSize) + + spansStart := p1 + mheap_.bitmap = p1 + spansSize + bitmapSize + if sys.PtrSize == 4 { + // Set arena_start such that we can accept memory + // reservations located anywhere in the 4GB virtual space. + mheap_.arena_start = 0 + } else { + mheap_.arena_start = p1 + (spansSize + bitmapSize) + } + mheap_.arena_end = p + pSize + mheap_.arena_used = p1 + (spansSize + bitmapSize) + mheap_.arena_reserved = reserved + + if mheap_.arena_start&(_PageSize-1) != 0 { + println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) + throw("misrounded allocation in mallocinit") + } + + // Initialize the rest of the allocator. + mheap_.init(spansStart, spansSize) + _g_ := getg() + _g_.m.mcache = allocmcache() +} + +// sysAlloc allocates the next n bytes from the heap arena. The +// returned pointer is always _PageSize aligned and between +// h.arena_start and h.arena_end. sysAlloc returns nil on failure. +// There is no corresponding free function. +func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { + if n > h.arena_end-h.arena_used { + // We are in 32-bit mode, maybe we didn't use all possible address space yet. + // Reserve some more space. + p_size := round(n+_PageSize, 256<<20) + new_end := h.arena_end + p_size // Careful: can overflow + if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxArena32 { + // TODO: It would be bad if part of the arena + // is reserved and part is not. + var reserved bool + p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) + if p == 0 { + return nil + } + if p == h.arena_end { + h.arena_end = new_end + h.arena_reserved = reserved + } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxArena32 { + // Keep everything page-aligned. + // Our pages are bigger than hardware pages. + h.arena_end = p + p_size + used := p + (-p & (_PageSize - 1)) + h.mapBits(used) + h.mapSpans(used) + h.arena_used = used + h.arena_reserved = reserved + } else { + // We haven't added this allocation to + // the stats, so subtract it from a + // fake stat (but avoid underflow). + stat := uint64(p_size) + sysFree(unsafe.Pointer(p), p_size, &stat) + } + } + } + + if n <= h.arena_end-h.arena_used { + // Keep taking from our reservation. + p := h.arena_used + sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys) + h.mapBits(p + n) + h.mapSpans(p + n) + h.arena_used = p + n + if raceenabled { + racemapshadow(unsafe.Pointer(p), n) + } + + if p&(_PageSize-1) != 0 { + throw("misrounded allocation in MHeap_SysAlloc") + } + return unsafe.Pointer(p) + } + + // If using 64-bit, our reservation is all we have. + if h.arena_end-h.arena_start > _MaxArena32 { + return nil + } + + // On 32-bit, once the reservation is gone we can + // try to get memory at a location chosen by the OS. + p_size := round(n, _PageSize) + _PageSize + p := uintptr(sysAlloc(p_size, &memstats.heap_sys)) + if p == 0 { + return nil + } + + if p < h.arena_start || p+p_size-h.arena_start > _MaxArena32 { + top := ^uintptr(0) + if top-h.arena_start-1 > _MaxArena32 { + top = h.arena_start + _MaxArena32 + 1 + } + print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") + sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) + return nil + } + + p_end := p + p_size + p += -p & (_PageSize - 1) + if p+n > h.arena_used { + h.mapBits(p + n) + h.mapSpans(p + n) + h.arena_used = p + n + if p_end > h.arena_end { + h.arena_end = p_end + } + if raceenabled { + racemapshadow(unsafe.Pointer(p), n) + } + } + + if p&(_PageSize-1) != 0 { + throw("misrounded allocation in MHeap_SysAlloc") + } + return unsafe.Pointer(p) +} + +// base address for all 0-byte allocations +var zerobase uintptr + +// nextFreeFast returns the next free object if one is quickly available. +// Otherwise it returns 0. +func nextFreeFast(s *mspan) gclinkptr { + theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache? + if theBit < 64 { + result := s.freeindex + uintptr(theBit) + if result < s.nelems { + freeidx := result + 1 + if freeidx%64 == 0 && freeidx != s.nelems { + return 0 + } + s.allocCache >>= (theBit + 1) + s.freeindex = freeidx + v := gclinkptr(result*s.elemsize + s.base()) + s.allocCount++ + return v + } + } + return 0 +} + +// nextFree returns the next free object from the cached span if one is available. +// Otherwise it refills the cache with a span with an available object and +// returns that object along with a flag indicating that this was a heavy +// weight allocation. If it is a heavy weight allocation the caller must +// determine whether a new GC cycle needs to be started or if the GC is active +// whether this goroutine needs to assist the GC. +func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) { + s = c.alloc[sizeclass] + shouldhelpgc = false + freeIndex := s.nextFreeIndex() + if freeIndex == s.nelems { + // The span is full. + if uintptr(s.allocCount) != s.nelems { + println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems) + throw("s.allocCount != s.nelems && freeIndex == s.nelems") + } + systemstack(func() { + c.refill(int32(sizeclass)) + }) + shouldhelpgc = true + s = c.alloc[sizeclass] + + freeIndex = s.nextFreeIndex() + } + + if freeIndex >= s.nelems { + throw("freeIndex is not valid") + } + + v = gclinkptr(freeIndex*s.elemsize + s.base()) + s.allocCount++ + if uintptr(s.allocCount) > s.nelems { + println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems) + throw("s.allocCount > s.nelems") + } + return +} + +// Allocate an object of size bytes. +// Small objects are allocated from the per-P cache's free lists. +// Large objects (> 32 kB) are allocated straight from the heap. +func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + + if size == 0 { + return unsafe.Pointer(&zerobase) + } + + if debug.sbrk != 0 { + align := uintptr(16) + if typ != nil { + align = uintptr(typ.align) + } + return persistentalloc(size, align, &memstats.other_sys) + } + + // When using gccgo, when a cgo or SWIG function has an + // interface return type and the function returns a + // non-pointer, memory allocation occurs after syscall.Cgocall + // but before syscall.CgocallDone. Treat this allocation as a + // callback. + incallback := false + if gomcache() == nil && getg().m.ncgo > 0 { + exitsyscall(0) + incallback = true + } + + // assistG is the G to charge for this allocation, or nil if + // GC is not currently active. + var assistG *g + if gcBlackenEnabled != 0 { + // Charge the current user G for this allocation. + assistG = getg() + if assistG.m.curg != nil { + assistG = assistG.m.curg + } + // Charge the allocation against the G. We'll account + // for internal fragmentation at the end of mallocgc. + assistG.gcAssistBytes -= int64(size) + + if assistG.gcAssistBytes < 0 { + // This G is in debt. Assist the GC to correct + // this before allocating. This must happen + // before disabling preemption. + gcAssistAlloc(assistG) + } + } + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + mp.mallocing = 1 + + shouldhelpgc := false + dataSize := size + c := gomcache() + var x unsafe.Pointer + noscan := typ == nil || typ.kind&kindNoPointers != 0 + if size <= maxSmallSize { + if noscan && size < maxTinySize { + // Tiny allocator. + // + // Tiny allocator combines several tiny allocation requests + // into a single memory block. The resulting memory block + // is freed when all subobjects are unreachable. The subobjects + // must be noscan (don't have pointers), this ensures that + // the amount of potentially wasted memory is bounded. + // + // Size of the memory block used for combining (maxTinySize) is tunable. + // Current setting is 16 bytes, which relates to 2x worst case memory + // wastage (when all but one subobjects are unreachable). + // 8 bytes would result in no wastage at all, but provides less + // opportunities for combining. + // 32 bytes provides more opportunities for combining, + // but can lead to 4x worst case wastage. + // The best case winning is 8x regardless of block size. + // + // Objects obtained from tiny allocator must not be freed explicitly. + // So when an object will be freed explicitly, we ensure that + // its size >= maxTinySize. + // + // SetFinalizer has a special case for objects potentially coming + // from tiny allocator, it such case it allows to set finalizers + // for an inner byte of a memory block. + // + // The main targets of tiny allocator are small strings and + // standalone escaping variables. On a json benchmark + // the allocator reduces number of allocations by ~12% and + // reduces heap size by ~20%. + off := c.tinyoffset + // Align tiny pointer for required (conservative) alignment. + if size&7 == 0 { + off = round(off, 8) + } else if size&3 == 0 { + off = round(off, 4) + } else if size&1 == 0 { + off = round(off, 2) + } + if off+size <= maxTinySize && c.tiny != 0 { + // The object fits into existing tiny block. + x = unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + size + c.local_tinyallocs++ + mp.mallocing = 0 + releasem(mp) + if incallback { + entersyscall(0) + } + return x + } + // Allocate a new maxTinySize block. + span := c.alloc[tinySizeClass] + v := nextFreeFast(span) + if v == 0 { + v, _, shouldhelpgc = c.nextFree(tinySizeClass) + } + x = unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + // See if we need to replace the existing tiny block with the new one + // based on amount of remaining free space. + if size < c.tinyoffset || c.tiny == 0 { + c.tiny = uintptr(x) + c.tinyoffset = size + } + size = maxTinySize + } else { + var sizeclass uint8 + if size <= smallSizeMax-8 { + sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv] + } else { + sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv] + } + size = uintptr(class_to_size[sizeclass]) + span := c.alloc[sizeclass] + v := nextFreeFast(span) + if v == 0 { + v, span, shouldhelpgc = c.nextFree(sizeclass) + } + x = unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(unsafe.Pointer(v), size) + } + } + } else { + var s *mspan + shouldhelpgc = true + systemstack(func() { + s = largeAlloc(size, needzero) + }) + s.freeindex = 1 + s.allocCount = 1 + x = unsafe.Pointer(s.base()) + size = s.elemsize + } + + var scanSize uintptr + if noscan { + heapBitsSetTypeNoScan(uintptr(x)) + } else { + heapBitsSetType(uintptr(x), size, dataSize, typ) + if dataSize > typ.size { + // Array allocation. If there are any + // pointers, GC has to scan to the last + // element. + if typ.ptrdata != 0 { + scanSize = dataSize - typ.size + typ.ptrdata + } + } else { + scanSize = typ.ptrdata + } + c.local_scan += scanSize + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + if gcphase != _GCoff { + gcmarknewobject(uintptr(x), size, scanSize) + } + + if raceenabled { + racemalloc(x, size) + } + + if msanenabled { + msanmalloc(x, size) + } + + mp.mallocing = 0 + releasem(mp) + + if debug.allocfreetrace != 0 { + tracealloc(x, size, typ) + } + + if rate := MemProfileRate; rate > 0 { + if size < uintptr(rate) && int32(size) < c.next_sample { + c.next_sample -= int32(size) + } else { + mp := acquirem() + profilealloc(mp, x, size) + releasem(mp) + } + } + + if assistG != nil { + // Account for internal fragmentation in the assist + // debt now that we know it. + assistG.gcAssistBytes -= int64(size - dataSize) + } + + if shouldhelpgc && gcShouldStart(false) { + gcStart(gcBackgroundMode, false) + } + + if getg().preempt { + checkPreempt() + } + + if incallback { + entersyscall(0) + } + + return x +} + +func largeAlloc(size uintptr, needzero bool) *mspan { + // print("largeAlloc size=", size, "\n") + + if size+_PageSize < size { + throw("out of memory") + } + npages := size >> _PageShift + if size&_PageMask != 0 { + npages++ + } + + // Deduct credit for this span allocation and sweep if + // necessary. mHeap_Alloc will also sweep npages, so this only + // pays the debt down to npage pages. + deductSweepCredit(npages*_PageSize, npages) + + s := mheap_.alloc(npages, 0, true, needzero) + if s == nil { + throw("out of memory") + } + s.limit = s.base() + size + heapBitsForSpan(s.base()).initSpan(s) + return s +} + +// implementation of new builtin +// compiler (both frontend and SSA backend) knows the signature +// of this function +func newobject(typ *_type) unsafe.Pointer { + return mallocgc(typ.size, typ, true) +} + +//go:linkname reflect_unsafe_New reflect.unsafe_New +func reflect_unsafe_New(typ *_type) unsafe.Pointer { + return newobject(typ) +} + +// newarray allocates an array of n elements of type typ. +func newarray(typ *_type, n int) unsafe.Pointer { + if n < 0 || uintptr(n) > maxSliceCap(typ.size) { + panic(plainError("runtime: allocation size out of range")) + } + return mallocgc(typ.size*uintptr(n), typ, true) +} + +//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray +func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer { + return newarray(typ, n) +} + +func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { + mp.mcache.next_sample = nextSample() + mProf_Malloc(x, size) +} + +// nextSample returns the next sampling point for heap profiling. +// It produces a random variable with a geometric distribution and +// mean MemProfileRate. This is done by generating a uniformly +// distributed random number and applying the cumulative distribution +// function for an exponential. +func nextSample() int32 { + if GOOS == "plan9" { + // Plan 9 doesn't support floating point in note handler. + if g := getg(); g == g.m.gsignal { + return nextSampleNoFP() + } + } + + period := MemProfileRate + + // make nextSample not overflow. Maximum possible step is + // -ln(1/(1<<kRandomBitCount)) * period, approximately 20 * period. + switch { + case period > 0x7000000: + period = 0x7000000 + case period == 0: + return 0 + } + + // Let m be the sample rate, + // the probability distribution function is m*exp(-mx), so the CDF is + // p = 1 - exp(-mx), so + // q = 1 - p == exp(-mx) + // log_e(q) = -mx + // -log_e(q)/m = x + // x = -log_e(q) * period + // x = log_2(q) * (-log_e(2)) * period ; Using log_2 for efficiency + const randomBitCount = 26 + q := fastrand()%(1<<randomBitCount) + 1 + qlog := fastlog2(float64(q)) - randomBitCount + if qlog > 0 { + qlog = 0 + } + const minusLog2 = -0.6931471805599453 // -ln(2) + return int32(qlog*(minusLog2*float64(period))) + 1 +} + +// nextSampleNoFP is similar to nextSample, but uses older, +// simpler code to avoid floating point. +func nextSampleNoFP() int32 { + // Set first allocation sample size. + rate := MemProfileRate + if rate > 0x3fffffff { // make 2*rate not overflow + rate = 0x3fffffff + } + if rate != 0 { + return int32(int(fastrand()) % (2 * rate)) + } + return 0 +} + +type persistentAlloc struct { + base unsafe.Pointer + off uintptr +} + +var globalAlloc struct { + mutex + persistentAlloc +} + +// Wrapper around sysAlloc that can allocate small chunks. +// There is no associated free operation. +// Intended for things like function/type/debug-related persistent data. +// If align is 0, uses default align (currently 8). +// The returned memory will be zeroed. +// +// Consider marking persistentalloc'd types go:notinheap. +func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + var p unsafe.Pointer + systemstack(func() { + p = persistentalloc1(size, align, sysStat) + }) + return p +} + +// Must run on system stack because stack growth can (re)invoke it. +// See issue 9174. +//go:systemstack +func persistentalloc1(size, align uintptr, sysStat *uint64) unsafe.Pointer { + const ( + chunk = 256 << 10 + maxBlock = 64 << 10 // VM reservation granularity is 64K on windows + ) + + if size == 0 { + throw("persistentalloc: size == 0") + } + if align != 0 { + if align&(align-1) != 0 { + throw("persistentalloc: align is not a power of 2") + } + if align > _PageSize { + throw("persistentalloc: align is too large") + } + } else { + align = 8 + } + + if size >= maxBlock { + return sysAlloc(size, sysStat) + } + + mp := acquirem() + var persistent *persistentAlloc + if mp != nil && mp.p != 0 { + persistent = &mp.p.ptr().palloc + } else { + lock(&globalAlloc.mutex) + persistent = &globalAlloc.persistentAlloc + } + persistent.off = round(persistent.off, align) + if persistent.off+size > chunk || persistent.base == nil { + persistent.base = sysAlloc(chunk, &memstats.other_sys) + if persistent.base == nil { + if persistent == &globalAlloc.persistentAlloc { + unlock(&globalAlloc.mutex) + } + throw("runtime: cannot allocate memory") + } + persistent.off = 0 + } + p := add(persistent.base, persistent.off) + persistent.off += size + releasem(mp) + if persistent == &globalAlloc.persistentAlloc { + unlock(&globalAlloc.mutex) + } + + if sysStat != &memstats.other_sys { + mSysStatInc(sysStat, size) + mSysStatDec(&memstats.other_sys, size) + } + return p +} diff --git a/libgo/go/runtime/mbarrier.go b/libgo/go/runtime/mbarrier.go new file mode 100644 index 0000000..3a463c8 --- /dev/null +++ b/libgo/go/runtime/mbarrier.go @@ -0,0 +1,418 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector: write barriers. +// +// For the concurrent garbage collector, the Go compiler implements +// updates to pointer-valued fields that may be in heap objects by +// emitting calls to write barriers. This file contains the actual write barrier +// implementation, gcmarkwb_m, and the various wrappers called by the +// compiler to implement pointer assignment, slice assignment, +// typed memmove, and so on. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// For gccgo, use go:linkname to rename compiler-called functions to +// themselves, so that the compiler will export them. +// +//go:linkname writebarrierptr runtime.writebarrierptr +//go:linkname typedmemmove runtime.typedmemmove +//go:linkname typedslicecopy runtime.typedslicecopy + +// gcmarkwb_m is the mark-phase write barrier, the only barrier we have. +// The rest of this file exists only to make calls to this function. +// +// This is a hybrid barrier that combines a Yuasa-style deletion +// barrier—which shades the object whose reference is being +// overwritten—with Dijkstra insertion barrier—which shades the object +// whose reference is being written. The insertion part of the barrier +// is necessary while the calling goroutine's stack is grey. In +// pseudocode, the barrier is: +// +// writePointer(slot, ptr): +// shade(*slot) +// if current stack is grey: +// shade(ptr) +// *slot = ptr +// +// slot is the destination in Go code. +// ptr is the value that goes into the slot in Go code. +// +// Shade indicates that it has seen a white pointer by adding the referent +// to wbuf as well as marking it. +// +// The two shades and the condition work together to prevent a mutator +// from hiding an object from the garbage collector: +// +// 1. shade(*slot) prevents a mutator from hiding an object by moving +// the sole pointer to it from the heap to its stack. If it attempts +// to unlink an object from the heap, this will shade it. +// +// 2. shade(ptr) prevents a mutator from hiding an object by moving +// the sole pointer to it from its stack into a black object in the +// heap. If it attempts to install the pointer into a black object, +// this will shade it. +// +// 3. Once a goroutine's stack is black, the shade(ptr) becomes +// unnecessary. shade(ptr) prevents hiding an object by moving it from +// the stack to the heap, but this requires first having a pointer +// hidden on the stack. Immediately after a stack is scanned, it only +// points to shaded objects, so it's not hiding anything, and the +// shade(*slot) prevents it from hiding any other pointers on its +// stack. +// +// For a detailed description of this barrier and proof of +// correctness, see https://github.com/golang/proposal/blob/master/design/17503-eliminate-rescan.md +// +// +// +// Dealing with memory ordering: +// +// Both the Yuasa and Dijkstra barriers can be made conditional on the +// color of the object containing the slot. We chose not to make these +// conditional because the cost of ensuring that the object holding +// the slot doesn't concurrently change color without the mutator +// noticing seems prohibitive. +// +// Consider the following example where the mutator writes into +// a slot and then loads the slot's mark bit while the GC thread +// writes to the slot's mark bit and then as part of scanning reads +// the slot. +// +// Initially both [slot] and [slotmark] are 0 (nil) +// Mutator thread GC thread +// st [slot], ptr st [slotmark], 1 +// +// ld r1, [slotmark] ld r2, [slot] +// +// Without an expensive memory barrier between the st and the ld, the final +// result on most HW (including 386/amd64) can be r1==r2==0. This is a classic +// example of what can happen when loads are allowed to be reordered with older +// stores (avoiding such reorderings lies at the heart of the classic +// Peterson/Dekker algorithms for mutual exclusion). Rather than require memory +// barriers, which will slow down both the mutator and the GC, we always grey +// the ptr object regardless of the slot's color. +// +// Another place where we intentionally omit memory barriers is when +// accessing mheap_.arena_used to check if a pointer points into the +// heap. On relaxed memory machines, it's possible for a mutator to +// extend the size of the heap by updating arena_used, allocate an +// object from this new region, and publish a pointer to that object, +// but for tracing running on another processor to observe the pointer +// but use the old value of arena_used. In this case, tracing will not +// mark the object, even though it's reachable. However, the mutator +// is guaranteed to execute a write barrier when it publishes the +// pointer, so it will take care of marking the object. A general +// consequence of this is that the garbage collector may cache the +// value of mheap_.arena_used. (See issue #9984.) +// +// +// Stack writes: +// +// The compiler omits write barriers for writes to the current frame, +// but if a stack pointer has been passed down the call stack, the +// compiler will generate a write barrier for writes through that +// pointer (because it doesn't know it's not a heap pointer). +// +// One might be tempted to ignore the write barrier if slot points +// into to the stack. Don't do it! Mark termination only re-scans +// frames that have potentially been active since the concurrent scan, +// so it depends on write barriers to track changes to pointers in +// stack frames that have not been active. +// +// +// Global writes: +// +// The Go garbage collector requires write barriers when heap pointers +// are stored in globals. Many garbage collectors ignore writes to +// globals and instead pick up global -> heap pointers during +// termination. This increases pause time, so we instead rely on write +// barriers for writes to globals so that we don't have to rescan +// global during mark termination. +// +// +// Publication ordering: +// +// The write barrier is *pre-publication*, meaning that the write +// barrier happens prior to the *slot = ptr write that may make ptr +// reachable by some goroutine that currently cannot reach it. +// +// +//go:nowritebarrierrec +//go:systemstack +func gcmarkwb_m(slot *uintptr, ptr uintptr) { + if writeBarrier.needed { + // Note: This turns bad pointer writes into bad + // pointer reads, which could be confusing. We avoid + // reading from obviously bad pointers, which should + // take care of the vast majority of these. We could + // patch this up in the signal handler, or use XCHG to + // combine the read and the write. Checking inheap is + // insufficient since we need to track changes to + // roots outside the heap. + if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize { + if optr := *slot; optr != 0 { + shade(optr) + } + } + // TODO: Make this conditional on the caller's stack color. + if ptr != 0 && inheap(ptr) { + shade(ptr) + } + } +} + +// writebarrierptr_prewrite1 invokes a write barrier for *dst = src +// prior to the write happening. +// +// Write barrier calls must not happen during critical GC and scheduler +// related operations. In particular there are times when the GC assumes +// that the world is stopped but scheduler related code is still being +// executed, dealing with syscalls, dealing with putting gs on runnable +// queues and so forth. This code cannot execute write barriers because +// the GC might drop them on the floor. Stopping the world involves removing +// the p associated with an m. We use the fact that m.p == nil to indicate +// that we are in one these critical section and throw if the write is of +// a pointer to a heap object. +//go:nosplit +func writebarrierptr_prewrite1(dst *uintptr, src uintptr) { + mp := acquirem() + if mp.inwb || mp.dying > 0 { + releasem(mp) + return + } + systemstack(func() { + if mp.p == 0 && memstats.enablegc && !mp.inwb && inheap(src) { + throw("writebarrierptr_prewrite1 called with mp.p == nil") + } + mp.inwb = true + gcmarkwb_m(dst, src) + }) + mp.inwb = false + releasem(mp) +} + +// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer, +// but if we do that, Go inserts a write barrier on *dst = src. +//go:nosplit +func writebarrierptr(dst *uintptr, src uintptr) { + if writeBarrier.cgo { + cgoCheckWriteBarrier(dst, src) + } + if !writeBarrier.needed { + *dst = src + return + } + if src != 0 && src < minPhysPageSize { + systemstack(func() { + print("runtime: writebarrierptr *", dst, " = ", hex(src), "\n") + throw("bad pointer in write barrier") + }) + } + writebarrierptr_prewrite1(dst, src) + *dst = src +} + +// writebarrierptr_prewrite is like writebarrierptr, but the store +// will be performed by the caller after this call. The caller must +// not allow preemption between this call and the write. +// +//go:nosplit +func writebarrierptr_prewrite(dst *uintptr, src uintptr) { + if writeBarrier.cgo { + cgoCheckWriteBarrier(dst, src) + } + if !writeBarrier.needed { + return + } + if src != 0 && src < minPhysPageSize { + systemstack(func() { throw("bad pointer in write barrier") }) + } + writebarrierptr_prewrite1(dst, src) +} + +// typedmemmove copies a value of type t to dst from src. +//go:nosplit +func typedmemmove(typ *_type, dst, src unsafe.Pointer) { + if typ.kind&kindNoPointers == 0 { + bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size) + } + // There's a race here: if some other goroutine can write to + // src, it may change some pointer in src after we've + // performed the write barrier but before we perform the + // memory copy. This safe because the write performed by that + // other goroutine must also be accompanied by a write + // barrier, so at worst we've unnecessarily greyed the old + // pointer that was in src. + memmove(dst, src, typ.size) + if writeBarrier.cgo { + cgoCheckMemmove(typ, dst, src, 0, typ.size) + } +} + +//go:linkname reflect_typedmemmove reflect.typedmemmove +func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { + if raceenabled { + raceWriteObjectPC(typ, dst, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) + raceReadObjectPC(typ, src, getcallerpc(unsafe.Pointer(&typ)), funcPC(reflect_typedmemmove)) + } + if msanenabled { + msanwrite(dst, typ.size) + msanread(src, typ.size) + } + typedmemmove(typ, dst, src) +} + +// typedmemmovepartial is like typedmemmove but assumes that +// dst and src point off bytes into the value and only copies size bytes. +//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial +func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) { + if writeBarrier.needed && typ.kind&kindNoPointers == 0 && size >= sys.PtrSize { + // Pointer-align start address for bulk barrier. + adst, asrc, asize := dst, src, size + if frag := -off & (sys.PtrSize - 1); frag != 0 { + adst = add(dst, frag) + asrc = add(src, frag) + asize -= frag + } + bulkBarrierPreWrite(uintptr(adst), uintptr(asrc), asize&^(sys.PtrSize-1)) + } + + memmove(dst, src, size) + if writeBarrier.cgo { + cgoCheckMemmove(typ, dst, src, off, size) + } +} + +//go:nosplit +func typedslicecopy(typ *_type, dst, src slice) int { + // TODO(rsc): If typedslicecopy becomes faster than calling + // typedmemmove repeatedly, consider using during func growslice. + n := dst.len + if n > src.len { + n = src.len + } + if n == 0 { + return 0 + } + dstp := dst.array + srcp := src.array + + if raceenabled { + callerpc := getcallerpc(unsafe.Pointer(&typ)) + pc := funcPC(slicecopy) + racewriterangepc(dstp, uintptr(n)*typ.size, callerpc, pc) + racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc) + } + if msanenabled { + msanwrite(dstp, uintptr(n)*typ.size) + msanread(srcp, uintptr(n)*typ.size) + } + + if writeBarrier.cgo { + cgoCheckSliceCopy(typ, dst, src, n) + } + + // Note: No point in checking typ.kind&kindNoPointers here: + // compiler only emits calls to typedslicecopy for types with pointers, + // and growslice and reflect_typedslicecopy check for pointers + // before calling typedslicecopy. + if !writeBarrier.needed { + memmove(dstp, srcp, uintptr(n)*typ.size) + return n + } + + systemstack(func() { + if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) { + // Overlap with src before dst. + // Copy backward, being careful not to move dstp/srcp + // out of the array they point into. + dstp = add(dstp, uintptr(n-1)*typ.size) + srcp = add(srcp, uintptr(n-1)*typ.size) + i := 0 + for { + typedmemmove(typ, dstp, srcp) + if i++; i >= n { + break + } + dstp = add(dstp, -typ.size) + srcp = add(srcp, -typ.size) + } + } else { + // Copy forward, being careful not to move dstp/srcp + // out of the array they point into. + i := 0 + for { + typedmemmove(typ, dstp, srcp) + if i++; i >= n { + break + } + dstp = add(dstp, typ.size) + srcp = add(srcp, typ.size) + } + } + }) + return n +} + +//go:linkname reflect_typedslicecopy reflect.typedslicecopy +func reflect_typedslicecopy(elemType *_type, dst, src slice) int { + if elemType.kind&kindNoPointers != 0 { + n := dst.len + if n > src.len { + n = src.len + } + if n == 0 { + return 0 + } + + size := uintptr(n) * elemType.size + if raceenabled { + callerpc := getcallerpc(unsafe.Pointer(&elemType)) + pc := funcPC(reflect_typedslicecopy) + racewriterangepc(dst.array, size, callerpc, pc) + racereadrangepc(src.array, size, callerpc, pc) + } + if msanenabled { + msanwrite(dst.array, size) + msanread(src.array, size) + } + + memmove(dst.array, src.array, size) + return n + } + return typedslicecopy(elemType, dst, src) +} + +// typedmemclr clears the typed memory at ptr with type typ. The +// memory at ptr must already be initialized (and hence in type-safe +// state). If the memory is being initialized for the first time, see +// memclrNoHeapPointers. +// +// If the caller knows that typ has pointers, it can alternatively +// call memclrHasPointers. +// +//go:nosplit +func typedmemclr(typ *_type, ptr unsafe.Pointer) { + if typ.kind&kindNoPointers == 0 { + bulkBarrierPreWrite(uintptr(ptr), 0, typ.size) + } + memclrNoHeapPointers(ptr, typ.size) +} + +// memclrHasPointers clears n bytes of typed memory starting at ptr. +// The caller must ensure that the type of the object at ptr has +// pointers, usually by checking typ.kind&kindNoPointers. However, ptr +// does not have to point to the start of the allocation. +// +//go:nosplit +func memclrHasPointers(ptr unsafe.Pointer, n uintptr) { + bulkBarrierPreWrite(uintptr(ptr), 0, n) + memclrNoHeapPointers(ptr, n) +} diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go new file mode 100644 index 0000000..2b00493 --- /dev/null +++ b/libgo/go/runtime/mbitmap.go @@ -0,0 +1,1874 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector: type and heap bitmaps. +// +// Stack, data, and bss bitmaps +// +// Stack frames and global variables in the data and bss sections are described +// by 1-bit bitmaps in which 0 means uninteresting and 1 means live pointer +// to be visited during GC. The bits in each byte are consumed starting with +// the low bit: 1<<0, 1<<1, and so on. +// +// Heap bitmap +// +// The allocated heap comes from a subset of the memory in the range [start, used), +// where start == mheap_.arena_start and used == mheap_.arena_used. +// The heap bitmap comprises 2 bits for each pointer-sized word in that range, +// stored in bytes indexed backward in memory from start. +// That is, the byte at address start-1 holds the 2-bit entries for the four words +// start through start+3*ptrSize, the byte at start-2 holds the entries for +// start+4*ptrSize through start+7*ptrSize, and so on. +// +// In each 2-bit entry, the lower bit holds the same information as in the 1-bit +// bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. +// The meaning of the high bit depends on the position of the word being described +// in its allocated object. In all words *except* the second word, the +// high bit indicates that the object is still being described. In +// these words, if a bit pair with a high bit 0 is encountered, the +// low bit can also be assumed to be 0, and the object description is +// over. This 00 is called the ``dead'' encoding: it signals that the +// rest of the words in the object are uninteresting to the garbage +// collector. +// +// In the second word, the high bit is the GC ``checkmarked'' bit (see below). +// +// The 2-bit entries are split when written into the byte, so that the top half +// of the byte contains 4 high bits and the bottom half contains 4 low (pointer) +// bits. +// This form allows a copy from the 1-bit to the 4-bit form to keep the +// pointer bits contiguous, instead of having to space them out. +// +// The code makes use of the fact that the zero value for a heap bitmap +// has no live pointer bit set and is (depending on position), not used, +// not checkmarked, and is the dead encoding. +// These properties must be preserved when modifying the encoding. +// +// Checkmarks +// +// In a concurrent garbage collector, one worries about failing to mark +// a live object due to mutations without write barriers or bugs in the +// collector implementation. As a sanity check, the GC has a 'checkmark' +// mode that retraverses the object graph with the world stopped, to make +// sure that everything that should be marked is marked. +// In checkmark mode, in the heap bitmap, the high bit of the 2-bit entry +// for the second word of the object holds the checkmark bit. +// When not in checkmark mode, this bit is set to 1. +// +// The smallest possible allocation is 8 bytes. On a 32-bit machine, that +// means every allocated object has two words, so there is room for the +// checkmark bit. On a 64-bit machine, however, the 8-byte allocation is +// just one word, so the second bit pair is not available for encoding the +// checkmark. However, because non-pointer allocations are combined +// into larger 16-byte (maxTinySize) allocations, a plain 8-byte allocation +// must be a pointer, so the type bit in the first word is not actually needed. +// It is still used in general, except in checkmark the type bit is repurposed +// as the checkmark bit and then reinitialized (to 1) as the type bit when +// finished. +// + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +const ( + bitPointer = 1 << 0 + bitScan = 1 << 4 + + heapBitsShift = 1 // shift offset between successive bitPointer or bitScan entries + heapBitmapScale = sys.PtrSize * (8 / 2) // number of data bytes described by one heap bitmap byte + + // all scan/pointer bits in a byte + bitScanAll = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) + bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift) +) + +// addb returns the byte pointer p+n. +//go:nowritebarrier +//go:nosplit +func addb(p *byte, n uintptr) *byte { + // Note: wrote out full expression instead of calling add(p, n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n)) +} + +// subtractb returns the byte pointer p-n. +// subtractb is typically used when traversing the pointer tables referred to by hbits +// which are arranged in reverse order. +//go:nowritebarrier +//go:nosplit +func subtractb(p *byte, n uintptr) *byte { + // Note: wrote out full expression instead of calling add(p, -n) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n)) +} + +// add1 returns the byte pointer p+1. +//go:nowritebarrier +//go:nosplit +func add1(p *byte) *byte { + // Note: wrote out full expression instead of calling addb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1)) +} + +// subtract1 returns the byte pointer p-1. +// subtract1 is typically used when traversing the pointer tables referred to by hbits +// which are arranged in reverse order. +//go:nowritebarrier +// +// nosplit because it is used during write barriers and must not be preempted. +//go:nosplit +func subtract1(p *byte) *byte { + // Note: wrote out full expression instead of calling subtractb(p, 1) + // to reduce the number of temporaries generated by the + // compiler for this trivial expression during inlining. + return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) +} + +// mHeap_MapBits is called each time arena_used is extended. +// It maps any additional bitmap memory needed for the new arena memory. +// It must be called with the expected new value of arena_used, +// *before* h.arena_used has been updated. +// Waiting to update arena_used until after the memory has been mapped +// avoids faults when other threads try access the bitmap immediately +// after observing the change to arena_used. +// +//go:nowritebarrier +func (h *mheap) mapBits(arena_used uintptr) { + // Caller has added extra mappings to the arena. + // Add extra mappings of bitmap words as needed. + // We allocate extra bitmap pieces in chunks of bitmapChunk. + const bitmapChunk = 8192 + + n := (arena_used - mheap_.arena_start) / heapBitmapScale + n = round(n, bitmapChunk) + n = round(n, physPageSize) + if h.bitmap_mapped >= n { + return + } + + sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) + h.bitmap_mapped = n +} + +// heapBits provides access to the bitmap bits for a single heap word. +// The methods on heapBits take value receivers so that the compiler +// can more easily inline calls to those methods and registerize the +// struct fields independently. +type heapBits struct { + bitp *uint8 + shift uint32 +} + +// markBits provides access to the mark bit for an object in the heap. +// bytep points to the byte holding the mark bit. +// mask is a byte with a single bit set that can be &ed with *bytep +// to see if the bit has been set. +// *m.byte&m.mask != 0 indicates the mark bit is set. +// index can be used along with span information to generate +// the address of the object in the heap. +// We maintain one set of mark bits for allocation and one for +// marking purposes. +type markBits struct { + bytep *uint8 + mask uint8 + index uintptr +} + +//go:nosplit +func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { + whichByte := allocBitIndex / 8 + whichBit := allocBitIndex % 8 + bytePtr := addb(s.allocBits, whichByte) + return markBits{bytePtr, uint8(1 << whichBit), allocBitIndex} +} + +// refillaCache takes 8 bytes s.allocBits starting at whichByte +// and negates them so that ctz (count trailing zeros) instructions +// can be used. It then places these 8 bytes into the cached 64 bit +// s.allocCache. +func (s *mspan) refillAllocCache(whichByte uintptr) { + bytes := (*[8]uint8)(unsafe.Pointer(addb(s.allocBits, whichByte))) + aCache := uint64(0) + aCache |= uint64(bytes[0]) + aCache |= uint64(bytes[1]) << (1 * 8) + aCache |= uint64(bytes[2]) << (2 * 8) + aCache |= uint64(bytes[3]) << (3 * 8) + aCache |= uint64(bytes[4]) << (4 * 8) + aCache |= uint64(bytes[5]) << (5 * 8) + aCache |= uint64(bytes[6]) << (6 * 8) + aCache |= uint64(bytes[7]) << (7 * 8) + s.allocCache = ^aCache +} + +// nextFreeIndex returns the index of the next free object in s at +// or after s.freeindex. +// There are hardware instructions that can be used to make this +// faster if profiling warrants it. +func (s *mspan) nextFreeIndex() uintptr { + sfreeindex := s.freeindex + snelems := s.nelems + if sfreeindex == snelems { + return sfreeindex + } + if sfreeindex > snelems { + throw("s.freeindex > s.nelems") + } + + aCache := s.allocCache + + bitIndex := sys.Ctz64(aCache) + for bitIndex == 64 { + // Move index to start of next cached bits. + sfreeindex = (sfreeindex + 64) &^ (64 - 1) + if sfreeindex >= snelems { + s.freeindex = snelems + return snelems + } + whichByte := sfreeindex / 8 + // Refill s.allocCache with the next 64 alloc bits. + s.refillAllocCache(whichByte) + aCache = s.allocCache + bitIndex = sys.Ctz64(aCache) + // nothing available in cached bits + // grab the next 8 bytes and try again. + } + result := sfreeindex + uintptr(bitIndex) + if result >= snelems { + s.freeindex = snelems + return snelems + } + + s.allocCache >>= (bitIndex + 1) + sfreeindex = result + 1 + + if sfreeindex%64 == 0 && sfreeindex != snelems { + // We just incremented s.freeindex so it isn't 0. + // As each 1 in s.allocCache was encountered and used for allocation + // it was shifted away. At this point s.allocCache contains all 0s. + // Refill s.allocCache so that it corresponds + // to the bits at s.allocBits starting at s.freeindex. + whichByte := sfreeindex / 8 + s.refillAllocCache(whichByte) + } + s.freeindex = sfreeindex + return result +} + +// isFree returns whether the index'th object in s is unallocated. +func (s *mspan) isFree(index uintptr) bool { + if index < s.freeindex { + return false + } + whichByte := index / 8 + whichBit := index % 8 + byteVal := *addb(s.allocBits, whichByte) + return byteVal&uint8(1<<whichBit) == 0 +} + +func (s *mspan) objIndex(p uintptr) uintptr { + byteOffset := p - s.base() + if byteOffset == 0 { + return 0 + } + if s.baseMask != 0 { + // s.baseMask is 0, elemsize is a power of two, so shift by s.divShift + return byteOffset >> s.divShift + } + return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2) +} + +func markBitsForAddr(p uintptr) markBits { + s := spanOf(p) + objIndex := s.objIndex(p) + return s.markBitsForIndex(objIndex) +} + +func (s *mspan) markBitsForIndex(objIndex uintptr) markBits { + whichByte := objIndex / 8 + bitMask := uint8(1 << (objIndex % 8)) // low 3 bits hold the bit index + bytePtr := addb(s.gcmarkBits, whichByte) + return markBits{bytePtr, bitMask, objIndex} +} + +func (s *mspan) markBitsForBase() markBits { + return markBits{s.gcmarkBits, uint8(1), 0} +} + +// isMarked reports whether mark bit m is set. +func (m markBits) isMarked() bool { + return *m.bytep&m.mask != 0 +} + +// setMarked sets the marked bit in the markbits, atomically. Some compilers +// are not able to inline atomic.Or8 function so if it appears as a hot spot consider +// inlining it manually. +func (m markBits) setMarked() { + // Might be racing with other updates, so use atomic update always. + // We used to be clever here and use a non-atomic update in certain + // cases, but it's not worth the risk. + atomic.Or8(m.bytep, m.mask) +} + +// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically. +func (m markBits) setMarkedNonAtomic() { + *m.bytep |= m.mask +} + +// clearMarked clears the marked bit in the markbits, atomically. +func (m markBits) clearMarked() { + // Might be racing with other updates, so use atomic update always. + // We used to be clever here and use a non-atomic update in certain + // cases, but it's not worth the risk. + atomic.And8(m.bytep, ^m.mask) +} + +// clearMarkedNonAtomic clears the marked bit non-atomically. +func (m markBits) clearMarkedNonAtomic() { + *m.bytep ^= m.mask +} + +// markBitsForSpan returns the markBits for the span base address base. +func markBitsForSpan(base uintptr) (mbits markBits) { + if base < mheap_.arena_start || base >= mheap_.arena_used { + throw("markBitsForSpan: base out of range") + } + mbits = markBitsForAddr(base) + if mbits.mask != 1 { + throw("markBitsForSpan: unaligned start") + } + return mbits +} + +// advance advances the markBits to the next object in the span. +func (m *markBits) advance() { + if m.mask == 1<<7 { + m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1)) + m.mask = 1 + } else { + m.mask = m.mask << 1 + } + m.index++ +} + +// heapBitsForAddr returns the heapBits for the address addr. +// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used). +// +// nosplit because it is used during write barriers and must not be preempted. +//go:nosplit +func heapBitsForAddr(addr uintptr) heapBits { + // 2 bits per work, 4 pairs per byte, and a mask is hard coded. + off := (addr - mheap_.arena_start) / sys.PtrSize + return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} +} + +// heapBitsForSpan returns the heapBits for the span base address base. +func heapBitsForSpan(base uintptr) (hbits heapBits) { + if base < mheap_.arena_start || base >= mheap_.arena_used { + throw("heapBitsForSpan: base out of range") + } + return heapBitsForAddr(base) +} + +// heapBitsForObject returns the base address for the heap object +// containing the address p, the heapBits for base, +// the object's span, and of the index of the object in s. +// If p does not point into a heap object, +// return base == 0 +// otherwise return the base of the object. +// +// For gccgo, the forStack parameter is true if the value came from the stack. +// The stack is collected conservatively and may contain invalid pointers. +// +// refBase and refOff optionally give the base address of the object +// in which the pointer p was found and the byte offset at which it +// was found. These are used for error reporting. +func heapBitsForObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, hbits heapBits, s *mspan, objIndex uintptr) { + arenaStart := mheap_.arena_start + if p < arenaStart || p >= mheap_.arena_used { + return + } + off := p - arenaStart + idx := off >> _PageShift + // p points into the heap, but possibly to the middle of an object. + // Consult the span table to find the block beginning. + s = mheap_.spans[idx] + if s == nil || p < s.base() || p >= s.limit || s.state != mSpanInUse { + if s == nil || s.state == _MSpanStack || forStack { + // If s is nil, the virtual address has never been part of the heap. + // This pointer may be to some mmap'd region, so we allow it. + // Pointers into stacks are also ok, the runtime manages these explicitly. + return + } + + // The following ensures that we are rigorous about what data + // structures hold valid pointers. + if debug.invalidptr != 0 { + // Typically this indicates an incorrect use + // of unsafe or cgo to store a bad pointer in + // the Go heap. It may also indicate a runtime + // bug. + // + // TODO(austin): We could be more aggressive + // and detect pointers to unallocated objects + // in allocated spans. + printlock() + print("runtime: pointer ", hex(p)) + if s.state != mSpanInUse { + print(" to unallocated span") + } else { + print(" to unused region of span") + } + print(" idx=", hex(idx), " span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", s.state, "\n") + if refBase != 0 { + print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n") + gcDumpObject("object", refBase, refOff) + } + throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)") + } + return + } + + if forStack { + // A span can be entered in mheap_.spans, and be set + // to mSpanInUse, before it is fully initialized. + // All we need in practice is allocBits and gcmarkBits, + // so make sure they are set. + if s.allocBits == nil || s.gcmarkBits == nil { + return + } + } + + // If this span holds object of a power of 2 size, just mask off the bits to + // the interior of the object. Otherwise use the size to get the base. + if s.baseMask != 0 { + // optimize for power of 2 sized objects. + base = s.base() + base = base + (p-base)&uintptr(s.baseMask) + objIndex = (base - s.base()) >> s.divShift + // base = p & s.baseMask is faster for small spans, + // but doesn't work for large spans. + // Overall, it's faster to use the more general computation above. + } else { + base = s.base() + if p-base >= s.elemsize { + // n := (p - base) / s.elemsize, using division by multiplication + objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2 + base += objIndex * s.elemsize + } + } + // Now that we know the actual base, compute heapBits to return to caller. + hbits = heapBitsForAddr(base) + return +} + +// prefetch the bits. +func (h heapBits) prefetch() { + prefetchnta(uintptr(unsafe.Pointer((h.bitp)))) +} + +// next returns the heapBits describing the next pointer-sized word in memory. +// That is, if h describes address p, h.next() describes p+ptrSize. +// Note that next does not modify h. The caller must record the result. +// +// nosplit because it is used during write barriers and must not be preempted. +//go:nosplit +func (h heapBits) next() heapBits { + if h.shift < 3*heapBitsShift { + return heapBits{h.bitp, h.shift + heapBitsShift} + } + return heapBits{subtract1(h.bitp), 0} +} + +// forward returns the heapBits describing n pointer-sized words ahead of h in memory. +// That is, if h describes address p, h.forward(n) describes p+n*ptrSize. +// h.forward(1) is equivalent to h.next(), just slower. +// Note that forward does not modify h. The caller must record the result. +// bits returns the heap bits for the current word. +func (h heapBits) forward(n uintptr) heapBits { + n += uintptr(h.shift) / heapBitsShift + return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} +} + +// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. +// The result includes in its higher bits the bits for subsequent words +// described by the same bitmap byte. +func (h heapBits) bits() uint32 { + // The (shift & 31) eliminates a test and conditional branch + // from the generated code. + return uint32(*h.bitp) >> (h.shift & 31) +} + +// morePointers returns true if this word and all remaining words in this object +// are scalars. +// h must not describe the second word of the object. +func (h heapBits) morePointers() bool { + return h.bits()&bitScan != 0 +} + +// isPointer reports whether the heap bits describe a pointer word. +// +// nosplit because it is used during write barriers and must not be preempted. +//go:nosplit +func (h heapBits) isPointer() bool { + return h.bits()&bitPointer != 0 +} + +// hasPointers reports whether the given object has any pointers. +// It must be told how large the object at h is for efficiency. +// h must describe the initial word of the object. +func (h heapBits) hasPointers(size uintptr) bool { + if size == sys.PtrSize { // 1-word objects are always pointers + return true + } + return (*h.bitp>>h.shift)&bitScan != 0 +} + +// isCheckmarked reports whether the heap bits have the checkmarked bit set. +// It must be told how large the object at h is, because the encoding of the +// checkmark bit varies by size. +// h must describe the initial word of the object. +func (h heapBits) isCheckmarked(size uintptr) bool { + if size == sys.PtrSize { + return (*h.bitp>>h.shift)&bitPointer != 0 + } + // All multiword objects are 2-word aligned, + // so we know that the initial word's 2-bit pair + // and the second word's 2-bit pair are in the + // same heap bitmap byte, *h.bitp. + return (*h.bitp>>(heapBitsShift+h.shift))&bitScan != 0 +} + +// setCheckmarked sets the checkmarked bit. +// It must be told how large the object at h is, because the encoding of the +// checkmark bit varies by size. +// h must describe the initial word of the object. +func (h heapBits) setCheckmarked(size uintptr) { + if size == sys.PtrSize { + atomic.Or8(h.bitp, bitPointer<<h.shift) + return + } + atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift)) +} + +// bulkBarrierPreWrite executes writebarrierptr_prewrite1 +// for every pointer slot in the memory range [src, src+size), +// using pointer/scalar information from [dst, dst+size). +// This executes the write barriers necessary before a memmove. +// src, dst, and size must be pointer-aligned. +// The range [dst, dst+size) must lie within a single object. +// +// As a special case, src == 0 indicates that this is being used for a +// memclr. bulkBarrierPreWrite will pass 0 for the src of each write +// barrier. +// +// Callers should call bulkBarrierPreWrite immediately before +// calling memmove(dst, src, size). This function is marked nosplit +// to avoid being preempted; the GC must not stop the goroutine +// between the memmove and the execution of the barriers. +// The caller is also responsible for cgo pointer checks if this +// may be writing Go pointers into non-Go memory. +// +// The pointer bitmap is not maintained for allocations containing +// no pointers at all; any caller of bulkBarrierPreWrite must first +// make sure the underlying allocation contains pointers, usually +// by checking typ.kind&kindNoPointers. +// +//go:nosplit +func bulkBarrierPreWrite(dst, src, size uintptr) { + if (dst|src|size)&(sys.PtrSize-1) != 0 { + throw("bulkBarrierPreWrite: unaligned arguments") + } + if !writeBarrier.needed { + return + } + if !inheap(dst) { + // If dst is a global, use the data or BSS bitmaps to + // execute write barriers. + roots := gcRoots + for roots != nil { + for i := 0; i < roots.count; i++ { + pr := roots.roots[i] + addr := uintptr(pr.decl) + if addr <= dst && dst < addr+pr.size { + if dst < addr+pr.ptrdata { + bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) + } + return + } + } + roots = roots.next + } + return + } + + h := heapBitsForAddr(dst) + if src == 0 { + for i := uintptr(0); i < size; i += sys.PtrSize { + if h.isPointer() { + dstx := (*uintptr)(unsafe.Pointer(dst + i)) + writebarrierptr_prewrite1(dstx, 0) + } + h = h.next() + } + } else { + for i := uintptr(0); i < size; i += sys.PtrSize { + if h.isPointer() { + dstx := (*uintptr)(unsafe.Pointer(dst + i)) + srcx := (*uintptr)(unsafe.Pointer(src + i)) + writebarrierptr_prewrite1(dstx, *srcx) + } + h = h.next() + } + } +} + +// bulkBarrierBitmap executes write barriers for copying from [src, +// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is +// assumed to start maskOffset bytes into the data covered by the +// bitmap in bits (which may not be a multiple of 8). +// +// This is used by bulkBarrierPreWrite for writes to data and BSS. +// +//go:nosplit +func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) { + word := maskOffset / sys.PtrSize + bits = addb(bits, word/8) + mask := uint8(1) << (word % 8) + + for i := uintptr(0); i < size; i += sys.PtrSize { + if mask == 0 { + bits = addb(bits, 1) + if *bits == 0 { + // Skip 8 words. + i += 7 * sys.PtrSize + continue + } + mask = 1 + } + if *bits&mask != 0 { + dstx := (*uintptr)(unsafe.Pointer(dst + i)) + if src == 0 { + writebarrierptr_prewrite1(dstx, 0) + } else { + srcx := (*uintptr)(unsafe.Pointer(src + i)) + writebarrierptr_prewrite1(dstx, *srcx) + } + } + mask <<= 1 + } +} + +// typeBitsBulkBarrier executes writebarrierptr_prewrite for every +// pointer that would be copied from [src, src+size) to [dst, +// dst+size) by a memmove using the type bitmap to locate those +// pointer slots. +// +// The type typ must correspond exactly to [src, src+size) and [dst, dst+size). +// dst, src, and size must be pointer-aligned. +// The type typ must have a plain bitmap, not a GC program. +// The only use of this function is in channel sends, and the +// 64 kB channel element limit takes care of this for us. +// +// Must not be preempted because it typically runs right before memmove, +// and the GC must observe them as an atomic action. +// +//go:nosplit +func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { + if typ == nil { + throw("runtime: typeBitsBulkBarrier without type") + } + if typ.size != size { + println("runtime: typeBitsBulkBarrier with type ", *typ.string, " of size ", typ.size, " but memory size", size) + throw("runtime: invalid typeBitsBulkBarrier") + } + if typ.kind&kindGCProg != 0 { + println("runtime: typeBitsBulkBarrier with type ", *typ.string, " with GC prog") + throw("runtime: invalid typeBitsBulkBarrier") + } + if !writeBarrier.needed { + return + } + ptrmask := typ.gcdata + var bits uint32 + for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize { + if i&(sys.PtrSize*8-1) == 0 { + bits = uint32(*ptrmask) + ptrmask = addb(ptrmask, 1) + } else { + bits = bits >> 1 + } + if bits&1 != 0 { + dstx := (*uintptr)(unsafe.Pointer(dst + i)) + srcx := (*uintptr)(unsafe.Pointer(src + i)) + writebarrierptr_prewrite(dstx, *srcx) + } + } +} + +// The methods operating on spans all require that h has been returned +// by heapBitsForSpan and that size, n, total are the span layout description +// returned by the mspan's layout method. +// If total > size*n, it means that there is extra leftover memory in the span, +// usually due to rounding. +// +// TODO(rsc): Perhaps introduce a different heapBitsSpan type. + +// initSpan initializes the heap bitmap for a span. +// It clears all checkmark bits. +// If this is a span of pointer-sized objects, it initializes all +// words to pointer/scan. +// Otherwise, it initializes all words to scalar/dead. +func (h heapBits) initSpan(s *mspan) { + size, n, total := s.layout() + + // Init the markbit structures + s.freeindex = 0 + s.allocCache = ^uint64(0) // all 1s indicating all free. + s.nelems = n + s.allocBits = nil + s.gcmarkBits = nil + s.gcmarkBits = newMarkBits(s.nelems) + s.allocBits = newAllocBits(s.nelems) + + // Clear bits corresponding to objects. + if total%heapBitmapScale != 0 { + throw("initSpan: unaligned length") + } + nbyte := total / heapBitmapScale + if sys.PtrSize == 8 && size == sys.PtrSize { + end := h.bitp + bitp := subtractb(end, nbyte-1) + for { + *bitp = bitPointerAll | bitScanAll + if bitp == end { + break + } + bitp = add1(bitp) + } + return + } + memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) +} + +// initCheckmarkSpan initializes a span for being checkmarked. +// It clears the checkmark bits, which are set to 1 in normal operation. +func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { + // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. + if sys.PtrSize == 8 && size == sys.PtrSize { + // Checkmark bit is type bit, bottom bit of every 2-bit entry. + // Only possible on 64-bit system, since minimum size is 8. + // Must clear type bit (checkmark bit) of every word. + // The type bit is the lower of every two-bit pair. + bitp := h.bitp + for i := uintptr(0); i < n; i += 4 { + *bitp &^= bitPointerAll + bitp = subtract1(bitp) + } + return + } + for i := uintptr(0); i < n; i++ { + *h.bitp &^= bitScan << (heapBitsShift + h.shift) + h = h.forward(size / sys.PtrSize) + } +} + +// clearCheckmarkSpan undoes all the checkmarking in a span. +// The actual checkmark bits are ignored, so the only work to do +// is to fix the pointer bits. (Pointer bits are ignored by scanobject +// but consulted by typedmemmove.) +func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { + // The ptrSize == 8 is a compile-time constant false on 32-bit and eliminates this code entirely. + if sys.PtrSize == 8 && size == sys.PtrSize { + // Checkmark bit is type bit, bottom bit of every 2-bit entry. + // Only possible on 64-bit system, since minimum size is 8. + // Must clear type bit (checkmark bit) of every word. + // The type bit is the lower of every two-bit pair. + bitp := h.bitp + for i := uintptr(0); i < n; i += 4 { + *bitp |= bitPointerAll + bitp = subtract1(bitp) + } + } +} + +// oneBitCount is indexed by byte and produces the +// number of 1 bits in that byte. For example 128 has 1 bit set +// and oneBitCount[128] will holds 1. +var oneBitCount = [256]uint8{ + 0, 1, 1, 2, 1, 2, 2, 3, + 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, + 5, 6, 6, 7, 6, 7, 7, 8} + +// countFree runs through the mark bits in a span and counts the number of free objects +// in the span. +// TODO:(rlh) Use popcount intrinsic. +func (s *mspan) countFree() int { + count := 0 + maxIndex := s.nelems / 8 + for i := uintptr(0); i < maxIndex; i++ { + mrkBits := *addb(s.gcmarkBits, i) + count += int(oneBitCount[mrkBits]) + } + if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 { + mrkBits := *addb(s.gcmarkBits, maxIndex) + mask := uint8((1 << bitsInLastByte) - 1) + bits := mrkBits & mask + count += int(oneBitCount[bits]) + } + return int(s.nelems) - count +} + +// heapBitsSetType records that the new allocation [x, x+size) +// holds in [x, x+dataSize) one or more values of type typ. +// (The number of values is given by dataSize / typ.size.) +// If dataSize < size, the fragment [x+dataSize, x+size) is +// recorded as non-pointer data. +// It is known that the type has pointers somewhere; +// malloc does not call heapBitsSetType when there are no pointers, +// because all free objects are marked as noscan during +// heapBitsSweepSpan. +// +// There can only be one allocation from a given span active at a time, +// and the bitmap for a span always falls on byte boundaries, +// so there are no write-write races for access to the heap bitmap. +// Hence, heapBitsSetType can access the bitmap without atomics. +// +// There can be read-write races between heapBitsSetType and things +// that read the heap bitmap like scanobject. However, since +// heapBitsSetType is only used for objects that have not yet been +// made reachable, readers will ignore bits being modified by this +// function. This does mean this function cannot transiently modify +// bits that belong to neighboring objects. Also, on weakly-ordered +// machines, callers must execute a store/store (publication) barrier +// between calling this function and making the object reachable. +func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { + const doubleCheck = false // slow but helpful; enable to test modifications to this code + + // dataSize is always size rounded up to the next malloc size class, + // except in the case of allocating a defer block, in which case + // size is sizeof(_defer{}) (at least 6 words) and dataSize may be + // arbitrarily larger. + // + // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore + // assume that dataSize == size without checking it explicitly. + + if sys.PtrSize == 8 && size == sys.PtrSize { + // It's one word and it has pointers, it must be a pointer. + // Since all allocated one-word objects are pointers + // (non-pointers are aggregated into tinySize allocations), + // initSpan sets the pointer bits for us. Nothing to do here. + if doubleCheck { + h := heapBitsForAddr(x) + if !h.isPointer() { + throw("heapBitsSetType: pointer bit missing") + } + if !h.morePointers() { + throw("heapBitsSetType: scan bit missing") + } + } + return + } + + h := heapBitsForAddr(x) + ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below) + + // Heap bitmap bits for 2-word object are only 4 bits, + // so also shared with objects next to it. + // This is called out as a special case primarily for 32-bit systems, + // so that on 32-bit systems the code below can assume all objects + // are 4-word aligned (because they're all 16-byte aligned). + if size == 2*sys.PtrSize { + if typ.size == sys.PtrSize { + // We're allocating a block big enough to hold two pointers. + // On 64-bit, that means the actual object must be two pointers, + // or else we'd have used the one-pointer-sized block. + // On 32-bit, however, this is the 8-byte block, the smallest one. + // So it could be that we're allocating one pointer and this was + // just the smallest block available. Distinguish by checking dataSize. + // (In general the number of instances of typ being allocated is + // dataSize/typ.size.) + if sys.PtrSize == 4 && dataSize == sys.PtrSize { + // 1 pointer object. On 32-bit machines clear the bit for the + // unused second word. + *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift + *h.bitp |= (bitPointer | bitScan) << h.shift + } else { + // 2-element slice of pointer. + *h.bitp |= (bitPointer | bitScan | bitPointer<<heapBitsShift) << h.shift + } + return + } + // Otherwise typ.size must be 2*sys.PtrSize, + // and typ.kind&kindGCProg == 0. + if doubleCheck { + if typ.size != 2*sys.PtrSize || typ.kind&kindGCProg != 0 { + print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n") + throw("heapBitsSetType") + } + } + b := uint32(*ptrmask) + hb := (b & 3) | bitScan + // bitPointer == 1, bitScan is 1 << 4, heapBitsShift is 1. + // 110011 is shifted h.shift and complemented. + // This clears out the bits that are about to be + // ored into *h.hbitp in the next instructions. + *h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift + *h.bitp |= uint8(hb << h.shift) + return + } + + // Copy from 1-bit ptrmask into 2-bit bitmap. + // The basic approach is to use a single uintptr as a bit buffer, + // alternating between reloading the buffer and writing bitmap bytes. + // In general, one load can supply two bitmap byte writes. + // This is a lot of lines of code, but it compiles into relatively few + // machine instructions. + + var ( + // Ptrmask input. + p *byte // last ptrmask byte read + b uintptr // ptrmask bits already loaded + nb uintptr // number of bits in b at next read + endp *byte // final ptrmask byte to read (then repeat) + endnb uintptr // number of valid bits in *endp + pbits uintptr // alternate source of bits + + // Heap bitmap output. + w uintptr // words processed + nw uintptr // number of words to process + hbitp *byte // next heap bitmap byte to write + hb uintptr // bits being prepared for *hbitp + ) + + hbitp = h.bitp + + // Handle GC program. Delayed until this part of the code + // so that we can use the same double-checking mechanism + // as the 1-bit case. Nothing above could have encountered + // GC programs: the cases were all too small. + if typ.kind&kindGCProg != 0 { + heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4)) + if doubleCheck { + // Double-check the heap bits written by GC program + // by running the GC program to create a 1-bit pointer mask + // and then jumping to the double-check code below. + // This doesn't catch bugs shared between the 1-bit and 4-bit + // GC program execution, but it does catch mistakes specific + // to just one of those and bugs in heapBitsSetTypeGCProg's + // implementation of arrays. + lock(&debugPtrmask.lock) + if debugPtrmask.data == nil { + debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys)) + } + ptrmask = debugPtrmask.data + runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1) + goto Phase4 + } + return + } + + // Note about sizes: + // + // typ.size is the number of words in the object, + // and typ.ptrdata is the number of words in the prefix + // of the object that contains pointers. That is, the final + // typ.size - typ.ptrdata words contain no pointers. + // This allows optimization of a common pattern where + // an object has a small header followed by a large scalar + // buffer. If we know the pointers are over, we don't have + // to scan the buffer's heap bitmap at all. + // The 1-bit ptrmasks are sized to contain only bits for + // the typ.ptrdata prefix, zero padded out to a full byte + // of bitmap. This code sets nw (below) so that heap bitmap + // bits are only written for the typ.ptrdata prefix; if there is + // more room in the allocated object, the next heap bitmap + // entry is a 00, indicating that there are no more pointers + // to scan. So only the ptrmask for the ptrdata bytes is needed. + // + // Replicated copies are not as nice: if there is an array of + // objects with scalar tails, all but the last tail does have to + // be initialized, because there is no way to say "skip forward". + // However, because of the possibility of a repeated type with + // size not a multiple of 4 pointers (one heap bitmap byte), + // the code already must handle the last ptrmask byte specially + // by treating it as containing only the bits for endnb pointers, + // where endnb <= 4. We represent large scalar tails that must + // be expanded in the replication by setting endnb larger than 4. + // This will have the effect of reading many bits out of b, + // but once the real bits are shifted out, b will supply as many + // zero bits as we try to read, which is exactly what we need. + + p = ptrmask + if typ.size < dataSize { + // Filling in bits for an array of typ. + // Set up for repetition of ptrmask during main loop. + // Note that ptrmask describes only a prefix of + const maxBits = sys.PtrSize*8 - 7 + if typ.ptrdata/sys.PtrSize <= maxBits { + // Entire ptrmask fits in uintptr with room for a byte fragment. + // Load into pbits and never read from ptrmask again. + // This is especially important when the ptrmask has + // fewer than 8 bits in it; otherwise the reload in the middle + // of the Phase 2 loop would itself need to loop to gather + // at least 8 bits. + + // Accumulate ptrmask into b. + // ptrmask is sized to describe only typ.ptrdata, but we record + // it as describing typ.size bytes, since all the high bits are zero. + nb = typ.ptrdata / sys.PtrSize + for i := uintptr(0); i < nb; i += 8 { + b |= uintptr(*p) << i + p = add1(p) + } + nb = typ.size / sys.PtrSize + + // Replicate ptrmask to fill entire pbits uintptr. + // Doubling and truncating is fewer steps than + // iterating by nb each time. (nb could be 1.) + // Since we loaded typ.ptrdata/sys.PtrSize bits + // but are pretending to have typ.size/sys.PtrSize, + // there might be no replication necessary/possible. + pbits = b + endnb = nb + if nb+nb <= maxBits { + for endnb <= sys.PtrSize*8 { + pbits |= pbits << endnb + endnb += endnb + } + // Truncate to a multiple of original ptrmask. + endnb = maxBits / nb * nb + pbits &= 1<<endnb - 1 + b = pbits + nb = endnb + } + + // Clear p and endp as sentinel for using pbits. + // Checked during Phase 2 loop. + p = nil + endp = nil + } else { + // Ptrmask is larger. Read it multiple times. + n := (typ.ptrdata/sys.PtrSize+7)/8 - 1 + endp = addb(ptrmask, n) + endnb = typ.size/sys.PtrSize - n*8 + } + } + if p != nil { + b = uintptr(*p) + p = add1(p) + nb = 8 + } + + if typ.size == dataSize { + // Single entry: can stop once we reach the non-pointer data. + nw = typ.ptrdata / sys.PtrSize + } else { + // Repeated instances of typ in an array. + // Have to process first N-1 entries in full, but can stop + // once we reach the non-pointer data in the final entry. + nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / sys.PtrSize + } + if nw == 0 { + // No pointers! Caller was supposed to check. + println("runtime: invalid type ", *typ.string) + throw("heapBitsSetType: called with non-pointer type") + return + } + if nw < 2 { + // Must write at least 2 words, because the "no scan" + // encoding doesn't take effect until the third word. + nw = 2 + } + + // Phase 1: Special case for leading byte (shift==0) or half-byte (shift==4). + // The leading byte is special because it contains the bits for word 1, + // which does not have the scan bit set. + // The leading half-byte is special because it's a half a byte, + // so we have to be careful with the bits already there. + switch { + default: + throw("heapBitsSetType: unexpected shift") + + case h.shift == 0: + // Ptrmask and heap bitmap are aligned. + // Handle first byte of bitmap specially. + // + // The first byte we write out covers the first four + // words of the object. The scan/dead bit on the first + // word must be set to scan since there are pointers + // somewhere in the object. The scan/dead bit on the + // second word is the checkmark, so we don't set it. + // In all following words, we set the scan/dead + // appropriately to indicate that the object contains + // to the next 2-bit entry in the bitmap. + // + // TODO: It doesn't matter if we set the checkmark, so + // maybe this case isn't needed any more. + hb = b & bitPointerAll + hb |= bitScan | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift) + if w += 4; w >= nw { + goto Phase3 + } + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 + nb -= 4 + + case sys.PtrSize == 8 && h.shift == 2: + // Ptrmask and heap bitmap are misaligned. + // The bits for the first two words are in a byte shared + // with another object, so we must be careful with the bits + // already there. + // We took care of 1-word and 2-word objects above, + // so this is at least a 6-word object. + hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift) + // This is not noscan, so set the scan bit in the + // first word. + hb |= bitScan << (2 * heapBitsShift) + b >>= 2 + nb -= 2 + // Note: no bitScan for second word because that's + // the checkmark. + *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) + *hbitp |= uint8(hb) + hbitp = subtract1(hbitp) + if w += 2; w >= nw { + // We know that there is more data, because we handled 2-word objects above. + // This must be at least a 6-word object. If we're out of pointer words, + // mark no scan in next bitmap byte and finish. + hb = 0 + w += 4 + goto Phase3 + } + } + + // Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. + // The loop computes the bits for that last write but does not execute the write; + // it leaves the bits in hb for processing by phase 3. + // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to + // use in the first half of the loop right now, and then we only adjust nb explicitly + // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. + nb -= 4 + for { + // Emit bitmap byte. + // b has at least nb+4 bits, with one exception: + // if w+4 >= nw, then b has only nw-w bits, + // but we'll stop at the break and then truncate + // appropriately in Phase 3. + hb = b & bitPointerAll + hb |= bitScanAll + if w += 4; w >= nw { + break + } + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 + + // Load more bits. b has nb right now. + if p != endp { + // Fast path: keep reading from ptrmask. + // nb unmodified: we just loaded 8 bits, + // and the next iteration will consume 8 bits, + // leaving us with the same nb the next time we're here. + if nb < 8 { + b |= uintptr(*p) << nb + p = add1(p) + } else { + // Reduce the number of bits in b. + // This is important if we skipped + // over a scalar tail, since nb could + // be larger than the bit width of b. + nb -= 8 + } + } else if p == nil { + // Almost as fast path: track bit count and refill from pbits. + // For short repetitions. + if nb < 8 { + b |= pbits << nb + nb += endnb + } + nb -= 8 // for next iteration + } else { + // Slow path: reached end of ptrmask. + // Process final partial byte and rewind to start. + b |= uintptr(*p) << nb + nb += endnb + if nb < 8 { + b |= uintptr(*ptrmask) << nb + p = add1(ptrmask) + } else { + nb -= 8 + p = ptrmask + } + } + + // Emit bitmap byte. + hb = b & bitPointerAll + hb |= bitScanAll + if w += 4; w >= nw { + break + } + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + b >>= 4 + } + +Phase3: + // Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries. + if w > nw { + // Counting the 4 entries in hb not yet written to memory, + // there are more entries than possible pointer slots. + // Discard the excess entries (can't be more than 3). + mask := uintptr(1)<<(4-(w-nw)) - 1 + hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits + } + + // Change nw from counting possibly-pointer words to total words in allocation. + nw = size / sys.PtrSize + + // Write whole bitmap bytes. + // The first is hb, the rest are zero. + if w <= nw { + *hbitp = uint8(hb) + hbitp = subtract1(hbitp) + hb = 0 // for possible final half-byte below + for w += 4; w <= nw; w += 4 { + *hbitp = 0 + hbitp = subtract1(hbitp) + } + } + + // Write final partial bitmap byte if any. + // We know w > nw, or else we'd still be in the loop above. + // It can be bigger only due to the 4 entries in hb that it counts. + // If w == nw+4 then there's nothing left to do: we wrote all nw entries + // and can discard the 4 sitting in hb. + // But if w == nw+2, we need to write first two in hb. + // The byte is shared with the next object, so be careful with + // existing bits. + if w == nw+2 { + *hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb) + } + +Phase4: + // Phase 4: all done, but perhaps double check. + if doubleCheck { + end := heapBitsForAddr(x + size) + if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { + println("ended at wrong bitmap byte for", *typ.string, "x", dataSize/typ.size) + print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") + print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") + h0 := heapBitsForAddr(x) + print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") + print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n") + throw("bad heapBitsSetType") + } + + // Double-check that bits to be written were written correctly. + // Does not check that other bits were not written, unfortunately. + h := heapBitsForAddr(x) + nptr := typ.ptrdata / sys.PtrSize + ndata := typ.size / sys.PtrSize + count := dataSize / typ.size + totalptr := ((count-1)*typ.size + typ.ptrdata) / sys.PtrSize + for i := uintptr(0); i < size/sys.PtrSize; i++ { + j := i % ndata + var have, want uint8 + have = (*h.bitp >> h.shift) & (bitPointer | bitScan) + if i >= totalptr { + want = 0 // deadmarker + if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 { + want = bitScan + } + } else { + if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 { + want |= bitPointer + } + if i != 1 { + want |= bitScan + } else { + have &^= bitScan + } + } + if have != want { + println("mismatch writing bits for", *typ.string, "x", dataSize/typ.size) + print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n") + print("kindGCProg=", typ.kind&kindGCProg != 0, "\n") + print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n") + h0 := heapBitsForAddr(x) + print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n") + print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n") + print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n") + println("at word", i, "offset", i*sys.PtrSize, "have", have, "want", want) + if typ.kind&kindGCProg != 0 { + println("GC program:") + dumpGCProg(addb(typ.gcdata, 4)) + } + throw("bad heapBitsSetType") + } + h = h.next() + } + if ptrmask == debugPtrmask.data { + unlock(&debugPtrmask.lock) + } + } +} + +// heapBitsSetTypeNoScan marks x as noscan by setting the first word +// of x in the heap bitmap to scalar/dead. +func heapBitsSetTypeNoScan(x uintptr) { + h := heapBitsForAddr(uintptr(x)) + *h.bitp &^= (bitPointer | bitScan) << h.shift +} + +var debugPtrmask struct { + lock mutex + data *byte +} + +// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program. +// progSize is the size of the memory described by the program. +// elemSize is the size of the element that the GC program describes (a prefix of). +// dataSize is the total size of the intended data, a multiple of elemSize. +// allocSize is the total size of the allocated memory. +// +// GC programs are only used for large allocations. +// heapBitsSetType requires that allocSize is a multiple of 4 words, +// so that the relevant bitmap bytes are not shared with surrounding +// objects. +func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) { + if sys.PtrSize == 8 && allocSize%(4*sys.PtrSize) != 0 { + // Alignment will be wrong. + throw("heapBitsSetTypeGCProg: small allocation") + } + var totalBits uintptr + if elemSize == dataSize { + totalBits = runGCProg(prog, nil, h.bitp, 2) + if totalBits*sys.PtrSize != progSize { + println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize) + throw("heapBitsSetTypeGCProg: unexpected bit count") + } + } else { + count := dataSize / elemSize + + // Piece together program trailer to run after prog that does: + // literal(0) + // repeat(1, elemSize-progSize-1) // zeros to fill element size + // repeat(elemSize, count-1) // repeat that element for count + // This zero-pads the data remaining in the first element and then + // repeats that first element to fill the array. + var trailer [40]byte // 3 varints (max 10 each) + some bytes + i := 0 + if n := elemSize/sys.PtrSize - progSize/sys.PtrSize; n > 0 { + // literal(0) + trailer[i] = 0x01 + i++ + trailer[i] = 0 + i++ + if n > 1 { + // repeat(1, n-1) + trailer[i] = 0x81 + i++ + n-- + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ + } + trailer[i] = byte(n) + i++ + } + } + // repeat(elemSize/ptrSize, count-1) + trailer[i] = 0x80 + i++ + n := elemSize / sys.PtrSize + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ + } + trailer[i] = byte(n) + i++ + n = count - 1 + for ; n >= 0x80; n >>= 7 { + trailer[i] = byte(n | 0x80) + i++ + } + trailer[i] = byte(n) + i++ + trailer[i] = 0 + i++ + + runGCProg(prog, &trailer[0], h.bitp, 2) + + // Even though we filled in the full array just now, + // record that we only filled in up to the ptrdata of the + // last element. This will cause the code below to + // memclr the dead section of the final array element, + // so that scanobject can stop early in the final element. + totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize + } + endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) + memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) +} + +// progToPointerMask returns the 1-bit pointer mask output by the GC program prog. +// size the size of the region described by prog, in bytes. +// The resulting bitvector will have no more than size/sys.PtrSize bits. +func progToPointerMask(prog *byte, size uintptr) bitvector { + n := (size/sys.PtrSize + 7) / 8 + x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] + x[len(x)-1] = 0xa1 // overflow check sentinel + n = runGCProg(prog, nil, &x[0], 1) + if x[len(x)-1] != 0xa1 { + throw("progToPointerMask: overflow") + } + return bitvector{int32(n), &x[0]} +} + +// Packed GC pointer bitmaps, aka GC programs. +// +// For large types containing arrays, the type information has a +// natural repetition that can be encoded to save space in the +// binary and in the memory representation of the type information. +// +// The encoding is a simple Lempel-Ziv style bytecode machine +// with the following instructions: +// +// 00000000: stop +// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes +// 10000000 n c: repeat the previous n bits c times; n, c are varints +// 1nnnnnnn c: repeat the previous n bits c times; c is a varint + +// runGCProg executes the GC program prog, and then trailer if non-nil, +// writing to dst with entries of the given size. +// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst. +// If size == 2, dst is the 2-bit heap bitmap, and writes move backward +// starting at dst (because the heap bitmap does). In this case, the caller guarantees +// that only whole bytes in dst need to be written. +// +// runGCProg returns the number of 1- or 2-bit entries written to memory. +func runGCProg(prog, trailer, dst *byte, size int) uintptr { + dstStart := dst + + // Bits waiting to be written to memory. + var bits uintptr + var nbits uintptr + + p := prog +Run: + for { + // Flush accumulated full bytes. + // The rest of the loop assumes that nbits <= 7. + for ; nbits >= 8; nbits -= 8 { + if size == 1 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } else { + v := bits&bitPointerAll | bitScanAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + v = bits&bitPointerAll | bitScanAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + } + + // Process one instruction. + inst := uintptr(*p) + p = add1(p) + n := inst & 0x7F + if inst&0x80 == 0 { + // Literal bits; n == 0 means end of program. + if n == 0 { + // Program is over; continue in trailer if present. + if trailer != nil { + //println("trailer") + p = trailer + trailer = nil + continue + } + //println("done") + break Run + } + //println("lit", n, dst) + nbyte := n / 8 + for i := uintptr(0); i < nbyte; i++ { + bits |= uintptr(*p) << nbits + p = add1(p) + if size == 1 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } else { + v := bits&0xf | bitScanAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + v = bits&0xf | bitScanAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + } + if n %= 8; n > 0 { + bits |= uintptr(*p) << nbits + p = add1(p) + nbits += n + } + continue Run + } + + // Repeat. If n == 0, it is encoded in a varint in the next bytes. + if n == 0 { + for off := uint(0); ; off += 7 { + x := uintptr(*p) + p = add1(p) + n |= (x & 0x7F) << off + if x&0x80 == 0 { + break + } + } + } + + // Count is encoded in a varint in the next bytes. + c := uintptr(0) + for off := uint(0); ; off += 7 { + x := uintptr(*p) + p = add1(p) + c |= (x & 0x7F) << off + if x&0x80 == 0 { + break + } + } + c *= n // now total number of bits to copy + + // If the number of bits being repeated is small, load them + // into a register and use that register for the entire loop + // instead of repeatedly reading from memory. + // Handling fewer than 8 bits here makes the general loop simpler. + // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add + // the pattern to a bit buffer holding at most 7 bits (a partial byte) + // it will not overflow. + src := dst + const maxBits = sys.PtrSize*8 - 7 + if n <= maxBits { + // Start with bits in output buffer. + pattern := bits + npattern := nbits + + // If we need more bits, fetch them from memory. + if size == 1 { + src = subtract1(src) + for npattern < n { + pattern <<= 8 + pattern |= uintptr(*src) + src = subtract1(src) + npattern += 8 + } + } else { + src = add1(src) + for npattern < n { + pattern <<= 4 + pattern |= uintptr(*src) & 0xf + src = add1(src) + npattern += 4 + } + } + + // We started with the whole bit output buffer, + // and then we loaded bits from whole bytes. + // Either way, we might now have too many instead of too few. + // Discard the extra. + if npattern > n { + pattern >>= npattern - n + npattern = n + } + + // Replicate pattern to at most maxBits. + if npattern == 1 { + // One bit being repeated. + // If the bit is 1, make the pattern all 1s. + // If the bit is 0, the pattern is already all 0s, + // but we can claim that the number of bits + // in the word is equal to the number we need (c), + // because right shift of bits will zero fill. + if pattern == 1 { + pattern = 1<<maxBits - 1 + npattern = maxBits + } else { + npattern = c + } + } else { + b := pattern + nb := npattern + if nb+nb <= maxBits { + // Double pattern until the whole uintptr is filled. + for nb <= sys.PtrSize*8 { + b |= b << nb + nb += nb + } + // Trim away incomplete copy of original pattern in high bits. + // TODO(rsc): Replace with table lookup or loop on systems without divide? + nb = maxBits / npattern * npattern + b &= 1<<nb - 1 + pattern = b + npattern = nb + } + } + + // Add pattern to bit buffer and flush bit buffer, c/npattern times. + // Since pattern contains >8 bits, there will be full bytes to flush + // on each iteration. + for ; c >= npattern; c -= npattern { + bits |= pattern << nbits + nbits += npattern + if size == 1 { + for nbits >= 8 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + nbits -= 8 + } + } else { + for nbits >= 4 { + *dst = uint8(bits&0xf | bitScanAll) + dst = subtract1(dst) + bits >>= 4 + nbits -= 4 + } + } + } + + // Add final fragment to bit buffer. + if c > 0 { + pattern &= 1<<c - 1 + bits |= pattern << nbits + nbits += c + } + continue Run + } + + // Repeat; n too large to fit in a register. + // Since nbits <= 7, we know the first few bytes of repeated data + // are already written to memory. + off := n - nbits // n > nbits because n > maxBits and nbits <= 7 + if size == 1 { + // Leading src fragment. + src = subtractb(src, (off+7)/8) + if frag := off & 7; frag != 0 { + bits |= uintptr(*src) >> (8 - frag) << nbits + src = add1(src) + nbits += frag + c -= frag + } + // Main loop: load one byte, write another. + // The bits are rotating through the bit buffer. + for i := c / 8; i > 0; i-- { + bits |= uintptr(*src) << nbits + src = add1(src) + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } + // Final src fragment. + if c %= 8; c > 0 { + bits |= (uintptr(*src) & (1<<c - 1)) << nbits + nbits += c + } + } else { + // Leading src fragment. + src = addb(src, (off+3)/4) + if frag := off & 3; frag != 0 { + bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits + src = subtract1(src) + nbits += frag + c -= frag + } + // Main loop: load one byte, write another. + // The bits are rotating through the bit buffer. + for i := c / 4; i > 0; i-- { + bits |= (uintptr(*src) & 0xf) << nbits + src = subtract1(src) + *dst = uint8(bits&0xf | bitScanAll) + dst = subtract1(dst) + bits >>= 4 + } + // Final src fragment. + if c %= 4; c > 0 { + bits |= (uintptr(*src) & (1<<c - 1)) << nbits + nbits += c + } + } + } + + // Write any final bits out, using full-byte writes, even for the final byte. + var totalBits uintptr + if size == 1 { + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits + nbits += -nbits & 7 + for ; nbits > 0; nbits -= 8 { + *dst = uint8(bits) + dst = add1(dst) + bits >>= 8 + } + } else { + totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + nbits += -nbits & 3 + for ; nbits > 0; nbits -= 4 { + v := bits&0xf | bitScanAll + *dst = uint8(v) + dst = subtract1(dst) + bits >>= 4 + } + } + return totalBits +} + +func dumpGCProg(p *byte) { + nptr := 0 + for { + x := *p + p = add1(p) + if x == 0 { + print("\t", nptr, " end\n") + break + } + if x&0x80 == 0 { + print("\t", nptr, " lit ", x, ":") + n := int(x+7) / 8 + for i := 0; i < n; i++ { + print(" ", hex(*p)) + p = add1(p) + } + print("\n") + nptr += int(x) + } else { + nbit := int(x &^ 0x80) + if nbit == 0 { + for nb := uint(0); ; nb += 7 { + x := *p + p = add1(p) + nbit |= int(x&0x7f) << nb + if x&0x80 == 0 { + break + } + } + } + count := 0 + for nb := uint(0); ; nb += 7 { + x := *p + p = add1(p) + count |= int(x&0x7f) << nb + if x&0x80 == 0 { + break + } + } + print("\t", nptr, " repeat ", nbit, " × ", count, "\n") + nptr += nbit * count + } + } +} + +// Testing. + +// gcbits returns the GC type info for x, for testing. +// The result is the bitmap entries (0 or 1), one entry per byte. +//go:linkname reflect_gcbits reflect.gcbits +func reflect_gcbits(x interface{}) []byte { + ret := getgcmask(x) + typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem + nptr := typ.ptrdata / sys.PtrSize + for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 { + ret = ret[:len(ret)-1] + } + return ret +} + +// Returns GC type info for object p for testing. +func getgcmask(ep interface{}) (mask []byte) { + e := *efaceOf(&ep) + p := e.data + t := e._type + // data or bss + roots := gcRoots + for roots != nil { + for i := 0; i < roots.count; i++ { + pr := roots.roots[i] + addr := uintptr(pr.decl) + if addr <= uintptr(p) && uintptr(p) < addr+pr.size { + n := (*ptrtype)(unsafe.Pointer(t)).elem.size + mask = make([]byte, n/sys.PtrSize) + copy(mask, (*[1 << 29]uint8)(unsafe.Pointer(pr.gcdata))[:pr.ptrdata]) + } + return + } + roots = roots.next + } + + // heap + var n uintptr + var base uintptr + if mlookup(uintptr(p), &base, &n, nil) != 0 { + mask = make([]byte, n/sys.PtrSize) + for i := uintptr(0); i < n; i += sys.PtrSize { + hbits := heapBitsForAddr(base + i) + if hbits.isPointer() { + mask[i/sys.PtrSize] = 1 + } + if i != 1*sys.PtrSize && !hbits.morePointers() { + mask = mask[:i/sys.PtrSize] + break + } + } + return + } + + // otherwise, not something the GC knows about. + // possibly read-only data, like malloc(0). + // must not have pointers + // For gccgo, may live on the stack, which is collected conservatively. + return +} diff --git a/libgo/go/runtime/mcache.go b/libgo/go/runtime/mcache.go index b65dd37..92dabef 100644 --- a/libgo/go/runtime/mcache.go +++ b/libgo/go/runtime/mcache.go @@ -4,16 +4,8 @@ package runtime -// This is a temporary mcache.go for gccgo. -// At some point it will be replaced by the one in the gc runtime package. - import "unsafe" -type mcachelist struct { - list *mlink - nlist uint32 -} - // Per-thread (in Go, per-P) cache for small objects. // No locking needed because it is per-thread (per-P). // @@ -24,8 +16,8 @@ type mcachelist struct { type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. - next_sample int32 // trigger heap sample after allocating this many bytes - local_cachealloc uintptr // bytes allocated (or freed) from cache since last lock of heap + next_sample int32 // trigger heap sample after allocating this many bytes + local_scan uintptr // bytes of scannable heap allocated // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.go. @@ -36,12 +28,12 @@ type mcache struct { // tiny is a heap pointer. Since mcache is in non-GC'd memory, // we handle it by clearing it in releaseAll during mark // termination. - tiny unsafe.Pointer - tinysize uintptr + tiny uintptr + tinyoffset uintptr + local_tinyallocs uintptr // number of tiny allocs not counted in other stats // The rest is not accessed on every malloc. - alloc [_NumSizeClasses]*mspan // spans to allocate from - free [_NumSizeClasses]mcachelist // lists of explicitly freed objects + alloc [_NumSizeClasses]*mspan // spans to allocate from // Local allocator stats, flushed during GC. local_nlookup uintptr // number of pointer lookups @@ -50,46 +42,98 @@ type mcache struct { local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) } -type mtypes struct { - compression byte - data uintptr +// A gclink is a node in a linked list of blocks, like mlink, +// but it is opaque to the garbage collector. +// The GC does not trace the pointers during collection, +// and the compiler does not emit write barriers for assignments +// of gclinkptr values. Code should store references to gclinks +// as gclinkptr, not as *gclink. +type gclink struct { + next gclinkptr } -type special struct { - next *special - offset uint16 - kind byte +// A gclinkptr is a pointer to a gclink, but it is opaque +// to the garbage collector. +type gclinkptr uintptr + +// ptr returns the *gclink form of p. +// The result should be used for accessing fields, not stored +// in other data structures. +func (p gclinkptr) ptr() *gclink { + return (*gclink)(unsafe.Pointer(p)) } -type mspan struct { - next *mspan // next span in list, or nil if none - prev *mspan // previous span's next field, or list head's first field if none - start uintptr - npages uintptr // number of pages in span - freelist *mlink - - // sweep generation: - // if sweepgen == h->sweepgen - 2, the span needs sweeping - // if sweepgen == h->sweepgen - 1, the span is currently being swept - // if sweepgen == h->sweepgen, the span is swept and ready to use - // h->sweepgen is incremented by 2 after every GC - - sweepgen uint32 - ref uint16 - sizeclass uint8 // size class - incache bool // being used by an mcache - state uint8 // mspaninuse etc - needzero uint8 // needs to be zeroed before allocation - elemsize uintptr // computed from sizeclass or from npages - unusedsince int64 // first time spotted by gc in mspanfree state - npreleased uintptr // number of pages released to the os - limit uintptr // end of data in span - types mtypes - speciallock mutex // guards specials list - specials *special // linked list of special records sorted by offset. - freebuf *mlink +// dummy MSpan that contains no free objects. +var emptymspan mspan + +func allocmcache() *mcache { + lock(&mheap_.lock) + c := (*mcache)(mheap_.cachealloc.alloc()) + unlock(&mheap_.lock) + for i := 0; i < _NumSizeClasses; i++ { + c.alloc[i] = &emptymspan + } + c.next_sample = nextSample() + return c +} + +func freemcache(c *mcache) { + systemstack(func() { + c.releaseAll() + + // NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate + // with the stealing of gcworkbufs during garbage collection to avoid + // a race where the workbuf is double-freed. + // gcworkbuffree(c.gcworkbuf) + + lock(&mheap_.lock) + purgecachedstats(c) + mheap_.cachealloc.free(unsafe.Pointer(c)) + unlock(&mheap_.lock) + }) +} + +// Gets a span that has a free object in it and assigns it +// to be the cached span for the given sizeclass. Returns this span. +func (c *mcache) refill(sizeclass int32) *mspan { + _g_ := getg() + + _g_.m.locks++ + // Return the current cached span to the central lists. + s := c.alloc[sizeclass] + + if uintptr(s.allocCount) != s.nelems { + throw("refill of span with free space remaining") + } + + if s != &emptymspan { + s.incache = false + } + + // Get a new cached span from the central lists. + s = mheap_.central[sizeclass].mcentral.cacheSpan() + if s == nil { + throw("out of memory") + } + + if uintptr(s.allocCount) == s.nelems { + throw("span has no free space") + } + + c.alloc[sizeclass] = s + _g_.m.locks-- + return s } -type mlink struct { - next *mlink +func (c *mcache) releaseAll() { + for i := 0; i < _NumSizeClasses; i++ { + s := c.alloc[i] + if s != &emptymspan { + mheap_.central[i].mcentral.uncacheSpan(s) + c.alloc[i] = &emptymspan + } + } + // Clear tinyalloc pool. + c.tiny = 0 + c.tinyoffset = 0 } diff --git a/libgo/go/runtime/mcentral.go b/libgo/go/runtime/mcentral.go new file mode 100644 index 0000000..ddcf81e --- /dev/null +++ b/libgo/go/runtime/mcentral.go @@ -0,0 +1,222 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Central free lists. +// +// See malloc.go for an overview. +// +// The MCentral doesn't actually contain the list of free objects; the MSpan does. +// Each MCentral is two lists of MSpans: those with free objects (c->nonempty) +// and those that are completely allocated (c->empty). + +package runtime + +import "runtime/internal/atomic" + +// Central list of free objects of a given size. +// +//go:notinheap +type mcentral struct { + lock mutex + sizeclass int32 + nonempty mSpanList // list of spans with a free object, ie a nonempty free list + empty mSpanList // list of spans with no free objects (or cached in an mcache) +} + +// Initialize a single central free list. +func (c *mcentral) init(sizeclass int32) { + c.sizeclass = sizeclass + c.nonempty.init() + c.empty.init() +} + +// Allocate a span to use in an MCache. +func (c *mcentral) cacheSpan() *mspan { + // Deduct credit for this span allocation and sweep if necessary. + spanBytes := uintptr(class_to_allocnpages[c.sizeclass]) * _PageSize + deductSweepCredit(spanBytes, 0) + + lock(&c.lock) + sg := mheap_.sweepgen +retry: + var s *mspan + for s = c.nonempty.first; s != nil; s = s.next { + if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { + c.nonempty.remove(s) + c.empty.insertBack(s) + unlock(&c.lock) + s.sweep(true) + goto havespan + } + if s.sweepgen == sg-1 { + // the span is being swept by background sweeper, skip + continue + } + // we have a nonempty span that does not require sweeping, allocate from it + c.nonempty.remove(s) + c.empty.insertBack(s) + unlock(&c.lock) + goto havespan + } + + for s = c.empty.first; s != nil; s = s.next { + if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { + // we have an empty span that requires sweeping, + // sweep it and see if we can free some space in it + c.empty.remove(s) + // swept spans are at the end of the list + c.empty.insertBack(s) + unlock(&c.lock) + s.sweep(true) + freeIndex := s.nextFreeIndex() + if freeIndex != s.nelems { + s.freeindex = freeIndex + goto havespan + } + lock(&c.lock) + // the span is still empty after sweep + // it is already in the empty list, so just retry + goto retry + } + if s.sweepgen == sg-1 { + // the span is being swept by background sweeper, skip + continue + } + // already swept empty span, + // all subsequent ones must also be either swept or in process of sweeping + break + } + unlock(&c.lock) + + // Replenish central list if empty. + s = c.grow() + if s == nil { + return nil + } + lock(&c.lock) + c.empty.insertBack(s) + unlock(&c.lock) + + // At this point s is a non-empty span, queued at the end of the empty list, + // c is unlocked. +havespan: + cap := int32((s.npages << _PageShift) / s.elemsize) + n := cap - int32(s.allocCount) + if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems { + throw("span has no free objects") + } + usedBytes := uintptr(s.allocCount) * s.elemsize + if usedBytes > 0 { + reimburseSweepCredit(usedBytes) + } + atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes)) + if trace.enabled { + // heap_live changed. + traceHeapAlloc() + } + if gcBlackenEnabled != 0 { + // heap_live changed. + gcController.revise() + } + s.incache = true + freeByteBase := s.freeindex &^ (64 - 1) + whichByte := freeByteBase / 8 + // Init alloc bits cache. + s.refillAllocCache(whichByte) + + // Adjust the allocCache so that s.freeindex corresponds to the low bit in + // s.allocCache. + s.allocCache >>= s.freeindex % 64 + + return s +} + +// Return span from an MCache. +func (c *mcentral) uncacheSpan(s *mspan) { + lock(&c.lock) + + s.incache = false + + if s.allocCount == 0 { + throw("uncaching span but s.allocCount == 0") + } + + cap := int32((s.npages << _PageShift) / s.elemsize) + n := cap - int32(s.allocCount) + if n > 0 { + c.empty.remove(s) + c.nonempty.insert(s) + // mCentral_CacheSpan conservatively counted + // unallocated slots in heap_live. Undo this. + atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) + } + unlock(&c.lock) +} + +// freeSpan updates c and s after sweeping s. +// It sets s's sweepgen to the latest generation, +// and, based on the number of free objects in s, +// moves s to the appropriate list of c or returns it +// to the heap. +// freeSpan returns true if s was returned to the heap. +// If preserve=true, it does not move s (the caller +// must take care of it). +func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { + if s.incache { + throw("freeSpan given cached span") + } + s.needzero = 1 + + if preserve { + // preserve is set only when called from MCentral_CacheSpan above, + // the span must be in the empty list. + if !s.inList() { + throw("can't preserve unlinked span") + } + atomic.Store(&s.sweepgen, mheap_.sweepgen) + return false + } + + lock(&c.lock) + + // Move to nonempty if necessary. + if wasempty { + c.empty.remove(s) + c.nonempty.insert(s) + } + + // delay updating sweepgen until here. This is the signal that + // the span may be used in an MCache, so it must come after the + // linked list operations above (actually, just after the + // lock of c above.) + atomic.Store(&s.sweepgen, mheap_.sweepgen) + + if s.allocCount != 0 { + unlock(&c.lock) + return false + } + + c.nonempty.remove(s) + unlock(&c.lock) + mheap_.freeSpan(s, 0) + return true +} + +// grow allocates a new empty span from the heap and initializes it for c's size class. +func (c *mcentral) grow() *mspan { + npages := uintptr(class_to_allocnpages[c.sizeclass]) + size := uintptr(class_to_size[c.sizeclass]) + n := (npages << _PageShift) / size + + s := mheap_.alloc(npages, c.sizeclass, false, true) + if s == nil { + return nil + } + + p := s.base() + s.limit = p + size*n + + heapBitsForSpan(s.base()).initSpan(s) + return s +} diff --git a/libgo/go/runtime/mem_gccgo.go b/libgo/go/runtime/mem_gccgo.go new file mode 100644 index 0000000..161ff26 --- /dev/null +++ b/libgo/go/runtime/mem_gccgo.go @@ -0,0 +1,280 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The gccgo version of mem_*.go. + +package runtime + +import ( + "runtime/internal/sys" + "unsafe" +) + +// Functions called by C code. +//go:linkname sysAlloc runtime.sysAlloc + +//extern mmap +func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer + +//extern munmap +func munmap(addr unsafe.Pointer, length uintptr) int32 + +//extern mincore +func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + +//extern madvise +func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32 + +var mmapFD = int32(-1) + +var devZero = []byte("/dev/zero\x00") + +func init() { + if _MAP_ANON == 0 { + mmapFD = open(&devZero[0], 0 /* O_RDONLY */, 0) + if mmapFD < 0 { + println("open /dev/zero: errno=", errno()) + exit(2) + } + } +} + +// NOTE: vec must be just 1 byte long here. +// Mincore returns ENOMEM if any of the pages are unmapped, +// but we want to know that all of the pages are unmapped. +// To make these the same, we can only ask about one page +// at a time. See golang.org/issue/7476. +var addrspace_vec [1]byte + +func addrspace_free(v unsafe.Pointer, n uintptr) bool { + for off := uintptr(0); off < n; off += physPageSize { + // Use a length of 1 byte, which the kernel will round + // up to one physical page regardless of the true + // physical page size. + errval := 0 + if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 { + errval = errno() + } + if errval == _ENOSYS { + // mincore is not available on this system. + // Assume the address is available. + return true + } + if errval == _EINVAL { + // Address is not a multiple of the physical + // page size. Shouldn't happen, but just ignore it. + continue + } + // ENOMEM means unmapped, which is what we want. + // Anything else we assume means the pages are mapped. + if errval != _ENOMEM { + return false + } + } + return true +} + +func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer { + p := mmap(v, n, prot, flags, fd, offset) + // On some systems, mmap ignores v without + // MAP_FIXED, so retry if the address space is free. + if p != v && addrspace_free(v, n) { + if uintptr(p) != _MAP_FAILED { + munmap(p, n) + } + p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset) + } + return p +} + +// Don't split the stack as this method may be invoked without a valid G, which +// prevents us from allocating more stack. +//go:nosplit +func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { + p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if uintptr(p) == _MAP_FAILED { + errval := errno() + if errval == _EACCES { + print("runtime: mmap: access denied\n") + exit(2) + } + if errval == _EAGAIN { + print("runtime: mmap: too much locked memory (check 'ulimit -l').\n") + exit(2) + } + return nil + } + mSysStatInc(sysStat, n) + return p +} + +func sysUnused(v unsafe.Pointer, n uintptr) { + // By default, Linux's "transparent huge page" support will + // merge pages into a huge page if there's even a single + // present regular page, undoing the effects of the DONTNEED + // below. On amd64, that means khugepaged can turn a single + // 4KB page to 2MB, bloating the process's RSS by as much as + // 512X. (See issue #8832 and Linux kernel bug + // https://bugzilla.kernel.org/show_bug.cgi?id=93111) + // + // To work around this, we explicitly disable transparent huge + // pages when we release pages of the heap. However, we have + // to do this carefully because changing this flag tends to + // split the VMA (memory mapping) containing v in to three + // VMAs in order to track the different values of the + // MADV_NOHUGEPAGE flag in the different regions. There's a + // default limit of 65530 VMAs per address space (sysctl + // vm.max_map_count), so we must be careful not to create too + // many VMAs (see issue #12233). + // + // Since huge pages are huge, there's little use in adjusting + // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid + // exploding the number of VMAs by only adjusting the + // MADV_NOHUGEPAGE flag on a large granularity. This still + // gets most of the benefit of huge pages while keeping the + // number of VMAs under control. With hugePageSize = 2MB, even + // a pessimal heap can reach 128GB before running out of VMAs. + if sys.HugePageSize != 0 && _MADV_NOHUGEPAGE != 0 { + var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :( + + // If it's a large allocation, we want to leave huge + // pages enabled. Hence, we only adjust the huge page + // flag on the huge pages containing v and v+n-1, and + // only if those aren't aligned. + var head, tail uintptr + if uintptr(v)%s != 0 { + // Compute huge page containing v. + head = uintptr(v) &^ (s - 1) + } + if (uintptr(v)+n)%s != 0 { + // Compute huge page containing v+n-1. + tail = (uintptr(v) + n - 1) &^ (s - 1) + } + + // Note that madvise will return EINVAL if the flag is + // already set, which is quite likely. We ignore + // errors. + if head != 0 && head+sys.HugePageSize == tail { + // head and tail are different but adjacent, + // so do this in one call. + madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE) + } else { + // Advise the huge pages containing v and v+n-1. + if head != 0 { + madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE) + } + if tail != 0 && tail != head { + madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE) + } + } + } + + if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 { + // madvise will round this to any physical page + // *covered* by this range, so an unaligned madvise + // will release more memory than intended. + throw("unaligned sysUnused") + } + + if _MADV_DONTNEED != 0 { + madvise(v, n, _MADV_DONTNEED) + } else if _MADV_FREE != 0 { + madvise(v, n, _MADV_FREE) + } +} + +func sysUsed(v unsafe.Pointer, n uintptr) { + if sys.HugePageSize != 0 && _MADV_HUGEPAGE != 0 { + // Partially undo the NOHUGEPAGE marks from sysUnused + // for whole huge pages between v and v+n. This may + // leave huge pages off at the end points v and v+n + // even though allocations may cover these entire huge + // pages. We could detect this and undo NOHUGEPAGE on + // the end points as well, but it's probably not worth + // the cost because when neighboring allocations are + // freed sysUnused will just set NOHUGEPAGE again. + var s uintptr = sys.HugePageSize + + // Round v up to a huge page boundary. + beg := (uintptr(v) + (s - 1)) &^ (s - 1) + // Round v+n down to a huge page boundary. + end := (uintptr(v) + n) &^ (s - 1) + + if beg < end { + madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE) + } + } +} + +// Don't split the stack as this function may be invoked without a valid G, +// which prevents us from allocating more stack. +//go:nosplit +func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { + mSysStatDec(sysStat, n) + munmap(v, n) +} + +func sysFault(v unsafe.Pointer, n uintptr) { + mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0) +} + +func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer { + // On 64-bit, people with ulimit -v set complain if we reserve too + // much address space. Instead, assume that the reservation is okay + // if we can reserve at least 64K and check the assumption in SysMap. + // Only user-mode Linux (UML) rejects these requests. + if sys.PtrSize == 8 && uint64(n) > 1<<32 { + p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if p != v { + if uintptr(p) != _MAP_FAILED { + munmap(p, 64<<10) + } + return nil + } + munmap(p, 64<<10) + *reserved = false + return v + } + + p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0) + if uintptr(p) == _MAP_FAILED { + return nil + } + *reserved = true + return p +} + +func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) { + mSysStatInc(sysStat, n) + + // On 64-bit, we don't actually have v reserved, so tread carefully. + if !reserved { + flags := int32(_MAP_ANON | _MAP_PRIVATE) + if GOOS == "dragonfly" { + // TODO(jsing): For some reason DragonFly seems to return + // memory at a different address than we requested, even when + // there should be no reason for it to do so. This can be + // avoided by using MAP_FIXED, but I'm not sure we should need + // to do this - we do not on other platforms. + flags |= _MAP_FIXED + } + p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0) + if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM { + throw("runtime: out of memory") + } + if p != v { + print("runtime: address space conflict: map(", v, ") = ", p, "\n") + throw("runtime: address space conflict") + } + return + } + + p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0) + if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM { + throw("runtime: out of memory") + } + if p != v { + throw("runtime: cannot map pages in arena address space") + } +} diff --git a/libgo/go/runtime/memmove_linux_amd64_test.go b/libgo/go/runtime/memmove_linux_amd64_test.go new file mode 100644 index 0000000..d0e8b42a --- /dev/null +++ b/libgo/go/runtime/memmove_linux_amd64_test.go @@ -0,0 +1,62 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "io/ioutil" + "os" + "reflect" + "syscall" + "testing" + "unsafe" +) + +// TestMemmoveOverflow maps 3GB of memory and calls memmove on +// the corresponding slice. +func TestMemmoveOverflow(t *testing.T) { + t.Parallel() + // Create a temporary file. + tmp, err := ioutil.TempFile("", "go-memmovetest") + if err != nil { + t.Fatal(err) + } + _, err = tmp.Write(make([]byte, 65536)) + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmp.Name()) + defer tmp.Close() + + // Set up mappings. + base, _, errno := syscall.Syscall6(syscall.SYS_MMAP, + 0xa0<<32, 3<<30, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_PRIVATE|syscall.MAP_ANONYMOUS, ^uintptr(0), 0) + if errno != 0 { + t.Skipf("could not create memory mapping: %s", errno) + } + syscall.Syscall(syscall.SYS_MUNMAP, base, 3<<30, 0) + + for off := uintptr(0); off < 3<<30; off += 65536 { + _, _, errno := syscall.Syscall6(syscall.SYS_MMAP, + base+off, 65536, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED|syscall.MAP_FIXED, tmp.Fd(), 0) + if errno != 0 { + t.Skipf("could not map a page at requested 0x%x: %s", base+off, errno) + } + defer syscall.Syscall(syscall.SYS_MUNMAP, base+off, 65536, 0) + } + + var s []byte + sp := (*reflect.SliceHeader)(unsafe.Pointer(&s)) + sp.Data = base + sp.Len, sp.Cap = 3<<30, 3<<30 + + n := copy(s[1:], s) + if n != 3<<30-1 { + t.Fatalf("copied %d bytes, expected %d", n, 3<<30-1) + } + n = copy(s, s[1:]) + if n != 3<<30-1 { + t.Fatalf("copied %d bytes, expected %d", n, 3<<30-1) + } +} diff --git a/libgo/go/runtime/memmove_test.go b/libgo/go/runtime/memmove_test.go new file mode 100644 index 0000000..74b8753 --- /dev/null +++ b/libgo/go/runtime/memmove_test.go @@ -0,0 +1,469 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "crypto/rand" + "encoding/binary" + "fmt" + "internal/race" + . "runtime" + "testing" +) + +func TestMemmove(t *testing.T) { + t.Parallel() + size := 256 + if testing.Short() { + size = 128 + 16 + } + src := make([]byte, size) + dst := make([]byte, size) + for i := 0; i < size; i++ { + src[i] = byte(128 + (i & 127)) + } + for i := 0; i < size; i++ { + dst[i] = byte(i & 127) + } + for n := 0; n <= size; n++ { + for x := 0; x <= size-n; x++ { // offset in src + for y := 0; y <= size-n; y++ { // offset in dst + copy(dst[y:y+n], src[x:x+n]) + for i := 0; i < y; i++ { + if dst[i] != byte(i&127) { + t.Fatalf("prefix dst[%d] = %d", i, dst[i]) + } + } + for i := y; i < y+n; i++ { + if dst[i] != byte(128+((i-y+x)&127)) { + t.Fatalf("copied dst[%d] = %d", i, dst[i]) + } + dst[i] = byte(i & 127) // reset dst + } + for i := y + n; i < size; i++ { + if dst[i] != byte(i&127) { + t.Fatalf("suffix dst[%d] = %d", i, dst[i]) + } + } + } + } + } +} + +func TestMemmoveAlias(t *testing.T) { + t.Parallel() + size := 256 + if testing.Short() { + size = 128 + 16 + } + buf := make([]byte, size) + for i := 0; i < size; i++ { + buf[i] = byte(i) + } + for n := 0; n <= size; n++ { + for x := 0; x <= size-n; x++ { // src offset + for y := 0; y <= size-n; y++ { // dst offset + copy(buf[y:y+n], buf[x:x+n]) + for i := 0; i < y; i++ { + if buf[i] != byte(i) { + t.Fatalf("prefix buf[%d] = %d", i, buf[i]) + } + } + for i := y; i < y+n; i++ { + if buf[i] != byte(i-y+x) { + t.Fatalf("copied buf[%d] = %d", i, buf[i]) + } + buf[i] = byte(i) // reset buf + } + for i := y + n; i < size; i++ { + if buf[i] != byte(i) { + t.Fatalf("suffix buf[%d] = %d", i, buf[i]) + } + } + } + } + } +} + +func TestMemmoveLarge0x180000(t *testing.T) { + t.Parallel() + if race.Enabled { + t.Skip("skipping large memmove test under race detector") + } + testSize(t, 0x180000) +} + +func TestMemmoveOverlapLarge0x120000(t *testing.T) { + t.Parallel() + if race.Enabled { + t.Skip("skipping large memmove test under race detector") + } + testOverlap(t, 0x120000) +} + +func testSize(t *testing.T, size int) { + src := make([]byte, size) + dst := make([]byte, size) + _, _ = rand.Read(src) + _, _ = rand.Read(dst) + + ref := make([]byte, size) + copyref(ref, dst) + + for n := size - 50; n > 1; n >>= 1 { + for x := 0; x <= size-n; x = x*7 + 1 { // offset in src + for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst + copy(dst[y:y+n], src[x:x+n]) + copyref(ref[y:y+n], src[x:x+n]) + p := cmpb(dst, ref) + if p >= 0 { + t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, dst[p], ref[p]) + } + } + } + } +} + +func testOverlap(t *testing.T, size int) { + src := make([]byte, size) + test := make([]byte, size) + ref := make([]byte, size) + _, _ = rand.Read(src) + + for n := size - 50; n > 1; n >>= 1 { + for x := 0; x <= size-n; x = x*7 + 1 { // offset in src + for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst + // Reset input + copyref(test, src) + copyref(ref, src) + copy(test[y:y+n], test[x:x+n]) + if y <= x { + copyref(ref[y:y+n], ref[x:x+n]) + } else { + copybw(ref[y:y+n], ref[x:x+n]) + } + p := cmpb(test, ref) + if p >= 0 { + t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, test[p], ref[p]) + } + } + } + } + +} + +// Forward copy. +func copyref(dst, src []byte) { + for i, v := range src { + dst[i] = v + } +} + +// Backwards copy +func copybw(dst, src []byte) { + if len(src) == 0 { + return + } + for i := len(src) - 1; i >= 0; i-- { + dst[i] = src[i] + } +} + +// Returns offset of difference +func matchLen(a, b []byte, max int) int { + a = a[:max] + b = b[:max] + for i, av := range a { + if b[i] != av { + return i + } + } + return max +} + +func cmpb(a, b []byte) int { + l := matchLen(a, b, len(a)) + if l == len(a) { + return -1 + } + return l +} + +func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) { + for _, n := range sizes { + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n)) + fn(b, n) + }) + } +} + +var bufSizes = []int{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 32, 64, 128, 256, 512, 1024, 2048, 4096, +} + +func BenchmarkMemmove(b *testing.B) { + benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { + x := make([]byte, n) + y := make([]byte, n) + for i := 0; i < b.N; i++ { + copy(x, y) + } + }) +} + +func BenchmarkMemmoveUnalignedDst(b *testing.B) { + benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { + x := make([]byte, n+1) + y := make([]byte, n) + for i := 0; i < b.N; i++ { + copy(x[1:], y) + } + }) +} + +func BenchmarkMemmoveUnalignedSrc(b *testing.B) { + benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { + x := make([]byte, n) + y := make([]byte, n+1) + for i := 0; i < b.N; i++ { + copy(x, y[1:]) + } + }) +} + +func TestMemclr(t *testing.T) { + size := 512 + if testing.Short() { + size = 128 + 16 + } + mem := make([]byte, size) + for i := 0; i < size; i++ { + mem[i] = 0xee + } + for n := 0; n < size; n++ { + for x := 0; x <= size-n; x++ { // offset in mem + MemclrBytes(mem[x : x+n]) + for i := 0; i < x; i++ { + if mem[i] != 0xee { + t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i]) + } + } + for i := x; i < x+n; i++ { + if mem[i] != 0 { + t.Fatalf("failed clear mem[%d] = %d", i, mem[i]) + } + mem[i] = 0xee + } + for i := x + n; i < size; i++ { + if mem[i] != 0xee { + t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i]) + } + } + } + } +} + +func BenchmarkMemclr(b *testing.B) { + for _, n := range []int{5, 16, 64, 256, 4096, 65536} { + x := make([]byte, n) + b.Run(fmt.Sprint(n), func(b *testing.B) { + b.SetBytes(int64(n)) + for i := 0; i < b.N; i++ { + MemclrBytes(x) + } + }) + } + for _, m := range []int{1, 4, 8, 16, 64} { + x := make([]byte, m<<20) + b.Run(fmt.Sprint(m, "M"), func(b *testing.B) { + b.SetBytes(int64(m << 20)) + for i := 0; i < b.N; i++ { + MemclrBytes(x) + } + }) + } +} + +func BenchmarkGoMemclr(b *testing.B) { + benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) { + x := make([]byte, n) + for i := 0; i < b.N; i++ { + for j := range x { + x[j] = 0 + } + } + }) +} + +func BenchmarkClearFat8(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [8 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat12(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [12 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat16(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [16 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat24(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [24 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat32(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [32 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat40(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [40 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat48(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [48 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat56(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [56 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat64(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [64 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat128(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [128 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat256(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [256 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat512(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [512 / 4]uint32 + _ = x + } +} +func BenchmarkClearFat1024(b *testing.B) { + for i := 0; i < b.N; i++ { + var x [1024 / 4]uint32 + _ = x + } +} + +func BenchmarkCopyFat8(b *testing.B) { + var x [8 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat12(b *testing.B) { + var x [12 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat16(b *testing.B) { + var x [16 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat24(b *testing.B) { + var x [24 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat32(b *testing.B) { + var x [32 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat64(b *testing.B) { + var x [64 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat128(b *testing.B) { + var x [128 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat256(b *testing.B) { + var x [256 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat512(b *testing.B) { + var x [512 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} +func BenchmarkCopyFat1024(b *testing.B) { + var x [1024 / 4]uint32 + for i := 0; i < b.N; i++ { + y := x + _ = y + } +} + +func BenchmarkIssue18740(b *testing.B) { + // This tests that memmove uses one 4-byte load/store to move 4 bytes. + // It used to do 2 2-byte load/stores, which leads to a pipeline stall + // when we try to read the result with one 4-byte load. + var buf [4]byte + for j := 0; j < b.N; j++ { + s := uint32(0) + for i := 0; i < 4096; i += 4 { + copy(buf[:], g[i:]) + s += binary.LittleEndian.Uint32(buf[:]) + } + sink = uint64(s) + } +} + +// TODO: 2 byte and 8 byte benchmarks also. + +var g [4096]byte diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go new file mode 100644 index 0000000..f0123b3 --- /dev/null +++ b/libgo/go/runtime/mfinal.go @@ -0,0 +1,424 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector: finalizers and block profiling. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// finblock is allocated from non-GC'd memory, so any heap pointers +// must be specially handled. +// +//go:notinheap +type finblock struct { + alllink *finblock + next *finblock + cnt uint32 + _ int32 + fin [(_FinBlockSize - 2*sys.PtrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer +} + +var finlock mutex // protects the following variables +var fing *g // goroutine that runs finalizers +var finq *finblock // list of finalizers that are to be executed +var finc *finblock // cache of free blocks +var finptrmask [_FinBlockSize / sys.PtrSize / 8]byte +var fingwait bool +var fingwake bool +var allfin *finblock // list of all blocks + +// NOTE: Layout known to queuefinalizer. +type finalizer struct { + fn *funcval // function to call (may be a heap pointer) + arg unsafe.Pointer // ptr to object (may be a heap pointer) + ft *functype // type of fn (unlikely, but may be a heap pointer) + ot *ptrtype // type of ptr to object (may be a heap pointer) +} + +func queuefinalizer(p unsafe.Pointer, fn *funcval, ft *functype, ot *ptrtype) { + lock(&finlock) + if finq == nil || finq.cnt == uint32(len(finq.fin)) { + if finc == nil { + finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys)) + finc.alllink = allfin + allfin = finc + if finptrmask[0] == 0 { + // Build pointer mask for Finalizer array in block. + // We allocate values of type finalizer in + // finblock values. Since these values are + // allocated by persistentalloc, they require + // special scanning during GC. finptrmask is a + // pointer mask to use while scanning. + // Since all the values in finalizer are + // pointers, just turn all bits on. + for i := range finptrmask { + finptrmask[i] = 0xff + } + } + } + block := finc + finc = block.next + block.next = finq + finq = block + } + f := &finq.fin[finq.cnt] + atomic.Xadd(&finq.cnt, +1) // Sync with markroots + f.fn = fn + f.ft = ft + f.ot = ot + f.arg = p + fingwake = true + unlock(&finlock) +} + +//go:nowritebarrier +func iterate_finq(callback func(*funcval, unsafe.Pointer, *functype, *ptrtype)) { + for fb := allfin; fb != nil; fb = fb.alllink { + for i := uint32(0); i < fb.cnt; i++ { + f := &fb.fin[i] + callback(f.fn, f.arg, f.ft, f.ot) + } + } +} + +func wakefing() *g { + var res *g + lock(&finlock) + if fingwait && fingwake { + fingwait = false + fingwake = false + res = fing + } + unlock(&finlock) + return res +} + +var ( + fingCreate uint32 + fingRunning bool +) + +func createfing() { + // start the finalizer goroutine exactly once + if fingCreate == 0 && atomic.Cas(&fingCreate, 0, 1) { + go runfinq() + } +} + +// This is the goroutine that runs all of the finalizers +func runfinq() { + var ( + ef eface + ifac iface + ) + + for { + lock(&finlock) + fb := finq + finq = nil + if fb == nil { + gp := getg() + fing = gp + fingwait = true + goparkunlock(&finlock, "finalizer wait", traceEvGoBlock, 1) + continue + } + unlock(&finlock) + for fb != nil { + for i := fb.cnt; i > 0; i-- { + f := &fb.fin[i-1] + + if f.ft == nil { + throw("missing type in runfinq") + } + fint := f.ft.in[0] + var param unsafe.Pointer + switch fint.kind & kindMask { + case kindPtr: + // direct use of pointer + param = unsafe.Pointer(&f.arg) + case kindInterface: + ityp := (*interfacetype)(unsafe.Pointer(fint)) + if len(ityp.methods) == 0 { + // set up with empty interface + ef._type = &f.ot.typ + ef.data = f.arg + param = unsafe.Pointer(&ef) + } else { + // convert to interface with methods + // this conversion is guaranteed to succeed - we checked in SetFinalizer + ifac.tab = getitab(fint, &f.ot.typ, true) + ifac.data = f.arg + param = unsafe.Pointer(&ifac) + } + default: + throw("bad kind in runfinq") + } + fingRunning = true + reflectcall(f.ft, f.fn, false, false, ¶m, nil) + fingRunning = false + + // Drop finalizer queue heap references + // before hiding them from markroot. + // This also ensures these will be + // clear if we reuse the finalizer. + f.fn = nil + f.arg = nil + f.ot = nil + atomic.Store(&fb.cnt, i-1) + } + next := fb.next + lock(&finlock) + fb.next = finc + finc = fb + unlock(&finlock) + fb = next + } + } +} + +// SetFinalizer sets the finalizer associated with obj to the provided +// finalizer function. When the garbage collector finds an unreachable block +// with an associated finalizer, it clears the association and runs +// finalizer(obj) in a separate goroutine. This makes obj reachable again, +// but now without an associated finalizer. Assuming that SetFinalizer +// is not called again, the next time the garbage collector sees +// that obj is unreachable, it will free obj. +// +// SetFinalizer(obj, nil) clears any finalizer associated with obj. +// +// The argument obj must be a pointer to an object allocated by calling +// new, by taking the address of a composite literal, or by taking the +// address of a local variable. +// The argument finalizer must be a function that takes a single argument +// to which obj's type can be assigned, and can have arbitrary ignored return +// values. If either of these is not true, SetFinalizer may abort the +// program. +// +// Finalizers are run in dependency order: if A points at B, both have +// finalizers, and they are otherwise unreachable, only the finalizer +// for A runs; once A is freed, the finalizer for B can run. +// If a cyclic structure includes a block with a finalizer, that +// cycle is not guaranteed to be garbage collected and the finalizer +// is not guaranteed to run, because there is no ordering that +// respects the dependencies. +// +// The finalizer for obj is scheduled to run at some arbitrary time after +// obj becomes unreachable. +// There is no guarantee that finalizers will run before a program exits, +// so typically they are useful only for releasing non-memory resources +// associated with an object during a long-running program. +// For example, an os.File object could use a finalizer to close the +// associated operating system file descriptor when a program discards +// an os.File without calling Close, but it would be a mistake +// to depend on a finalizer to flush an in-memory I/O buffer such as a +// bufio.Writer, because the buffer would not be flushed at program exit. +// +// It is not guaranteed that a finalizer will run if the size of *obj is +// zero bytes. +// +// It is not guaranteed that a finalizer will run for objects allocated +// in initializers for package-level variables. Such objects may be +// linker-allocated, not heap-allocated. +// +// A finalizer may run as soon as an object becomes unreachable. +// In order to use finalizers correctly, the program must ensure that +// the object is reachable until it is no longer required. +// Objects stored in global variables, or that can be found by tracing +// pointers from a global variable, are reachable. For other objects, +// pass the object to a call of the KeepAlive function to mark the +// last point in the function where the object must be reachable. +// +// For example, if p points to a struct that contains a file descriptor d, +// and p has a finalizer that closes that file descriptor, and if the last +// use of p in a function is a call to syscall.Write(p.d, buf, size), then +// p may be unreachable as soon as the program enters syscall.Write. The +// finalizer may run at that moment, closing p.d, causing syscall.Write +// to fail because it is writing to a closed file descriptor (or, worse, +// to an entirely different file descriptor opened by a different goroutine). +// To avoid this problem, call runtime.KeepAlive(p) after the call to +// syscall.Write. +// +// A single goroutine runs all finalizers for a program, sequentially. +// If a finalizer must run for a long time, it should do so by starting +// a new goroutine. +func SetFinalizer(obj interface{}, finalizer interface{}) { + if debug.sbrk != 0 { + // debug.sbrk never frees memory, so no finalizers run + // (and we don't have the data structures to record them). + return + } + e := efaceOf(&obj) + etyp := e._type + if etyp == nil { + throw("runtime.SetFinalizer: first argument is nil") + } + if etyp.kind&kindMask != kindPtr { + throw("runtime.SetFinalizer: first argument is " + *etyp.string + ", not pointer") + } + ot := (*ptrtype)(unsafe.Pointer(etyp)) + if ot.elem == nil { + throw("nil elem type!") + } + + // find the containing object + _, base, _ := findObject(e.data) + + if base == nil { + // 0-length objects are okay. + if e.data == unsafe.Pointer(&zerobase) { + return + } + + // Global initializers might be linker-allocated. + // var Foo = &Object{} + // func main() { + // runtime.SetFinalizer(Foo, nil) + // } + // The relevant segments are: noptrdata, data, bss, noptrbss. + // We cannot assume they are in any order or even contiguous, + // due to external linking. + // + // For gccgo we have no reliable way to detect them, + // so we just return. + return + } + + if e.data != base { + // As an implementation detail we allow to set finalizers for an inner byte + // of an object if it could come from tiny alloc (see mallocgc for details). + if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize { + throw("runtime.SetFinalizer: pointer not at beginning of allocated block") + } + } + + f := efaceOf(&finalizer) + ftyp := f._type + if ftyp == nil { + // switch to system stack and remove finalizer + systemstack(func() { + removefinalizer(e.data) + }) + return + } + + if ftyp.kind&kindMask != kindFunc { + throw("runtime.SetFinalizer: second argument is " + *ftyp.string + ", not a function") + } + ft := (*functype)(unsafe.Pointer(ftyp)) + if ft.dotdotdot { + throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string + " because dotdotdot") + } + if len(ft.in) != 1 { + throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) + } + fint := ft.in[0] + switch { + case fint == etyp: + // ok - same type + goto okarg + case fint.kind&kindMask == kindPtr: + if (fint.uncommontype == nil || etyp.uncommontype == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem { + // ok - not same type, but both pointers, + // one or the other is unnamed, and same element type, so assignable. + goto okarg + } + case fint.kind&kindMask == kindInterface: + ityp := (*interfacetype)(unsafe.Pointer(fint)) + if len(ityp.methods) == 0 { + // ok - satisfies empty interface + goto okarg + } + if getitab(fint, etyp, true) == nil { + goto okarg + } + } + throw("runtime.SetFinalizer: cannot pass " + *etyp.string + " to finalizer " + *ftyp.string) +okarg: + // make sure we have a finalizer goroutine + createfing() + + systemstack(func() { + data := f.data + if !isDirectIface(ftyp) { + data = *(*unsafe.Pointer)(data) + } + if !addfinalizer(e.data, (*funcval)(data), ft, ot) { + throw("runtime.SetFinalizer: finalizer already set") + } + }) +} + +// Look up pointer v in heap. Return the span containing the object, +// the start of the object, and the size of the object. If the object +// does not exist, return nil, nil, 0. +func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) { + c := gomcache() + c.local_nlookup++ + if sys.PtrSize == 4 && c.local_nlookup >= 1<<30 { + // purge cache stats to prevent overflow + lock(&mheap_.lock) + purgecachedstats(c) + unlock(&mheap_.lock) + } + + // find span + arena_start := mheap_.arena_start + arena_used := mheap_.arena_used + if uintptr(v) < arena_start || uintptr(v) >= arena_used { + return + } + p := uintptr(v) >> pageShift + q := p - arena_start>>pageShift + s = mheap_.spans[q] + if s == nil { + return + } + x = unsafe.Pointer(s.base()) + + if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse { + s = nil + x = nil + return + } + + n = s.elemsize + if s.sizeclass != 0 { + x = add(x, (uintptr(v)-uintptr(x))/n*n) + } + return +} + +// Mark KeepAlive as noinline so that the current compiler will ensure +// that the argument is alive at the point of the function call. +// If it were inlined, it would disappear, and there would be nothing +// keeping the argument alive. Perhaps a future compiler will recognize +// runtime.KeepAlive specially and do something more efficient. +//go:noinline + +// KeepAlive marks its argument as currently reachable. +// This ensures that the object is not freed, and its finalizer is not run, +// before the point in the program where KeepAlive is called. +// +// A very simplified example showing where KeepAlive is required: +// type File struct { d int } +// d, err := syscall.Open("/file/path", syscall.O_RDONLY, 0) +// // ... do something if err != nil ... +// p := &File{d} +// runtime.SetFinalizer(p, func(p *File) { syscall.Close(p.d) }) +// var buf [10]byte +// n, err := syscall.Read(p.d, buf[:]) +// // Ensure p is not finalized until Read returns. +// runtime.KeepAlive(p) +// // No more uses of p after this point. +// +// Without the KeepAlive call, the finalizer could run at the start of +// syscall.Read, closing the file descriptor before syscall.Read makes +// the actual system call. +func KeepAlive(interface{}) {} diff --git a/libgo/go/runtime/mfixalloc.go b/libgo/go/runtime/mfixalloc.go new file mode 100644 index 0000000..fe4b0fc --- /dev/null +++ b/libgo/go/runtime/mfixalloc.go @@ -0,0 +1,99 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fixed-size object allocator. Returned memory is not zeroed. +// +// See malloc.go for overview. + +package runtime + +import "unsafe" + +// FixAlloc is a simple free-list allocator for fixed size objects. +// Malloc uses a FixAlloc wrapped around sysAlloc to manages its +// MCache and MSpan objects. +// +// Memory returned by fixalloc.alloc is zeroed by default, but the +// caller may take responsibility for zeroing allocations by setting +// the zero flag to false. This is only safe if the memory never +// contains heap pointers. +// +// The caller is responsible for locking around FixAlloc calls. +// Callers can keep state in the object but the first word is +// smashed by freeing and reallocating. +// +// Consider marking fixalloc'd types go:notinheap. +type fixalloc struct { + size uintptr + first func(arg, p unsafe.Pointer) // called first time p is returned + arg unsafe.Pointer + list *mlink + chunk unsafe.Pointer + nchunk uint32 + inuse uintptr // in-use bytes now + stat *uint64 + zero bool // zero allocations +} + +// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).) +// Since assignments to mlink.next will result in a write barrier being performed +// this cannot be used by some of the internal GC structures. For example when +// the sweeper is placing an unmarked object on the free list it does not want the +// write barrier to be called since that could result in the object being reachable. +// +//go:notinheap +type mlink struct { + next *mlink +} + +// Initialize f to allocate objects of the given size, +// using the allocator to obtain chunks of memory. +func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *uint64) { + f.size = size + f.first = first + f.arg = arg + f.list = nil + f.chunk = nil + f.nchunk = 0 + f.inuse = 0 + f.stat = stat + f.zero = true +} + +func (f *fixalloc) alloc() unsafe.Pointer { + if f.size == 0 { + print("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n") + throw("runtime: internal error") + } + + if f.list != nil { + v := unsafe.Pointer(f.list) + f.list = f.list.next + f.inuse += f.size + if f.zero { + memclrNoHeapPointers(v, f.size) + } + return v + } + if uintptr(f.nchunk) < f.size { + f.chunk = persistentalloc(_FixAllocChunk, 0, f.stat) + f.nchunk = _FixAllocChunk + } + + v := f.chunk + if f.first != nil { + f.first(f.arg, v) + } + f.chunk = add(f.chunk, f.size) + f.nchunk -= uint32(f.size) + f.inuse += f.size + return v +} + +func (f *fixalloc) free(p unsafe.Pointer) { + f.inuse -= f.size + v := (*mlink)(p) + v.next = f.list + f.list = v +} diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go new file mode 100644 index 0000000..abec9d3 --- /dev/null +++ b/libgo/go/runtime/mgc.go @@ -0,0 +1,1963 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector (GC). +// +// The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple +// GC thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is +// non-generational and non-compacting. Allocation is done using size segregated per P allocation +// areas to minimize fragmentation while eliminating locks in the common case. +// +// The algorithm decomposes into several steps. +// This is a high level description of the algorithm being used. For an overview of GC a good +// place to start is Richard Jones' gchandbook.org. +// +// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see +// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. +// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), +// 966-975. +// For journal quality proofs that these steps are complete, correct, and terminate see +// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. +// Concurrency and Computation: Practice and Experience 15(3-5), 2003. +// +// 1. GC performs sweep termination. +// +// a. Stop the world. This causes all Ps to reach a GC safe-point. +// +// b. Sweep any unswept spans. There will only be unswept spans if +// this GC cycle was forced before the expected time. +// +// 2. GC performs the "mark 1" sub-phase. In this sub-phase, Ps are +// allowed to locally cache parts of the work queue. +// +// a. Prepare for the mark phase by setting gcphase to _GCmark +// (from _GCoff), enabling the write barrier, enabling mutator +// assists, and enqueueing root mark jobs. No objects may be +// scanned until all Ps have enabled the write barrier, which is +// accomplished using STW. +// +// b. Start the world. From this point, GC work is done by mark +// workers started by the scheduler and by assists performed as +// part of allocation. The write barrier shades both the +// overwritten pointer and the new pointer value for any pointer +// writes (see mbarrier.go for details). Newly allocated objects +// are immediately marked black. +// +// c. GC performs root marking jobs. This includes scanning all +// stacks, shading all globals, and shading any heap pointers in +// off-heap runtime data structures. Scanning a stack stops a +// goroutine, shades any pointers found on its stack, and then +// resumes the goroutine. +// +// d. GC drains the work queue of grey objects, scanning each grey +// object to black and shading all pointers found in the object +// (which in turn may add those pointers to the work queue). +// +// 3. Once the global work queue is empty (but local work queue caches +// may still contain work), GC performs the "mark 2" sub-phase. +// +// a. GC stops all workers, disables local work queue caches, +// flushes each P's local work queue cache to the global work queue +// cache, and reenables workers. +// +// b. GC again drains the work queue, as in 2d above. +// +// 4. Once the work queue is empty, GC performs mark termination. +// +// a. Stop the world. +// +// b. Set gcphase to _GCmarktermination, and disable workers and +// assists. +// +// c. Drain any remaining work from the work queue (typically there +// will be none). +// +// d. Perform other housekeeping like flushing mcaches. +// +// 5. GC performs the sweep phase. +// +// a. Prepare for the sweep phase by setting gcphase to _GCoff, +// setting up sweep state and disabling the write barrier. +// +// b. Start the world. From this point on, newly allocated objects +// are white, and allocating sweeps spans before use if necessary. +// +// c. GC does concurrent sweeping in the background and in response +// to allocation. See description below. +// +// 6. When sufficient allocation has taken place, replay the sequence +// starting with 1 above. See discussion of GC rate below. + +// Concurrent sweep. +// +// The sweep phase proceeds concurrently with normal program execution. +// The heap is swept span-by-span both lazily (when a goroutine needs another span) +// and concurrently in a background goroutine (this helps programs that are not CPU bound). +// At the end of STW mark termination all spans are marked as "needs sweeping". +// +// The background sweeper goroutine simply sweeps spans one-by-one. +// +// To avoid requesting more OS memory while there are unswept spans, when a +// goroutine needs another span, it first attempts to reclaim that much memory +// by sweeping. When a goroutine needs to allocate a new small-object span, it +// sweeps small-object spans for the same object size until it frees at least +// one object. When a goroutine needs to allocate large-object span from heap, +// it sweeps spans until it frees at least that many pages into heap. There is +// one case where this may not suffice: if a goroutine sweeps and frees two +// nonadjacent one-page spans to the heap, it will allocate a new two-page +// span, but there can still be other one-page unswept spans which could be +// combined into a two-page span. +// +// It's critical to ensure that no operations proceed on unswept spans (that would corrupt +// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache, +// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it. +// When a goroutine explicitly frees an object or sets a finalizer, it ensures that +// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish). +// The finalizer goroutine is kicked off only when all spans are swept. +// When the next GC starts, it sweeps all not-yet-swept spans (if any). + +// GC rate. +// Next GC is after we've allocated an extra amount of memory proportional to +// the amount already in use. The proportion is controlled by GOGC environment variable +// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M +// (this mark is tracked in next_gc variable). This keeps the GC cost in linear +// proportion to the allocation cost. Adjusting GOGC just changes the linear constant +// (and also the amount of extra memory used). + +// Oblets +// +// In order to prevent long pauses while scanning large objects and to +// improve parallelism, the garbage collector breaks up scan jobs for +// objects larger than maxObletBytes into "oblets" of at most +// maxObletBytes. When scanning encounters the beginning of a large +// object, it scans only the first oblet and enqueues the remaining +// oblets as new scan jobs. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +const ( + _DebugGC = 0 + _ConcurrentSweep = true + _FinBlockSize = 4 * 1024 + + // sweepMinHeapDistance is a lower bound on the heap distance + // (in bytes) reserved for concurrent sweeping between GC + // cycles. This will be scaled by gcpercent/100. + sweepMinHeapDistance = 1024 * 1024 +) + +// heapminimum is the minimum heap size at which to trigger GC. +// For small heaps, this overrides the usual GOGC*live set rule. +// +// When there is a very small live set but a lot of allocation, simply +// collecting when the heap reaches GOGC*live results in many GC +// cycles and high total per-GC overhead. This minimum amortizes this +// per-GC overhead while keeping the heap reasonably small. +// +// During initialization this is set to 4MB*GOGC/100. In the case of +// GOGC==0, this will set heapminimum to 0, resulting in constant +// collection even when the heap size is small, which is useful for +// debugging. +var heapminimum uint64 = defaultHeapMinimum + +// defaultHeapMinimum is the value of heapminimum for GOGC==100. +const defaultHeapMinimum = 4 << 20 + +// Initialized from $GOGC. GOGC=off means no GC. +var gcpercent int32 + +func gcinit() { + if unsafe.Sizeof(workbuf{}) != _WorkbufSize { + throw("size of Workbuf is suboptimal") + } + + _ = setGCPercent(readgogc()) + memstats.gc_trigger = heapminimum + // Compute the goal heap size based on the trigger: + // trigger = marked * (1 + triggerRatio) + // marked = trigger / (1 + triggerRatio) + // goal = marked * (1 + GOGC/100) + // = trigger / (1 + triggerRatio) * (1 + GOGC/100) + memstats.next_gc = uint64(float64(memstats.gc_trigger) / (1 + gcController.triggerRatio) * (1 + float64(gcpercent)/100)) + if gcpercent < 0 { + memstats.next_gc = ^uint64(0) + } + work.startSema = 1 + work.markDoneSema = 1 +} + +func readgogc() int32 { + p := gogetenv("GOGC") + if p == "off" { + return -1 + } + if n, ok := atoi32(p); ok { + return n + } + return 100 +} + +// gcenable is called after the bulk of the runtime initialization, +// just before we're about to start letting user code run. +// It kicks off the background sweeper goroutine and enables GC. +func gcenable() { + c := make(chan int, 1) + go bgsweep(c) + <-c + memstats.enablegc = true // now that runtime is initialized, GC is okay +} + +//go:linkname setGCPercent runtime_debug.setGCPercent +func setGCPercent(in int32) (out int32) { + lock(&mheap_.lock) + out = gcpercent + if in < 0 { + in = -1 + } + gcpercent = in + heapminimum = defaultHeapMinimum * uint64(gcpercent) / 100 + if gcController.triggerRatio > float64(gcpercent)/100 { + gcController.triggerRatio = float64(gcpercent) / 100 + } + // This is either in gcinit or followed by a STW GC, both of + // which will reset other stats like memstats.gc_trigger and + // memstats.next_gc to appropriate values. + unlock(&mheap_.lock) + return out +} + +// Garbage collector phase. +// Indicates to write barrier and synchronization task to perform. +var gcphase uint32 + +// The compiler knows about this variable. +// If you change it, you must change the compiler too. +var writeBarrier struct { + enabled bool // compiler emits a check of this before calling write barrier + pad [3]byte // compiler uses 32-bit load for "enabled" field + needed bool // whether we need a write barrier for current GC phase + cgo bool // whether we need a write barrier for a cgo check + alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load +} + +// gcBlackenEnabled is 1 if mutator assists and background mark +// workers are allowed to blacken objects. This must only be set when +// gcphase == _GCmark. +var gcBlackenEnabled uint32 + +// gcBlackenPromptly indicates that optimizations that may +// hide work from the global work queue should be disabled. +// +// If gcBlackenPromptly is true, per-P gcWork caches should +// be flushed immediately and new objects should be allocated black. +// +// There is a tension between allocating objects white and +// allocating them black. If white and the objects die before being +// marked they can be collected during this GC cycle. On the other +// hand allocating them black will reduce _GCmarktermination latency +// since more work is done in the mark phase. This tension is resolved +// by allocating white until the mark phase is approaching its end and +// then allocating black for the remainder of the mark phase. +var gcBlackenPromptly bool + +const ( + _GCoff = iota // GC not running; sweeping in background, write barrier disabled + _GCmark // GC marking roots and workbufs: allocate black, write barrier ENABLED + _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED +) + +//go:nosplit +func setGCPhase(x uint32) { + atomic.Store(&gcphase, x) + writeBarrier.needed = gcphase == _GCmark || gcphase == _GCmarktermination + writeBarrier.enabled = writeBarrier.needed || writeBarrier.cgo +} + +// gcMarkWorkerMode represents the mode that a concurrent mark worker +// should operate in. +// +// Concurrent marking happens through four different mechanisms. One +// is mutator assists, which happen in response to allocations and are +// not scheduled. The other three are variations in the per-P mark +// workers and are distinguished by gcMarkWorkerMode. +type gcMarkWorkerMode int + +const ( + // gcMarkWorkerDedicatedMode indicates that the P of a mark + // worker is dedicated to running that mark worker. The mark + // worker should run without preemption. + gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota + + // gcMarkWorkerFractionalMode indicates that a P is currently + // running the "fractional" mark worker. The fractional worker + // is necessary when GOMAXPROCS*gcGoalUtilization is not an + // integer. The fractional worker should run until it is + // preempted and will be scheduled to pick up the fractional + // part of GOMAXPROCS*gcGoalUtilization. + gcMarkWorkerFractionalMode + + // gcMarkWorkerIdleMode indicates that a P is running the mark + // worker because it has nothing else to do. The idle worker + // should run until it is preempted and account its time + // against gcController.idleMarkTime. + gcMarkWorkerIdleMode +) + +// gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes +// to use in execution traces. +var gcMarkWorkerModeStrings = [...]string{ + "GC (dedicated)", + "GC (fractional)", + "GC (idle)", +} + +// gcController implements the GC pacing controller that determines +// when to trigger concurrent garbage collection and how much marking +// work to do in mutator assists and background marking. +// +// It uses a feedback control algorithm to adjust the memstats.gc_trigger +// trigger based on the heap growth and GC CPU utilization each cycle. +// This algorithm optimizes for heap growth to match GOGC and for CPU +// utilization between assist and background marking to be 25% of +// GOMAXPROCS. The high-level design of this algorithm is documented +// at https://golang.org/s/go15gcpacing. +var gcController = gcControllerState{ + // Initial trigger ratio guess. + triggerRatio: 7 / 8.0, +} + +type gcControllerState struct { + // scanWork is the total scan work performed this cycle. This + // is updated atomically during the cycle. Updates occur in + // bounded batches, since it is both written and read + // throughout the cycle. At the end of the cycle, this is how + // much of the retained heap is scannable. + // + // Currently this is the bytes of heap scanned. For most uses, + // this is an opaque unit of work, but for estimation the + // definition is important. + scanWork int64 + + // bgScanCredit is the scan work credit accumulated by the + // concurrent background scan. This credit is accumulated by + // the background scan and stolen by mutator assists. This is + // updated atomically. Updates occur in bounded batches, since + // it is both written and read throughout the cycle. + bgScanCredit int64 + + // assistTime is the nanoseconds spent in mutator assists + // during this cycle. This is updated atomically. Updates + // occur in bounded batches, since it is both written and read + // throughout the cycle. + assistTime int64 + + // dedicatedMarkTime is the nanoseconds spent in dedicated + // mark workers during this cycle. This is updated atomically + // at the end of the concurrent mark phase. + dedicatedMarkTime int64 + + // fractionalMarkTime is the nanoseconds spent in the + // fractional mark worker during this cycle. This is updated + // atomically throughout the cycle and will be up-to-date if + // the fractional mark worker is not currently running. + fractionalMarkTime int64 + + // idleMarkTime is the nanoseconds spent in idle marking + // during this cycle. This is updated atomically throughout + // the cycle. + idleMarkTime int64 + + // markStartTime is the absolute start time in nanoseconds + // that assists and background mark workers started. + markStartTime int64 + + // dedicatedMarkWorkersNeeded is the number of dedicated mark + // workers that need to be started. This is computed at the + // beginning of each cycle and decremented atomically as + // dedicated mark workers get started. + dedicatedMarkWorkersNeeded int64 + + // assistWorkPerByte is the ratio of scan work to allocated + // bytes that should be performed by mutator assists. This is + // computed at the beginning of each cycle and updated every + // time heap_scan is updated. + assistWorkPerByte float64 + + // assistBytesPerWork is 1/assistWorkPerByte. + assistBytesPerWork float64 + + // fractionalUtilizationGoal is the fraction of wall clock + // time that should be spent in the fractional mark worker. + // For example, if the overall mark utilization goal is 25% + // and GOMAXPROCS is 6, one P will be a dedicated mark worker + // and this will be set to 0.5 so that 50% of the time some P + // is in a fractional mark worker. This is computed at the + // beginning of each cycle. + fractionalUtilizationGoal float64 + + // triggerRatio is the heap growth ratio at which the garbage + // collection cycle should start. E.g., if this is 0.6, then + // GC should start when the live heap has reached 1.6 times + // the heap size marked by the previous cycle. This should be + // ≤ GOGC/100 so the trigger heap size is less than the goal + // heap size. This is updated at the end of of each cycle. + triggerRatio float64 + + _ [sys.CacheLineSize]byte + + // fractionalMarkWorkersNeeded is the number of fractional + // mark workers that need to be started. This is either 0 or + // 1. This is potentially updated atomically at every + // scheduling point (hence it gets its own cache line). + fractionalMarkWorkersNeeded int64 + + _ [sys.CacheLineSize]byte +} + +// startCycle resets the GC controller's state and computes estimates +// for a new GC cycle. The caller must hold worldsema. +func (c *gcControllerState) startCycle() { + c.scanWork = 0 + c.bgScanCredit = 0 + c.assistTime = 0 + c.dedicatedMarkTime = 0 + c.fractionalMarkTime = 0 + c.idleMarkTime = 0 + + // If this is the first GC cycle or we're operating on a very + // small heap, fake heap_marked so it looks like gc_trigger is + // the appropriate growth from heap_marked, even though the + // real heap_marked may not have a meaningful value (on the + // first cycle) or may be much smaller (resulting in a large + // error response). + if memstats.gc_trigger <= heapminimum { + memstats.heap_marked = uint64(float64(memstats.gc_trigger) / (1 + c.triggerRatio)) + } + + // Re-compute the heap goal for this cycle in case something + // changed. This is the same calculation we use elsewhere. + memstats.next_gc = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 + if gcpercent < 0 { + memstats.next_gc = ^uint64(0) + } + + // Ensure that the heap goal is at least a little larger than + // the current live heap size. This may not be the case if GC + // start is delayed or if the allocation that pushed heap_live + // over gc_trigger is large or if the trigger is really close to + // GOGC. Assist is proportional to this distance, so enforce a + // minimum distance, even if it means going over the GOGC goal + // by a tiny bit. + if memstats.next_gc < memstats.heap_live+1024*1024 { + memstats.next_gc = memstats.heap_live + 1024*1024 + } + + // Compute the total mark utilization goal and divide it among + // dedicated and fractional workers. + totalUtilizationGoal := float64(gomaxprocs) * gcGoalUtilization + c.dedicatedMarkWorkersNeeded = int64(totalUtilizationGoal) + c.fractionalUtilizationGoal = totalUtilizationGoal - float64(c.dedicatedMarkWorkersNeeded) + if c.fractionalUtilizationGoal > 0 { + c.fractionalMarkWorkersNeeded = 1 + } else { + c.fractionalMarkWorkersNeeded = 0 + } + + // Clear per-P state + for _, p := range &allp { + if p == nil { + break + } + p.gcAssistTime = 0 + } + + // Compute initial values for controls that are updated + // throughout the cycle. + c.revise() + + if debug.gcpacertrace > 0 { + print("pacer: assist ratio=", c.assistWorkPerByte, + " (scan ", memstats.heap_scan>>20, " MB in ", + work.initialHeapLive>>20, "->", + memstats.next_gc>>20, " MB)", + " workers=", c.dedicatedMarkWorkersNeeded, + "+", c.fractionalMarkWorkersNeeded, "\n") + } +} + +// revise updates the assist ratio during the GC cycle to account for +// improved estimates. This should be called either under STW or +// whenever memstats.heap_scan or memstats.heap_live is updated (with +// mheap_.lock held). +// +// It should only be called when gcBlackenEnabled != 0 (because this +// is when assists are enabled and the necessary statistics are +// available). +// +// TODO: Consider removing the periodic controller update altogether. +// Since we switched to allocating black, in theory we shouldn't have +// to change the assist ratio. However, this is still a useful hook +// that we've found many uses for when experimenting. +func (c *gcControllerState) revise() { + // Compute the expected scan work remaining. + // + // Note that we currently count allocations during GC as both + // scannable heap (heap_scan) and scan work completed + // (scanWork), so this difference won't be changed by + // allocations during GC. + // + // This particular estimate is a strict upper bound on the + // possible remaining scan work for the current heap. + // You might consider dividing this by 2 (or by + // (100+GOGC)/100) to counter this over-estimation, but + // benchmarks show that this has almost no effect on mean + // mutator utilization, heap size, or assist time and it + // introduces the danger of under-estimating and letting the + // mutator outpace the garbage collector. + scanWorkExpected := int64(memstats.heap_scan) - c.scanWork + if scanWorkExpected < 1000 { + // We set a somewhat arbitrary lower bound on + // remaining scan work since if we aim a little high, + // we can miss by a little. + // + // We *do* need to enforce that this is at least 1, + // since marking is racy and double-scanning objects + // may legitimately make the expected scan work + // negative. + scanWorkExpected = 1000 + } + + // Compute the heap distance remaining. + heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live) + if heapDistance <= 0 { + // This shouldn't happen, but if it does, avoid + // dividing by zero or setting the assist negative. + heapDistance = 1 + } + + // Compute the mutator assist ratio so by the time the mutator + // allocates the remaining heap bytes up to next_gc, it will + // have done (or stolen) the remaining amount of scan work. + c.assistWorkPerByte = float64(scanWorkExpected) / float64(heapDistance) + c.assistBytesPerWork = float64(heapDistance) / float64(scanWorkExpected) +} + +// endCycle updates the GC controller state at the end of the +// concurrent part of the GC cycle. +func (c *gcControllerState) endCycle() { + h_t := c.triggerRatio // For debugging + + // Proportional response gain for the trigger controller. Must + // be in [0, 1]. Lower values smooth out transient effects but + // take longer to respond to phase changes. Higher values + // react to phase changes quickly, but are more affected by + // transient changes. Values near 1 may be unstable. + const triggerGain = 0.5 + + // Compute next cycle trigger ratio. First, this computes the + // "error" for this cycle; that is, how far off the trigger + // was from what it should have been, accounting for both heap + // growth and GC CPU utilization. We compute the actual heap + // growth during this cycle and scale that by how far off from + // the goal CPU utilization we were (to estimate the heap + // growth if we had the desired CPU utilization). The + // difference between this estimate and the GOGC-based goal + // heap growth is the error. + goalGrowthRatio := float64(gcpercent) / 100 + actualGrowthRatio := float64(memstats.heap_live)/float64(memstats.heap_marked) - 1 + assistDuration := nanotime() - c.markStartTime + + // Assume background mark hit its utilization goal. + utilization := gcGoalUtilization + // Add assist utilization; avoid divide by zero. + if assistDuration > 0 { + utilization += float64(c.assistTime) / float64(assistDuration*int64(gomaxprocs)) + } + + triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio) + + // Finally, we adjust the trigger for next time by this error, + // damped by the proportional gain. + c.triggerRatio += triggerGain * triggerError + if c.triggerRatio < 0 { + // This can happen if the mutator is allocating very + // quickly or the GC is scanning very slowly. + c.triggerRatio = 0 + } else if c.triggerRatio > goalGrowthRatio*0.95 { + // Ensure there's always a little margin so that the + // mutator assist ratio isn't infinity. + c.triggerRatio = goalGrowthRatio * 0.95 + } + + if debug.gcpacertrace > 0 { + // Print controller state in terms of the design + // document. + H_m_prev := memstats.heap_marked + H_T := memstats.gc_trigger + h_a := actualGrowthRatio + H_a := memstats.heap_live + h_g := goalGrowthRatio + H_g := int64(float64(H_m_prev) * (1 + h_g)) + u_a := utilization + u_g := gcGoalUtilization + W_a := c.scanWork + print("pacer: H_m_prev=", H_m_prev, + " h_t=", h_t, " H_T=", H_T, + " h_a=", h_a, " H_a=", H_a, + " h_g=", h_g, " H_g=", H_g, + " u_a=", u_a, " u_g=", u_g, + " W_a=", W_a, + " goalΔ=", goalGrowthRatio-h_t, + " actualΔ=", h_a-h_t, + " u_a/u_g=", u_a/u_g, + "\n") + } +} + +// enlistWorker encourages another dedicated mark worker to start on +// another P if there are spare worker slots. It is used by putfull +// when more work is made available. +// +//go:nowritebarrier +func (c *gcControllerState) enlistWorker() { + // If there are idle Ps, wake one so it will run an idle worker. + // NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112. + // + // if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 { + // wakep() + // return + // } + + // There are no idle Ps. If we need more dedicated workers, + // try to preempt a running P so it will switch to a worker. + if c.dedicatedMarkWorkersNeeded <= 0 { + return + } + // Pick a random other P to preempt. + if gomaxprocs <= 1 { + return + } + gp := getg() + if gp == nil || gp.m == nil || gp.m.p == 0 { + return + } + myID := gp.m.p.ptr().id + for tries := 0; tries < 5; tries++ { + id := int32(fastrand() % uint32(gomaxprocs-1)) + if id >= myID { + id++ + } + p := allp[id] + if p.status != _Prunning { + continue + } + if preemptone(p) { + return + } + } +} + +// findRunnableGCWorker returns the background mark worker for _p_ if it +// should be run. This must only be called when gcBlackenEnabled != 0. +func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { + if gcBlackenEnabled == 0 { + throw("gcControllerState.findRunnable: blackening not enabled") + } + if _p_.gcBgMarkWorker == 0 { + // The mark worker associated with this P is blocked + // performing a mark transition. We can't run it + // because it may be on some other run or wait queue. + return nil + } + + if !gcMarkWorkAvailable(_p_) { + // No work to be done right now. This can happen at + // the end of the mark phase when there are still + // assists tapering off. Don't bother running a worker + // now because it'll just return immediately. + return nil + } + + decIfPositive := func(ptr *int64) bool { + if *ptr > 0 { + if atomic.Xaddint64(ptr, -1) >= 0 { + return true + } + // We lost a race + atomic.Xaddint64(ptr, +1) + } + return false + } + + if decIfPositive(&c.dedicatedMarkWorkersNeeded) { + // This P is now dedicated to marking until the end of + // the concurrent mark phase. + _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode + // TODO(austin): This P isn't going to run anything + // else for a while, so kick everything out of its run + // queue. + } else { + if !decIfPositive(&c.fractionalMarkWorkersNeeded) { + // No more workers are need right now. + return nil + } + + // This P has picked the token for the fractional worker. + // Is the GC currently under or at the utilization goal? + // If so, do more work. + // + // We used to check whether doing one time slice of work + // would remain under the utilization goal, but that has the + // effect of delaying work until the mutator has run for + // enough time slices to pay for the work. During those time + // slices, write barriers are enabled, so the mutator is running slower. + // Now instead we do the work whenever we're under or at the + // utilization work and pay for it by letting the mutator run later. + // This doesn't change the overall utilization averages, but it + // front loads the GC work so that the GC finishes earlier and + // write barriers can be turned off sooner, effectively giving + // the mutator a faster machine. + // + // The old, slower behavior can be restored by setting + // gcForcePreemptNS = forcePreemptNS. + const gcForcePreemptNS = 0 + + // TODO(austin): We could fast path this and basically + // eliminate contention on c.fractionalMarkWorkersNeeded by + // precomputing the minimum time at which it's worth + // next scheduling the fractional worker. Then Ps + // don't have to fight in the window where we've + // passed that deadline and no one has started the + // worker yet. + // + // TODO(austin): Shorter preemption interval for mark + // worker to improve fairness and give this + // finer-grained control over schedule? + now := nanotime() - gcController.markStartTime + then := now + gcForcePreemptNS + timeUsed := c.fractionalMarkTime + gcForcePreemptNS + if then > 0 && float64(timeUsed)/float64(then) > c.fractionalUtilizationGoal { + // Nope, we'd overshoot the utilization goal + atomic.Xaddint64(&c.fractionalMarkWorkersNeeded, +1) + return nil + } + _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode + } + + // Run the background mark worker + gp := _p_.gcBgMarkWorker.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp +} + +// gcGoalUtilization is the goal CPU utilization for background +// marking as a fraction of GOMAXPROCS. +const gcGoalUtilization = 0.25 + +// gcCreditSlack is the amount of scan work credit that can can +// accumulate locally before updating gcController.scanWork and, +// optionally, gcController.bgScanCredit. Lower values give a more +// accurate assist ratio and make it more likely that assists will +// successfully steal background credit. Higher values reduce memory +// contention. +const gcCreditSlack = 2000 + +// gcAssistTimeSlack is the nanoseconds of mutator assist time that +// can accumulate on a P before updating gcController.assistTime. +const gcAssistTimeSlack = 5000 + +// gcOverAssistWork determines how many extra units of scan work a GC +// assist does when an assist happens. This amortizes the cost of an +// assist by pre-paying for this many bytes of future allocations. +const gcOverAssistWork = 64 << 10 + +var work struct { + full uint64 // lock-free list of full blocks workbuf + empty uint64 // lock-free list of empty blocks workbuf + pad0 [sys.CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait + + // bytesMarked is the number of bytes marked this cycle. This + // includes bytes blackened in scanned objects, noscan objects + // that go straight to black, and permagrey objects scanned by + // markroot during the concurrent scan phase. This is updated + // atomically during the cycle. Updates may be batched + // arbitrarily, since the value is only read at the end of the + // cycle. + // + // Because of benign races during marking, this number may not + // be the exact number of marked bytes, but it should be very + // close. + // + // Put this field here because it needs 64-bit atomic access + // (and thus 8-byte alignment even on 32-bit architectures). + bytesMarked uint64 + + markrootNext uint32 // next markroot job + markrootJobs uint32 // number of markroot jobs + + nproc uint32 + tstart int64 + nwait uint32 + ndone uint32 + alldone note + + // helperDrainBlock indicates that GC mark termination helpers + // should pass gcDrainBlock to gcDrain to block in the + // getfull() barrier. Otherwise, they should pass gcDrainNoBlock. + // + // TODO: This is a temporary fallback to support + // debug.gcrescanstacks > 0 and to work around some known + // races. Remove this when we remove the debug option and fix + // the races. + helperDrainBlock bool + + // Number of roots of various root types. Set by gcMarkRootPrepare. + nFlushCacheRoots int + nDataRoots, nSpanRoots, nStackRoots, nRescanRoots int + + // markrootDone indicates that roots have been marked at least + // once during the current GC cycle. This is checked by root + // marking operations that have to happen only during the + // first root marking pass, whether that's during the + // concurrent mark phase in current GC or mark termination in + // STW GC. + markrootDone bool + + // Each type of GC state transition is protected by a lock. + // Since multiple threads can simultaneously detect the state + // transition condition, any thread that detects a transition + // condition must acquire the appropriate transition lock, + // re-check the transition condition and return if it no + // longer holds or perform the transition if it does. + // Likewise, any transition must invalidate the transition + // condition before releasing the lock. This ensures that each + // transition is performed by exactly one thread and threads + // that need the transition to happen block until it has + // happened. + // + // startSema protects the transition from "off" to mark or + // mark termination. + startSema uint32 + // markDoneSema protects transitions from mark 1 to mark 2 and + // from mark 2 to mark termination. + markDoneSema uint32 + + bgMarkReady note // signal background mark worker has started + bgMarkDone uint32 // cas to 1 when at a background mark completion point + // Background mark completion signaling + + // mode is the concurrency mode of the current GC cycle. + mode gcMode + + // totaltime is the CPU nanoseconds spent in GC since the + // program started if debug.gctrace > 0. + totaltime int64 + + // initialHeapLive is the value of memstats.heap_live at the + // beginning of this GC cycle. + initialHeapLive uint64 + + // assistQueue is a queue of assists that are blocked because + // there was neither enough credit to steal or enough work to + // do. + assistQueue struct { + lock mutex + head, tail guintptr + } + + // rescan is a list of G's that need to be rescanned during + // mark termination. A G adds itself to this list when it + // first invalidates its stack scan. + rescan struct { + lock mutex + list []guintptr + } + + // Timing/utilization stats for this cycle. + stwprocs, maxprocs int32 + tSweepTerm, tMark, tMarkTerm, tEnd int64 // nanotime() of phase start + + pauseNS int64 // total STW time this cycle + pauseStart int64 // nanotime() of last STW + + // debug.gctrace heap sizes for this cycle. + heap0, heap1, heap2, heapGoal uint64 +} + +// GC runs a garbage collection and blocks the caller until the +// garbage collection is complete. It may also block the entire +// program. +func GC() { + gcStart(gcForceBlockMode, false) +} + +// gcMode indicates how concurrent a GC cycle should be. +type gcMode int + +const ( + gcBackgroundMode gcMode = iota // concurrent GC and sweep + gcForceMode // stop-the-world GC now, concurrent sweep + gcForceBlockMode // stop-the-world GC now and STW sweep (forced by user) +) + +// gcShouldStart returns true if the exit condition for the _GCoff +// phase has been met. The exit condition should be tested when +// allocating. +// +// If forceTrigger is true, it ignores the current heap size, but +// checks all other conditions. In general this should be false. +func gcShouldStart(forceTrigger bool) bool { + return gcphase == _GCoff && (forceTrigger || memstats.heap_live >= memstats.gc_trigger) && memstats.enablegc && panicking == 0 && gcpercent >= 0 +} + +// gcStart transitions the GC from _GCoff to _GCmark (if mode == +// gcBackgroundMode) or _GCmarktermination (if mode != +// gcBackgroundMode) by performing sweep termination and GC +// initialization. +// +// This may return without performing this transition in some cases, +// such as when called on a system stack or with locks held. +func gcStart(mode gcMode, forceTrigger bool) { + // Since this is called from malloc and malloc is called in + // the guts of a number of libraries that might be holding + // locks, don't attempt to start GC in non-preemptible or + // potentially unstable situations. + mp := acquirem() + if gp := getg(); gp == mp.g0 || mp.locks > 1 || mp.preemptoff != "" { + releasem(mp) + return + } + releasem(mp) + mp = nil + + // Pick up the remaining unswept/not being swept spans concurrently + // + // This shouldn't happen if we're being invoked in background + // mode since proportional sweep should have just finished + // sweeping everything, but rounding errors, etc, may leave a + // few spans unswept. In forced mode, this is necessary since + // GC can be forced at any point in the sweeping cycle. + // + // We check the transition condition continuously here in case + // this G gets delayed in to the next GC cycle. + for (mode != gcBackgroundMode || gcShouldStart(forceTrigger)) && gosweepone() != ^uintptr(0) { + sweep.nbgsweep++ + } + + // Perform GC initialization and the sweep termination + // transition. + // + // If this is a forced GC, don't acquire the transition lock + // or re-check the transition condition because we + // specifically *don't* want to share the transition with + // another thread. + useStartSema := mode == gcBackgroundMode + if useStartSema { + semacquire(&work.startSema, 0) + // Re-check transition condition under transition lock. + if !gcShouldStart(forceTrigger) { + semrelease(&work.startSema) + return + } + } + + // For stats, check if this GC was forced by the user. + forced := mode != gcBackgroundMode + + // In gcstoptheworld debug mode, upgrade the mode accordingly. + // We do this after re-checking the transition condition so + // that multiple goroutines that detect the heap trigger don't + // start multiple STW GCs. + if mode == gcBackgroundMode { + if debug.gcstoptheworld == 1 { + mode = gcForceMode + } else if debug.gcstoptheworld == 2 { + mode = gcForceBlockMode + } + } + + // Ok, we're doing it! Stop everybody else + semacquire(&worldsema, 0) + + if trace.enabled { + traceGCStart() + } + + if mode == gcBackgroundMode { + gcBgMarkStartWorkers() + } + + gcResetMarkState() + + now := nanotime() + work.stwprocs, work.maxprocs = gcprocs(), gomaxprocs + work.tSweepTerm = now + work.heap0 = memstats.heap_live + work.pauseNS = 0 + work.mode = mode + + work.pauseStart = now + systemstack(stopTheWorldWithSema) + // Finish sweep before we start concurrent scan. + systemstack(func() { + finishsweep_m() + }) + // clearpools before we start the GC. If we wait they memory will not be + // reclaimed until the next GC cycle. + clearpools() + + if mode == gcBackgroundMode { // Do as much work concurrently as possible + gcController.startCycle() + work.heapGoal = memstats.next_gc + + // Enter concurrent mark phase and enable + // write barriers. + // + // Because the world is stopped, all Ps will + // observe that write barriers are enabled by + // the time we start the world and begin + // scanning. + // + // It's necessary to enable write barriers + // during the scan phase for several reasons: + // + // They must be enabled for writes to higher + // stack frames before we scan stacks and + // install stack barriers because this is how + // we track writes to inactive stack frames. + // (Alternatively, we could not install stack + // barriers over frame boundaries with + // up-pointers). + // + // They must be enabled before assists are + // enabled because they must be enabled before + // any non-leaf heap objects are marked. Since + // allocations are blocked until assists can + // happen, we want enable assists as early as + // possible. + setGCPhase(_GCmark) + + gcBgMarkPrepare() // Must happen before assist enable. + gcMarkRootPrepare() + + // Mark all active tinyalloc blocks. Since we're + // allocating from these, they need to be black like + // other allocations. The alternative is to blacken + // the tiny block on every allocation from it, which + // would slow down the tiny allocator. + gcMarkTinyAllocs() + + // At this point all Ps have enabled the write + // barrier, thus maintaining the no white to + // black invariant. Enable mutator assists to + // put back-pressure on fast allocating + // mutators. + atomic.Store(&gcBlackenEnabled, 1) + + // Assists and workers can start the moment we start + // the world. + gcController.markStartTime = now + + // Concurrent mark. + systemstack(startTheWorldWithSema) + now = nanotime() + work.pauseNS += now - work.pauseStart + work.tMark = now + } else { + t := nanotime() + work.tMark, work.tMarkTerm = t, t + work.heapGoal = work.heap0 + + if forced { + memstats.numforcedgc++ + } + + // Perform mark termination. This will restart the world. + gcMarkTermination() + } + + if useStartSema { + semrelease(&work.startSema) + } +} + +// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2 +// to mark termination. +// +// This should be called when all mark work has been drained. In mark +// 1, this includes all root marking jobs, global work buffers, and +// active work buffers in assists and background workers; however, +// work may still be cached in per-P work buffers. In mark 2, per-P +// caches are disabled. +// +// The calling context must be preemptible. +// +// Note that it is explicitly okay to have write barriers in this +// function because completion of concurrent mark is best-effort +// anyway. Any work created by write barriers here will be cleaned up +// by mark termination. +func gcMarkDone() { +top: + semacquire(&work.markDoneSema, 0) + + // Re-check transition condition under transition lock. + if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { + semrelease(&work.markDoneSema) + return + } + + // Disallow starting new workers so that any remaining workers + // in the current mark phase will drain out. + // + // TODO(austin): Should dedicated workers keep an eye on this + // and exit gcDrain promptly? + atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff) + atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff) + + if !gcBlackenPromptly { + // Transition from mark 1 to mark 2. + // + // The global work list is empty, but there can still be work + // sitting in the per-P work caches. + // Flush and disable work caches. + + // Disallow caching workbufs and indicate that we're in mark 2. + gcBlackenPromptly = true + + // Prevent completion of mark 2 until we've flushed + // cached workbufs. + atomic.Xadd(&work.nwait, -1) + + // GC is set up for mark 2. Let Gs blocked on the + // transition lock go while we flush caches. + semrelease(&work.markDoneSema) + + systemstack(func() { + // Flush all currently cached workbufs and + // ensure all Ps see gcBlackenPromptly. This + // also blocks until any remaining mark 1 + // workers have exited their loop so we can + // start new mark 2 workers. + forEachP(func(_p_ *p) { + _p_.gcw.dispose() + }) + }) + + // Check that roots are marked. We should be able to + // do this before the forEachP, but based on issue + // #16083 there may be a (harmless) race where we can + // enter mark 2 while some workers are still scanning + // stacks. The forEachP ensures these scans are done. + // + // TODO(austin): Figure out the race and fix this + // properly. + gcMarkRootCheck() + + // Now we can start up mark 2 workers. + atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff) + atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff) + + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + // This loop will make progress because + // gcBlackenPromptly is now true, so it won't + // take this same "if" branch. + goto top + } + } else { + // Transition to mark termination. + now := nanotime() + work.tMarkTerm = now + work.pauseStart = now + getg().m.preemptoff = "gcing" + systemstack(stopTheWorldWithSema) + // The gcphase is _GCmark, it will transition to _GCmarktermination + // below. The important thing is that the wb remains active until + // all marking is complete. This includes writes made by the GC. + + // Record that one root marking pass has completed. + work.markrootDone = true + + // Disable assists and background workers. We must do + // this before waking blocked assists. + atomic.Store(&gcBlackenEnabled, 0) + + // Wake all blocked assists. These will run when we + // start the world again. + gcWakeAllAssists() + + // Likewise, release the transition lock. Blocked + // workers and assists will run when we start the + // world again. + semrelease(&work.markDoneSema) + + // endCycle depends on all gcWork cache stats being + // flushed. This is ensured by mark 2. + gcController.endCycle() + + // Perform mark termination. This will restart the world. + gcMarkTermination() + } +} + +func gcMarkTermination() { + // World is stopped. + // Start marktermination which includes enabling the write barrier. + atomic.Store(&gcBlackenEnabled, 0) + gcBlackenPromptly = false + setGCPhase(_GCmarktermination) + + work.heap1 = memstats.heap_live + startTime := nanotime() + + mp := acquirem() + mp.preemptoff = "gcing" + _g_ := getg() + _g_.m.traceback = 2 + gp := _g_.m.curg + casgstatus(gp, _Grunning, _Gwaiting) + gp.waitreason = "garbage collection" + + // Run gc on the g0 stack. We do this so that the g stack + // we're currently running on will no longer change. Cuts + // the root set down a bit (g0 stacks are not scanned, and + // we don't need to scan gc's internal state). We also + // need to switch to g0 so we can shrink the stack. + systemstack(func() { + gcMark(startTime) + // Must return immediately. + // The outer function's stack may have moved + // during gcMark (it shrinks stacks, including the + // outer function's stack), so we must not refer + // to any of its variables. Return back to the + // non-system stack to pick up the new addresses + // before continuing. + }) + + systemstack(func() { + work.heap2 = work.bytesMarked + if debug.gccheckmark > 0 { + // Run a full stop-the-world mark using checkmark bits, + // to check that we didn't forget to mark anything during + // the concurrent mark process. + gcResetMarkState() + initCheckmarks() + gcMark(startTime) + clearCheckmarks() + } + + // marking is complete so we can turn the write barrier off + setGCPhase(_GCoff) + gcSweep(work.mode) + + if debug.gctrace > 1 { + startTime = nanotime() + // The g stacks have been scanned so + // they have gcscanvalid==true and gcworkdone==true. + // Reset these so that all stacks will be rescanned. + gcResetMarkState() + finishsweep_m() + + // Still in STW but gcphase is _GCoff, reset to _GCmarktermination + // At this point all objects will be found during the gcMark which + // does a complete STW mark and object scan. + setGCPhase(_GCmarktermination) + gcMark(startTime) + setGCPhase(_GCoff) // marking is done, turn off wb. + gcSweep(work.mode) + } + }) + + _g_.m.traceback = 0 + casgstatus(gp, _Gwaiting, _Grunning) + + if trace.enabled { + traceGCDone() + } + + // all done + mp.preemptoff = "" + + if gcphase != _GCoff { + throw("gc done but gcphase != _GCoff") + } + + // Update timing memstats + now, unixNow := nanotime(), unixnanotime() + work.pauseNS += now - work.pauseStart + work.tEnd = now + atomic.Store64(&memstats.last_gc, uint64(unixNow)) // must be Unix time to make sense to user + memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) + memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow) + memstats.pause_total_ns += uint64(work.pauseNS) + + // Update work.totaltime. + sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm) + // We report idle marking time below, but omit it from the + // overall utilization here since it's "free". + markCpu := gcController.assistTime + gcController.dedicatedMarkTime + gcController.fractionalMarkTime + markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm) + cycleCpu := sweepTermCpu + markCpu + markTermCpu + work.totaltime += cycleCpu + + // Compute overall GC CPU utilization. + totalCpu := sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs) + memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu) + + memstats.numgc++ + + // Reset sweep state. + sweep.nbgsweep = 0 + sweep.npausesweep = 0 + + systemstack(startTheWorldWithSema) + + // Update heap profile stats if gcSweep didn't do it. This is + // relatively expensive, so we don't want to do it while the + // world is stopped, but it needs to happen ASAP after + // starting the world to prevent too many allocations from the + // next cycle leaking in. It must happen before releasing + // worldsema since there are applications that do a + // runtime.GC() to update the heap profile and then + // immediately collect the profile. + if _ConcurrentSweep && work.mode != gcForceBlockMode { + mProf_GC() + } + + // Print gctrace before dropping worldsema. As soon as we drop + // worldsema another cycle could start and smash the stats + // we're trying to print. + if debug.gctrace > 0 { + util := int(memstats.gc_cpu_fraction * 100) + + var sbuf [24]byte + printlock() + print("gc ", memstats.numgc, + " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", + util, "%: ") + prev := work.tSweepTerm + for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { + if i != 0 { + print("+") + } + print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev)))) + prev = ns + } + print(" ms clock, ") + for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime + gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} { + if i == 2 || i == 3 { + // Separate mark time components with /. + print("/") + } else if i != 0 { + print("+") + } + print(string(fmtNSAsMS(sbuf[:], uint64(ns)))) + } + print(" ms cpu, ", + work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ", + work.heapGoal>>20, " MB goal, ", + work.maxprocs, " P") + if work.mode != gcBackgroundMode { + print(" (forced)") + } + print("\n") + printunlock() + } + + semrelease(&worldsema) + // Careful: another GC cycle may start now. + + releasem(mp) + mp = nil + + // now that gc is done, kick off finalizer thread if needed + if !concurrentSweep { + // give the queued finalizers, if any, a chance to run + Gosched() + } +} + +// gcBgMarkStartWorkers prepares background mark worker goroutines. +// These goroutines will not run until the mark phase, but they must +// be started while the work is not stopped and from a regular G +// stack. The caller must hold worldsema. +func gcBgMarkStartWorkers() { + // Background marking is performed by per-P G's. Ensure that + // each P has a background GC G. + for _, p := range &allp { + if p == nil || p.status == _Pdead { + break + } + if p.gcBgMarkWorker == 0 { + go gcBgMarkWorker(p) + notetsleepg(&work.bgMarkReady, -1) + noteclear(&work.bgMarkReady) + } + } +} + +// gcBgMarkPrepare sets up state for background marking. +// Mutator assists must not yet be enabled. +func gcBgMarkPrepare() { + // Background marking will stop when the work queues are empty + // and there are no more workers (note that, since this is + // concurrent, this may be a transient state, but mark + // termination will clean it up). Between background workers + // and assists, we don't really know how many workers there + // will be, so we pretend to have an arbitrarily large number + // of workers, almost all of which are "waiting". While a + // worker is working it decrements nwait. If nproc == nwait, + // there are no workers. + work.nproc = ^uint32(0) + work.nwait = ^uint32(0) +} + +func gcBgMarkWorker(_p_ *p) { + gp := getg() + + type parkInfo struct { + m muintptr // Release this m on park. + attach puintptr // If non-nil, attach to this p on park. + } + // We pass park to a gopark unlock function, so it can't be on + // the stack (see gopark). Prevent deadlock from recursively + // starting GC by disabling preemption. + gp.m.preemptoff = "GC worker init" + park := new(parkInfo) + gp.m.preemptoff = "" + + park.m.set(acquirem()) + park.attach.set(_p_) + // Inform gcBgMarkStartWorkers that this worker is ready. + // After this point, the background mark worker is scheduled + // cooperatively by gcController.findRunnable. Hence, it must + // never be preempted, as this would put it into _Grunnable + // and put it on a run queue. Instead, when the preempt flag + // is set, this puts itself into _Gwaiting to be woken up by + // gcController.findRunnable at the appropriate time. + notewakeup(&work.bgMarkReady) + + for { + // Go to sleep until woken by gcController.findRunnable. + // We can't releasem yet since even the call to gopark + // may be preempted. + gopark(func(g *g, parkp unsafe.Pointer) bool { + park := (*parkInfo)(parkp) + + // The worker G is no longer running, so it's + // now safe to allow preemption. + releasem(park.m.ptr()) + + // If the worker isn't attached to its P, + // attach now. During initialization and after + // a phase change, the worker may have been + // running on a different P. As soon as we + // attach, the owner P may schedule the + // worker, so this must be done after the G is + // stopped. + if park.attach != 0 { + p := park.attach.ptr() + park.attach.set(nil) + // cas the worker because we may be + // racing with a new worker starting + // on this P. + if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) { + // The P got a new worker. + // Exit this worker. + return false + } + } + return true + }, unsafe.Pointer(park), "GC worker (idle)", traceEvGoBlock, 0) + + // Loop until the P dies and disassociates this + // worker (the P may later be reused, in which case + // it will get a new worker) or we failed to associate. + if _p_.gcBgMarkWorker.ptr() != gp { + break + } + + // Disable preemption so we can use the gcw. If the + // scheduler wants to preempt us, we'll stop draining, + // dispose the gcw, and then preempt. + park.m.set(acquirem()) + + if gcBlackenEnabled == 0 { + throw("gcBgMarkWorker: blackening not enabled") + } + + startTime := nanotime() + + decnwait := atomic.Xadd(&work.nwait, -1) + if decnwait == work.nproc { + println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) + throw("work.nwait was > work.nproc") + } + + systemstack(func() { + // Mark our goroutine preemptible so its stack + // can be scanned. This lets two mark workers + // scan each other (otherwise, they would + // deadlock). We must not modify anything on + // the G stack. However, stack shrinking is + // disabled for mark workers, so it is safe to + // read from the G stack. + casgstatus(gp, _Grunning, _Gwaiting) + switch _p_.gcMarkWorkerMode { + default: + throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") + case gcMarkWorkerDedicatedMode: + gcDrain(&_p_.gcw, gcDrainNoBlock|gcDrainFlushBgCredit) + case gcMarkWorkerFractionalMode: + gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + case gcMarkWorkerIdleMode: + gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) + } + casgstatus(gp, _Gwaiting, _Grunning) + }) + + // If we are nearing the end of mark, dispose + // of the cache promptly. We must do this + // before signaling that we're no longer + // working so that other workers can't observe + // no workers and no work while we have this + // cached, and before we compute done. + if gcBlackenPromptly { + _p_.gcw.dispose() + } + + // Account for time. + duration := nanotime() - startTime + switch _p_.gcMarkWorkerMode { + case gcMarkWorkerDedicatedMode: + atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) + atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) + case gcMarkWorkerFractionalMode: + atomic.Xaddint64(&gcController.fractionalMarkTime, duration) + atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 1) + case gcMarkWorkerIdleMode: + atomic.Xaddint64(&gcController.idleMarkTime, duration) + } + + // Was this the last worker and did we run out + // of work? + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode, + "work.nwait=", incnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") + } + + // If this worker reached a background mark completion + // point, signal the main GC goroutine. + if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + // Make this G preemptible and disassociate it + // as the worker for this P so + // findRunnableGCWorker doesn't try to + // schedule it. + _p_.gcBgMarkWorker.set(nil) + releasem(park.m.ptr()) + + gcMarkDone() + + // Disable preemption and prepare to reattach + // to the P. + // + // We may be running on a different P at this + // point, so we can't reattach until this G is + // parked. + park.m.set(acquirem()) + park.attach.set(_p_) + } + } +} + +// gcMarkWorkAvailable returns true if executing a mark worker +// on p is potentially useful. p may be nil, in which case it only +// checks the global sources of work. +func gcMarkWorkAvailable(p *p) bool { + if p != nil && !p.gcw.empty() { + return true + } + if atomic.Load64(&work.full) != 0 { + return true // global work available + } + if work.markrootNext < work.markrootJobs { + return true // root scan work available + } + return false +} + +// gcMark runs the mark (or, for concurrent GC, mark termination) +// All gcWork caches must be empty. +// STW is in effect at this point. +//TODO go:nowritebarrier +func gcMark(start_time int64) { + if debug.allocfreetrace > 0 { + tracegc() + } + + if gcphase != _GCmarktermination { + throw("in gcMark expecting to see gcphase as _GCmarktermination") + } + work.tstart = start_time + + // Queue root marking jobs. + gcMarkRootPrepare() + + work.nwait = 0 + work.ndone = 0 + work.nproc = uint32(gcprocs()) + + if debug.gcrescanstacks == 0 && work.full == 0 && work.nDataRoots+work.nSpanRoots+work.nStackRoots+work.nRescanRoots == 0 { + // There's no work on the work queue and no root jobs + // that can produce work, so don't bother entering the + // getfull() barrier. + // + // With the hybrid barrier enabled, this will be the + // situation the vast majority of the time after + // concurrent mark. However, we still need a fallback + // for STW GC and because there are some known races + // that occasionally leave work around for mark + // termination. + // + // We're still hedging our bets here: if we do + // accidentally produce some work, we'll still process + // it, just not necessarily in parallel. + // + // TODO(austin): When we eliminate + // debug.gcrescanstacks: fix the races, and remove + // work draining from mark termination so we don't + // need the fallback path. + work.helperDrainBlock = false + } else { + work.helperDrainBlock = true + } + + if trace.enabled { + traceGCScanStart() + } + + if work.nproc > 1 { + noteclear(&work.alldone) + helpgc(int32(work.nproc)) + } + + gchelperstart() + + gcw := &getg().m.p.ptr().gcw + if work.helperDrainBlock { + gcDrain(gcw, gcDrainBlock) + } else { + gcDrain(gcw, gcDrainNoBlock) + } + gcw.dispose() + + if debug.gccheckmark > 0 { + // This is expensive when there's a large number of + // Gs, so only do it if checkmark is also enabled. + gcMarkRootCheck() + } + if work.full != 0 { + throw("work.full != 0") + } + + if work.nproc > 1 { + notesleep(&work.alldone) + } + + // Record that at least one root marking pass has completed. + work.markrootDone = true + + // Double-check that all gcWork caches are empty. This should + // be ensured by mark 2 before we enter mark termination. + for i := 0; i < int(gomaxprocs); i++ { + gcw := &allp[i].gcw + if !gcw.empty() { + throw("P has cached GC work at end of mark termination") + } + if gcw.scanWork != 0 || gcw.bytesMarked != 0 { + throw("P has unflushed stats at end of mark termination") + } + } + + if trace.enabled { + traceGCScanDone() + } + + cachestats() + + // Update the marked heap stat. + memstats.heap_marked = work.bytesMarked + + // Trigger the next GC cycle when the allocated heap has grown + // by triggerRatio over the marked heap size. Assume that + // we're in steady state, so the marked heap size is the + // same now as it was at the beginning of the GC cycle. + memstats.gc_trigger = uint64(float64(memstats.heap_marked) * (1 + gcController.triggerRatio)) + if memstats.gc_trigger < heapminimum { + memstats.gc_trigger = heapminimum + } + if int64(memstats.gc_trigger) < 0 { + print("next_gc=", memstats.next_gc, " bytesMarked=", work.bytesMarked, " heap_live=", memstats.heap_live, " initialHeapLive=", work.initialHeapLive, "\n") + throw("gc_trigger underflow") + } + + // Update other GC heap size stats. This must happen after + // cachestats (which flushes local statistics to these) and + // flushallmcaches (which modifies heap_live). + memstats.heap_live = work.bytesMarked + memstats.heap_scan = uint64(gcController.scanWork) + + minTrigger := memstats.heap_live + sweepMinHeapDistance*uint64(gcpercent)/100 + if memstats.gc_trigger < minTrigger { + // The allocated heap is already past the trigger. + // This can happen if the triggerRatio is very low and + // the marked heap is less than the live heap size. + // + // Concurrent sweep happens in the heap growth from + // heap_live to gc_trigger, so bump gc_trigger up to ensure + // that concurrent sweep has some heap growth in which + // to perform sweeping before we start the next GC + // cycle. + memstats.gc_trigger = minTrigger + } + + // The next GC cycle should finish before the allocated heap + // has grown by GOGC/100. + memstats.next_gc = memstats.heap_marked + memstats.heap_marked*uint64(gcpercent)/100 + if gcpercent < 0 { + memstats.next_gc = ^uint64(0) + } + if memstats.next_gc < memstats.gc_trigger { + memstats.next_gc = memstats.gc_trigger + } + + if trace.enabled { + traceHeapAlloc() + traceNextGC() + } +} + +func gcSweep(mode gcMode) { + if gcphase != _GCoff { + throw("gcSweep being done but phase is not GCoff") + } + + lock(&mheap_.lock) + mheap_.sweepgen += 2 + mheap_.sweepdone = 0 + if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 { + // We should have drained this list during the last + // sweep phase. We certainly need to start this phase + // with an empty swept list. + throw("non-empty swept list") + } + unlock(&mheap_.lock) + + if !_ConcurrentSweep || mode == gcForceBlockMode { + // Special case synchronous sweep. + // Record that no proportional sweeping has to happen. + lock(&mheap_.lock) + mheap_.sweepPagesPerByte = 0 + mheap_.pagesSwept = 0 + unlock(&mheap_.lock) + // Sweep all spans eagerly. + for sweepone() != ^uintptr(0) { + sweep.npausesweep++ + } + // Do an additional mProf_GC, because all 'free' events are now real as well. + mProf_GC() + mProf_GC() + return + } + + // Concurrent sweep needs to sweep all of the in-use pages by + // the time the allocated heap reaches the GC trigger. Compute + // the ratio of in-use pages to sweep per byte allocated. + heapDistance := int64(memstats.gc_trigger) - int64(memstats.heap_live) + // Add a little margin so rounding errors and concurrent + // sweep are less likely to leave pages unswept when GC starts. + heapDistance -= 1024 * 1024 + if heapDistance < _PageSize { + // Avoid setting the sweep ratio extremely high + heapDistance = _PageSize + } + lock(&mheap_.lock) + mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance) + mheap_.pagesSwept = 0 + mheap_.spanBytesAlloc = 0 + unlock(&mheap_.lock) + + // Background sweep. + lock(&sweep.lock) + if sweep.parked { + sweep.parked = false + ready(sweep.g, 0, true) + } + unlock(&sweep.lock) +} + +// gcResetMarkState resets global state prior to marking (concurrent +// or STW) and resets the stack scan state of all Gs. +// +// This is safe to do without the world stopped because any Gs created +// during or after this will start out in the reset state. +func gcResetMarkState() { + // This may be called during a concurrent phase, so make sure + // allgs doesn't change. + if !(gcphase == _GCoff || gcphase == _GCmarktermination) { + // Accessing gcRescan is unsafe. + throw("bad GC phase") + } + lock(&allglock) + for _, gp := range allgs { + gp.gcscandone = false // set to true in gcphasework + gp.gcscanvalid = false // stack has not been scanned + gp.gcRescan = -1 + gp.gcAssistBytes = 0 + } + unlock(&allglock) + + // Clear rescan list. + work.rescan.list = work.rescan.list[:0] + + work.bytesMarked = 0 + work.initialHeapLive = memstats.heap_live + work.markrootDone = false +} + +// Hooks for other packages + +var poolcleanup func() + +//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup +func sync_runtime_registerPoolCleanup(f func()) { + poolcleanup = f +} + +func clearpools() { + // clear sync.Pools + if poolcleanup != nil { + poolcleanup() + } + + // Clear central sudog cache. + // Leave per-P caches alone, they have strictly bounded size. + // Disconnect cached list before dropping it on the floor, + // so that a dangling ref to one entry does not pin all of them. + lock(&sched.sudoglock) + var sg, sgnext *sudog + for sg = sched.sudogcache; sg != nil; sg = sgnext { + sgnext = sg.next + sg.next = nil + } + sched.sudogcache = nil + unlock(&sched.sudoglock) + + // Clear central defer pools. + // Leave per-P pools alone, they have strictly bounded size. + lock(&sched.deferlock) + // disconnect cached list before dropping it on the floor, + // so that a dangling ref to one entry does not pin all of them. + var d, dlink *_defer + for d = sched.deferpool; d != nil; d = dlink { + dlink = d.link + d.link = nil + } + sched.deferpool = nil + unlock(&sched.deferlock) +} + +// Timing + +//go:nowritebarrier +func gchelper() { + _g_ := getg() + _g_.m.traceback = 2 + gchelperstart() + + if trace.enabled { + traceGCScanStart() + } + + // Parallel mark over GC roots and heap + if gcphase == _GCmarktermination { + gcw := &_g_.m.p.ptr().gcw + if work.helperDrainBlock { + gcDrain(gcw, gcDrainBlock) // blocks in getfull + } else { + gcDrain(gcw, gcDrainNoBlock) + } + gcw.dispose() + } + + if trace.enabled { + traceGCScanDone() + } + + nproc := work.nproc // work.nproc can change right after we increment work.ndone + if atomic.Xadd(&work.ndone, +1) == nproc-1 { + notewakeup(&work.alldone) + } + _g_.m.traceback = 0 +} + +func gchelperstart() { + _g_ := getg() + + if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc { + throw("gchelperstart: bad m->helpgc") + } + // For gccgo we run gchelper on the normal g stack. + // if _g_ != _g_.m.g0 { + // throw("gchelper not running on g0 stack") + // } +} + +// itoaDiv formats val/(10**dec) into buf. +func itoaDiv(buf []byte, val uint64, dec int) []byte { + i := len(buf) - 1 + idec := i - dec + for val >= 10 || i >= idec { + buf[i] = byte(val%10 + '0') + i-- + if i == idec { + buf[i] = '.' + i-- + } + val /= 10 + } + buf[i] = byte(val + '0') + return buf[i:] +} + +// fmtNSAsMS nicely formats ns nanoseconds as milliseconds. +func fmtNSAsMS(buf []byte, ns uint64) []byte { + if ns >= 10e6 { + // Format as whole milliseconds. + return itoaDiv(buf, ns/1e6, 0) + } + // Format two digits of precision, with at most three decimal places. + x := ns / 1e3 + if x == 0 { + buf[0] = '0' + return buf[:1] + } + dec := 3 + for x >= 100 { + x /= 10 + dec-- + } + return itoaDiv(buf, x, dec) +} diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go new file mode 100644 index 0000000..c1fa154 --- /dev/null +++ b/libgo/go/runtime/mgc_gccgo.go @@ -0,0 +1,87 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// gccgo-specific support for GC. + +package runtime + +import "unsafe" + +// gcRoot is a single GC root: a variable plus a ptrmask. +type gcRoot struct { + decl unsafe.Pointer // Pointer to variable. + size uintptr // Size of variable. + ptrdata uintptr // Length of gcdata. + gcdata *uint8 // Pointer mask. +} + +// gcRootList is the set of GC roots for a package. +// The next field is used to put this all into a linked list. +// count gives the real length of the array. +type gcRootList struct { + next *gcRootList + count int + roots [1 << 26]gcRoot +} + +// roots is the list of GC roots for the program. +// The compiler keeps this variable itself off the list. +var gcRoots *gcRootList + +// registerGCRoots is called by compiler-generated code. +//go:linkname registerGCRoots runtime.registerGCRoots + +// registerGCRoots is called by init functions to register the GC +// roots for a package. The init functions are run sequentially at +// the start of the program, so no locking is needed. +func registerGCRoots(r *gcRootList) { + r.next = gcRoots + gcRoots = r +} + +// checkPreempt is called when the preempt field in the running G is true. +// It preempts the goroutine if it is safe to do so. +// If preemptscan is true, this scans the stack for the garbage collector +// and carries on. +func checkPreempt() { + gp := getg() + if !gp.preempt || gp != gp.m.curg || gp.m.locks != 0 || gp.m.mallocing != 0 || gp.m.preemptoff != "" { + return + } + + // Synchronize with scang. + gp.scanningself = true + casgstatus(gp, _Grunning, _Gwaiting) + if gp.preemptscan { + for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) { + // Likely to be racing with the GC as + // it sees a _Gwaiting and does the + // stack scan. If so, gcworkdone will + // be set and gcphasework will simply + // return. + } + if !gp.gcscandone { + mp := acquirem() + gcw := &gp.m.p.ptr().gcw + scanstack(gp, gcw) + if gcBlackenPromptly { + gcw.dispose() + } + releasem(mp) + gp.gcscandone = true + } + gp.preemptscan = false + gp.preempt = false + casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting) + // This clears gcscanvalid. + casgstatus(gp, _Gwaiting, _Grunning) + gp.scanningself = false + return + } + + // Act like goroutine called runtime.Gosched. + casgstatus(gp, _Gwaiting, _Grunning) + gp.scanningself = false + mcall(gopreempt_m) +} diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go new file mode 100644 index 0000000..93252ba --- /dev/null +++ b/libgo/go/runtime/mgcmark.go @@ -0,0 +1,1374 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector: marking and scanning + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +const ( + fixedRootFinalizers = iota + fixedRootFreeGStacks + fixedRootCount + + // rootBlockBytes is the number of bytes to scan per data or + // BSS root. + rootBlockBytes = 256 << 10 + + // rootBlockSpans is the number of spans to scan per span + // root. + rootBlockSpans = 8 * 1024 // 64MB worth of spans + + // maxObletBytes is the maximum bytes of an object to scan at + // once. Larger objects will be split up into "oblets" of at + // most this size. Since we can scan 1–2 MB/ms, 128 KB bounds + // scan preemption at ~100 µs. + // + // This must be > _MaxSmallSize so that the object base is the + // span base. + maxObletBytes = 128 << 10 + + // idleCheckThreshold specifies how many units of work to do + // between run queue checks in an idle worker. Assuming a scan + // rate of 1 MB/ms, this is ~100 µs. Lower values have higher + // overhead in the scan loop (the scheduler check may perform + // a syscall, so its overhead is nontrivial). Higher values + // make the system less responsive to incoming work. + idleCheckThreshold = 100000 +) + +// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and +// some miscellany) and initializes scanning-related state. +// +// The caller must have call gcCopySpans(). +// +// The world must be stopped. +// +//go:nowritebarrier +func gcMarkRootPrepare() { + if gcphase == _GCmarktermination { + work.nFlushCacheRoots = int(gomaxprocs) + } else { + work.nFlushCacheRoots = 0 + } + + work.nDataRoots = 0 + + // Only scan globals once per cycle; preferably concurrently. + if !work.markrootDone { + roots := gcRoots + for roots != nil { + work.nDataRoots++ + roots = roots.next + } + } + + if !work.markrootDone { + // On the first markroot, we need to scan span roots. + // In concurrent GC, this happens during concurrent + // mark and we depend on addfinalizer to ensure the + // above invariants for objects that get finalizers + // after concurrent mark. In STW GC, this will happen + // during mark termination. + // + // We're only interested in scanning the in-use spans, + // which will all be swept at this point. More spans + // may be added to this list during concurrent GC, but + // we only care about spans that were allocated before + // this mark phase. + work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks() + + // On the first markroot, we need to scan all Gs. Gs + // may be created after this point, but it's okay that + // we ignore them because they begin life without any + // roots, so there's nothing to scan, and any roots + // they create during the concurrent phase will be + // scanned during mark termination. During mark + // termination, allglen isn't changing, so we'll scan + // all Gs. + work.nStackRoots = int(atomic.Loaduintptr(&allglen)) + work.nRescanRoots = 0 + } else { + // We've already scanned span roots and kept the scan + // up-to-date during concurrent mark. + work.nSpanRoots = 0 + + // On the second pass of markroot, we're just scanning + // dirty stacks. It's safe to access rescan since the + // world is stopped. + work.nStackRoots = 0 + work.nRescanRoots = len(work.rescan.list) + } + + work.markrootNext = 0 + work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nSpanRoots + work.nStackRoots + work.nRescanRoots) +} + +// gcMarkRootCheck checks that all roots have been scanned. It is +// purely for debugging. +func gcMarkRootCheck() { + if work.markrootNext < work.markrootJobs { + print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n") + throw("left over markroot jobs") + } + + lock(&allglock) + // Check that stacks have been scanned. + var gp *g + if gcphase == _GCmarktermination && debug.gcrescanstacks > 0 { + for i := 0; i < len(allgs); i++ { + gp = allgs[i] + if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead { + goto fail + } + } + } else { + for i := 0; i < work.nStackRoots; i++ { + gp = allgs[i] + if !gp.gcscandone { + goto fail + } + } + } + unlock(&allglock) + return + +fail: + println("gp", gp, "goid", gp.goid, + "status", readgstatus(gp), + "gcscandone", gp.gcscandone, + "gcscanvalid", gp.gcscanvalid) + unlock(&allglock) // Avoid self-deadlock with traceback. + throw("scan missed a g") +} + +// ptrmask for an allocation containing a single pointer. +var oneptrmask = [...]uint8{1} + +// markroot scans the i'th root. +// +// Preemption must be disabled (because this uses a gcWork). +// +// nowritebarrier is only advisory here. +// +//go:nowritebarrier +func markroot(gcw *gcWork, i uint32) { + // TODO(austin): This is a bit ridiculous. Compute and store + // the bases in gcMarkRootPrepare instead of the counts. + baseFlushCache := uint32(fixedRootCount) + baseData := baseFlushCache + uint32(work.nFlushCacheRoots) + baseSpans := baseData + uint32(work.nDataRoots) + baseStacks := baseSpans + uint32(work.nSpanRoots) + baseRescan := baseStacks + uint32(work.nStackRoots) + end := baseRescan + uint32(work.nRescanRoots) + + // Note: if you add a case here, please also update heapdump.go:dumproots. + switch { + case baseFlushCache <= i && i < baseData: + flushmcache(int(i - baseFlushCache)) + + case baseData <= i && i < baseSpans: + roots := gcRoots + c := baseData + for roots != nil { + if i == c { + markrootBlock(roots, gcw) + break + } + roots = roots.next + c++ + } + + case i == fixedRootFinalizers: + for fb := allfin; fb != nil; fb = fb.alllink { + cnt := uintptr(atomic.Load(&fb.cnt)) + scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw) + } + + case i == fixedRootFreeGStacks: + // FIXME: We don't do this for gccgo. + + case baseSpans <= i && i < baseStacks: + // mark MSpan.specials + markrootSpans(gcw, int(i-baseSpans)) + + default: + // the rest is scanning goroutine stacks + var gp *g + if baseStacks <= i && i < baseRescan { + gp = allgs[i-baseStacks] + } else if baseRescan <= i && i < end { + gp = work.rescan.list[i-baseRescan].ptr() + if gp.gcRescan != int32(i-baseRescan) { + // Looking for issue #17099. + println("runtime: gp", gp, "found at rescan index", i-baseRescan, "but should be at", gp.gcRescan) + throw("bad g rescan index") + } + } else { + throw("markroot: bad index") + } + + // remember when we've first observed the G blocked + // needed only to output in traceback + status := readgstatus(gp) // We are not in a scan state + if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 { + gp.waitsince = work.tstart + } + + // scang must be done on the system stack in case + // we're trying to scan our own stack. + systemstack(func() { + // If this is a self-scan, put the user G in + // _Gwaiting to prevent self-deadlock. It may + // already be in _Gwaiting if this is a mark + // worker or we're in mark termination. + userG := getg().m.curg + selfScan := gp == userG && readgstatus(userG) == _Grunning + if selfScan { + casgstatus(userG, _Grunning, _Gwaiting) + userG.waitreason = "garbage collection scan" + } + + // TODO: scang blocks until gp's stack has + // been scanned, which may take a while for + // running goroutines. Consider doing this in + // two phases where the first is non-blocking: + // we scan the stacks we can and ask running + // goroutines to scan themselves; and the + // second blocks. + scang(gp, gcw) + + if selfScan { + casgstatus(userG, _Gwaiting, _Grunning) + } + }) + } +} + +// markrootBlock scans one element of the list of GC roots. +// +//go:nowritebarrier +func markrootBlock(roots *gcRootList, gcw *gcWork) { + for i := 0; i < roots.count; i++ { + r := &roots.roots[i] + scanblock(uintptr(r.decl), r.ptrdata, r.gcdata, gcw) + } +} + +// markrootSpans marks roots for one shard of work.spans. +// +//go:nowritebarrier +func markrootSpans(gcw *gcWork, shard int) { + // Objects with finalizers have two GC-related invariants: + // + // 1) Everything reachable from the object must be marked. + // This ensures that when we pass the object to its finalizer, + // everything the finalizer can reach will be retained. + // + // 2) Finalizer specials (which are not in the garbage + // collected heap) are roots. In practice, this means the fn + // field must be scanned. + // + // TODO(austin): There are several ideas for making this more + // efficient in issue #11485. + + if work.markrootDone { + throw("markrootSpans during second markroot") + } + + sg := mheap_.sweepgen + spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard) + // Note that work.spans may not include spans that were + // allocated between entering the scan phase and now. This is + // okay because any objects with finalizers in those spans + // must have been allocated and given finalizers after we + // entered the scan phase, so addfinalizer will have ensured + // the above invariants for them. + for _, s := range spans { + if s.state != mSpanInUse { + continue + } + if !useCheckmark && s.sweepgen != sg { + // sweepgen was updated (+2) during non-checkmark GC pass + print("sweep ", s.sweepgen, " ", sg, "\n") + throw("gc: unswept span") + } + + // Speculatively check if there are any specials + // without acquiring the span lock. This may race with + // adding the first special to a span, but in that + // case addfinalizer will observe that the GC is + // active (which is globally synchronized) and ensure + // the above invariants. We may also ensure the + // invariants, but it's okay to scan an object twice. + if s.specials == nil { + continue + } + + // Lock the specials to prevent a special from being + // removed from the list while we're traversing it. + lock(&s.speciallock) + + for sp := s.specials; sp != nil; sp = sp.next { + if sp.kind != _KindSpecialFinalizer { + continue + } + // don't mark finalized object, but scan it so we + // retain everything it points to. + spf := (*specialfinalizer)(unsafe.Pointer(sp)) + // A finalizer can be set for an inner byte of an object, find object beginning. + p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize + + // Mark everything that can be reached from + // the object (but *not* the object itself or + // we'll never collect it). + scanobject(p, gcw) + + // The special itself is a root. + scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw) + } + + unlock(&s.speciallock) + } +} + +// gcAssistAlloc performs GC work to make gp's assist debt positive. +// gp must be the calling user gorountine. +// +// This must be called with preemption enabled. +func gcAssistAlloc(gp *g) { + // Don't assist in non-preemptible contexts. These are + // generally fragile and won't allow the assist to block. + if getg() == gp.m.g0 { + return + } + if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" { + return + } + +retry: + // Compute the amount of scan work we need to do to make the + // balance positive. When the required amount of work is low, + // we over-assist to build up credit for future allocations + // and amortize the cost of assisting. + debtBytes := -gp.gcAssistBytes + scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes)) + if scanWork < gcOverAssistWork { + scanWork = gcOverAssistWork + debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork)) + } + + // Steal as much credit as we can from the background GC's + // scan credit. This is racy and may drop the background + // credit below 0 if two mutators steal at the same time. This + // will just cause steals to fail until credit is accumulated + // again, so in the long run it doesn't really matter, but we + // do have to handle the negative credit case. + bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit) + stolen := int64(0) + if bgScanCredit > 0 { + if bgScanCredit < scanWork { + stolen = bgScanCredit + gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen)) + } else { + stolen = scanWork + gp.gcAssistBytes += debtBytes + } + atomic.Xaddint64(&gcController.bgScanCredit, -stolen) + + scanWork -= stolen + + if scanWork == 0 { + // We were able to steal all of the credit we + // needed. + return + } + } + + // Perform assist work + systemstack(func() { + gcAssistAlloc1(gp, scanWork) + // The user stack may have moved, so this can't touch + // anything on it until it returns from systemstack. + }) + + completed := gp.param != nil + gp.param = nil + if completed { + gcMarkDone() + } + + if gp.gcAssistBytes < 0 { + // We were unable steal enough credit or perform + // enough work to pay off the assist debt. We need to + // do one of these before letting the mutator allocate + // more to prevent over-allocation. + // + // If this is because we were preempted, reschedule + // and try some more. + if gp.preempt { + Gosched() + goto retry + } + + // Add this G to an assist queue and park. When the GC + // has more background credit, it will satisfy queued + // assists before flushing to the global credit pool. + // + // Note that this does *not* get woken up when more + // work is added to the work list. The theory is that + // there wasn't enough work to do anyway, so we might + // as well let background marking take care of the + // work that is available. + if !gcParkAssist() { + goto retry + } + + // At this point either background GC has satisfied + // this G's assist debt, or the GC cycle is over. + } +} + +// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system +// stack. This is a separate function to make it easier to see that +// we're not capturing anything from the user stack, since the user +// stack may move while we're in this function. +// +// gcAssistAlloc1 indicates whether this assist completed the mark +// phase by setting gp.param to non-nil. This can't be communicated on +// the stack since it may move. +// +//go:systemstack +func gcAssistAlloc1(gp *g, scanWork int64) { + // Clear the flag indicating that this assist completed the + // mark phase. + gp.param = nil + + if atomic.Load(&gcBlackenEnabled) == 0 { + // The gcBlackenEnabled check in malloc races with the + // store that clears it but an atomic check in every malloc + // would be a performance hit. + // Instead we recheck it here on the non-preemptable system + // stack to determine if we should preform an assist. + + // GC is done, so ignore any remaining debt. + gp.gcAssistBytes = 0 + return + } + // Track time spent in this assist. Since we're on the + // system stack, this is non-preemptible, so we can + // just measure start and end time. + startTime := nanotime() + + decnwait := atomic.Xadd(&work.nwait, -1) + if decnwait == work.nproc { + println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc) + throw("nwait > work.nprocs") + } + + // gcDrainN requires the caller to be preemptible. + casgstatus(gp, _Grunning, _Gwaiting) + gp.waitreason = "GC assist marking" + + // drain own cached work first in the hopes that it + // will be more cache friendly. + gcw := &getg().m.p.ptr().gcw + workDone := gcDrainN(gcw, scanWork) + // If we are near the end of the mark phase + // dispose of the gcw. + if gcBlackenPromptly { + gcw.dispose() + } + + casgstatus(gp, _Gwaiting, _Grunning) + + // Record that we did this much scan work. + // + // Back out the number of bytes of assist credit that + // this scan work counts for. The "1+" is a poor man's + // round-up, to ensure this adds credit even if + // assistBytesPerWork is very low. + gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone)) + + // If this is the last worker and we ran out of work, + // signal a completion point. + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: work.nwait=", incnwait, + "work.nproc=", work.nproc, + "gcBlackenPromptly=", gcBlackenPromptly) + throw("work.nwait > work.nproc") + } + + if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + // This has reached a background completion point. Set + // gp.param to a non-nil value to indicate this. It + // doesn't matter what we set it to (it just has to be + // a valid pointer). + gp.param = unsafe.Pointer(gp) + } + duration := nanotime() - startTime + _p_ := gp.m.p.ptr() + _p_.gcAssistTime += duration + if _p_.gcAssistTime > gcAssistTimeSlack { + atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime) + _p_.gcAssistTime = 0 + } +} + +// gcWakeAllAssists wakes all currently blocked assists. This is used +// at the end of a GC cycle. gcBlackenEnabled must be false to prevent +// new assists from going to sleep after this point. +func gcWakeAllAssists() { + lock(&work.assistQueue.lock) + injectglist(work.assistQueue.head.ptr()) + work.assistQueue.head.set(nil) + work.assistQueue.tail.set(nil) + unlock(&work.assistQueue.lock) +} + +// gcParkAssist puts the current goroutine on the assist queue and parks. +// +// gcParkAssist returns whether the assist is now satisfied. If it +// returns false, the caller must retry the assist. +// +//go:nowritebarrier +func gcParkAssist() bool { + lock(&work.assistQueue.lock) + // If the GC cycle finished while we were getting the lock, + // exit the assist. The cycle can't finish while we hold the + // lock. + if atomic.Load(&gcBlackenEnabled) == 0 { + unlock(&work.assistQueue.lock) + return true + } + + gp := getg() + oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail + if oldHead == 0 { + work.assistQueue.head.set(gp) + } else { + oldTail.ptr().schedlink.set(gp) + } + work.assistQueue.tail.set(gp) + gp.schedlink.set(nil) + + // Recheck for background credit now that this G is in + // the queue, but can still back out. This avoids a + // race in case background marking has flushed more + // credit since we checked above. + if atomic.Loadint64(&gcController.bgScanCredit) > 0 { + work.assistQueue.head = oldHead + work.assistQueue.tail = oldTail + if oldTail != 0 { + oldTail.ptr().schedlink.set(nil) + } + unlock(&work.assistQueue.lock) + return false + } + // Park. + goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2) + return true +} + +// gcFlushBgCredit flushes scanWork units of background scan work +// credit. This first satisfies blocked assists on the +// work.assistQueue and then flushes any remaining credit to +// gcController.bgScanCredit. +// +// Write barriers are disallowed because this is used by gcDrain after +// it has ensured that all work is drained and this must preserve that +// condition. +// +//go:nowritebarrierrec +func gcFlushBgCredit(scanWork int64) { + if work.assistQueue.head == 0 { + // Fast path; there are no blocked assists. There's a + // small window here where an assist may add itself to + // the blocked queue and park. If that happens, we'll + // just get it on the next flush. + atomic.Xaddint64(&gcController.bgScanCredit, scanWork) + return + } + + scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) + + lock(&work.assistQueue.lock) + gp := work.assistQueue.head.ptr() + for gp != nil && scanBytes > 0 { + // Note that gp.gcAssistBytes is negative because gp + // is in debt. Think carefully about the signs below. + if scanBytes+gp.gcAssistBytes >= 0 { + // Satisfy this entire assist debt. + scanBytes += gp.gcAssistBytes + gp.gcAssistBytes = 0 + xgp := gp + gp = gp.schedlink.ptr() + // It's important that we *not* put xgp in + // runnext. Otherwise, it's possible for user + // code to exploit the GC worker's high + // scheduler priority to get itself always run + // before other goroutines and always in the + // fresh quantum started by GC. + ready(xgp, 0, false) + } else { + // Partially satisfy this assist. + gp.gcAssistBytes += scanBytes + scanBytes = 0 + // As a heuristic, we move this assist to the + // back of the queue so that large assists + // can't clog up the assist queue and + // substantially delay small assists. + xgp := gp + gp = gp.schedlink.ptr() + if gp == nil { + // gp is the only assist in the queue. + gp = xgp + } else { + xgp.schedlink = 0 + work.assistQueue.tail.ptr().schedlink.set(xgp) + work.assistQueue.tail.set(xgp) + } + break + } + } + work.assistQueue.head.set(gp) + if gp == nil { + work.assistQueue.tail.set(nil) + } + + if scanBytes > 0 { + // Convert from scan bytes back to work. + scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte) + atomic.Xaddint64(&gcController.bgScanCredit, scanWork) + } + unlock(&work.assistQueue.lock) +} + +// We use a C function to find the stack. +func doscanstack(*g, *gcWork) + +// scanstack scans gp's stack, greying all pointers found on the stack. +// +// During mark phase, it also installs stack barriers while traversing +// gp's stack. During mark termination, it stops scanning when it +// reaches an unhit stack barrier. +// +// scanstack is marked go:systemstack because it must not be preempted +// while using a workbuf. +// +//go:nowritebarrier +//go:systemstack +func scanstack(gp *g, gcw *gcWork) { + if gp.gcscanvalid { + return + } + + if readgstatus(gp)&_Gscan == 0 { + print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n") + throw("scanstack - bad status") + } + + switch readgstatus(gp) &^ _Gscan { + default: + print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") + throw("mark - bad status") + case _Gdead: + return + case _Grunning: + // ok for gccgo, though not for gc. + case _Grunnable, _Gsyscall, _Gwaiting: + // ok + } + + mp := gp.m + if mp != nil && mp.helpgc != 0 { + throw("can't scan gchelper stack") + } + + // Scan the stack. + doscanstack(gp, gcw) + + // Conservatively scan the saved register values. + scanstackblock(uintptr(unsafe.Pointer(&gp.gcregs)), unsafe.Sizeof(gp.gcregs), gcw) + scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw) + + if gcphase == _GCmark { + // gp may have added itself to the rescan list between + // when GC started and now. It's clean now, so remove + // it. This isn't safe during mark termination because + // mark termination is consuming this list, but it's + // also not necessary. + dequeueRescan(gp) + } + gp.gcscanvalid = true +} + +// queueRescan adds gp to the stack rescan list and clears +// gp.gcscanvalid. The caller must own gp and ensure that gp isn't +// already on the rescan list. +func queueRescan(gp *g) { + if debug.gcrescanstacks == 0 { + // Clear gcscanvalid to keep assertions happy. + // + // TODO: Remove gcscanvalid entirely when we remove + // stack rescanning. + gp.gcscanvalid = false + return + } + + if gcphase == _GCoff { + gp.gcscanvalid = false + return + } + if gp.gcRescan != -1 { + throw("g already on rescan list") + } + + lock(&work.rescan.lock) + gp.gcscanvalid = false + + // Recheck gcphase under the lock in case there was a phase change. + if gcphase == _GCoff { + unlock(&work.rescan.lock) + return + } + if len(work.rescan.list) == cap(work.rescan.list) { + throw("rescan list overflow") + } + n := len(work.rescan.list) + gp.gcRescan = int32(n) + work.rescan.list = work.rescan.list[:n+1] + work.rescan.list[n].set(gp) + unlock(&work.rescan.lock) +} + +// dequeueRescan removes gp from the stack rescan list, if gp is on +// the rescan list. The caller must own gp. +func dequeueRescan(gp *g) { + if debug.gcrescanstacks == 0 { + return + } + + if gp.gcRescan == -1 { + return + } + if gcphase == _GCoff { + gp.gcRescan = -1 + return + } + + lock(&work.rescan.lock) + if work.rescan.list[gp.gcRescan].ptr() != gp { + throw("bad dequeueRescan") + } + // Careful: gp may itself be the last G on the list. + last := work.rescan.list[len(work.rescan.list)-1] + work.rescan.list[gp.gcRescan] = last + last.ptr().gcRescan = gp.gcRescan + gp.gcRescan = -1 + work.rescan.list = work.rescan.list[:len(work.rescan.list)-1] + unlock(&work.rescan.lock) +} + +type gcDrainFlags int + +const ( + gcDrainUntilPreempt gcDrainFlags = 1 << iota + gcDrainNoBlock + gcDrainFlushBgCredit + gcDrainIdle + + // gcDrainBlock means neither gcDrainUntilPreempt or + // gcDrainNoBlock. It is the default, but callers should use + // the constant for documentation purposes. + gcDrainBlock gcDrainFlags = 0 +) + +// gcDrain scans roots and objects in work buffers, blackening grey +// objects until all roots and work buffers have been drained. +// +// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt +// is set. This implies gcDrainNoBlock. +// +// If flags&gcDrainIdle != 0, gcDrain returns when there is other work +// to do. This implies gcDrainNoBlock. +// +// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is +// unable to get more work. Otherwise, it will block until all +// blocking calls are blocked in gcDrain. +// +// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work +// credit to gcController.bgScanCredit every gcCreditSlack units of +// scan work. +// +//go:nowritebarrier +func gcDrain(gcw *gcWork, flags gcDrainFlags) { + if !writeBarrier.needed { + throw("gcDrain phase incorrect") + } + + gp := getg().m.curg + preemptible := flags&gcDrainUntilPreempt != 0 + blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0 + flushBgCredit := flags&gcDrainFlushBgCredit != 0 + idle := flags&gcDrainIdle != 0 + + initScanWork := gcw.scanWork + // idleCheck is the scan work at which to perform the next + // idle check with the scheduler. + idleCheck := initScanWork + idleCheckThreshold + + // Drain root marking jobs. + if work.markrootNext < work.markrootJobs { + for !(preemptible && gp.preempt) { + job := atomic.Xadd(&work.markrootNext, +1) - 1 + if job >= work.markrootJobs { + break + } + markroot(gcw, job) + if idle && pollWork() { + goto done + } + } + } + + // Drain heap marking jobs. + for !(preemptible && gp.preempt) { + // Try to keep work available on the global queue. We used to + // check if there were waiting workers, but it's better to + // just keep work available than to make workers wait. In the + // worst case, we'll do O(log(_WorkbufSize)) unnecessary + // balances. + if work.full == 0 { + gcw.balance() + } + + var b uintptr + if blocking { + b = gcw.get() + } else { + b = gcw.tryGetFast() + if b == 0 { + b = gcw.tryGet() + } + } + if b == 0 { + // work barrier reached or tryGet failed. + break + } + scanobject(b, gcw) + + // Flush background scan work credit to the global + // account if we've accumulated enough locally so + // mutator assists can draw on it. + if gcw.scanWork >= gcCreditSlack { + atomic.Xaddint64(&gcController.scanWork, gcw.scanWork) + if flushBgCredit { + gcFlushBgCredit(gcw.scanWork - initScanWork) + initScanWork = 0 + } + idleCheck -= gcw.scanWork + gcw.scanWork = 0 + + if idle && idleCheck <= 0 { + idleCheck += idleCheckThreshold + if pollWork() { + break + } + } + } + } + + // In blocking mode, write barriers are not allowed after this + // point because we must preserve the condition that the work + // buffers are empty. + +done: + // Flush remaining scan work credit. + if gcw.scanWork > 0 { + atomic.Xaddint64(&gcController.scanWork, gcw.scanWork) + if flushBgCredit { + gcFlushBgCredit(gcw.scanWork - initScanWork) + } + gcw.scanWork = 0 + } +} + +// gcDrainN blackens grey objects until it has performed roughly +// scanWork units of scan work or the G is preempted. This is +// best-effort, so it may perform less work if it fails to get a work +// buffer. Otherwise, it will perform at least n units of work, but +// may perform more because scanning is always done in whole object +// increments. It returns the amount of scan work performed. +// +// The caller goroutine must be in a preemptible state (e.g., +// _Gwaiting) to prevent deadlocks during stack scanning. As a +// consequence, this must be called on the system stack. +// +//go:nowritebarrier +//go:systemstack +func gcDrainN(gcw *gcWork, scanWork int64) int64 { + if !writeBarrier.needed { + throw("gcDrainN phase incorrect") + } + + // There may already be scan work on the gcw, which we don't + // want to claim was done by this call. + workFlushed := -gcw.scanWork + + gp := getg().m.curg + for !gp.preempt && workFlushed+gcw.scanWork < scanWork { + // See gcDrain comment. + if work.full == 0 { + gcw.balance() + } + + // This might be a good place to add prefetch code... + // if(wbuf.nobj > 4) { + // PREFETCH(wbuf->obj[wbuf.nobj - 3]; + // } + // + b := gcw.tryGetFast() + if b == 0 { + b = gcw.tryGet() + } + + if b == 0 { + // Try to do a root job. + // + // TODO: Assists should get credit for this + // work. + if work.markrootNext < work.markrootJobs { + job := atomic.Xadd(&work.markrootNext, +1) - 1 + if job < work.markrootJobs { + markroot(gcw, job) + continue + } + } + // No heap or root jobs. + break + } + scanobject(b, gcw) + + // Flush background scan work credit. + if gcw.scanWork >= gcCreditSlack { + atomic.Xaddint64(&gcController.scanWork, gcw.scanWork) + workFlushed += gcw.scanWork + gcw.scanWork = 0 + } + } + + // Unlike gcDrain, there's no need to flush remaining work + // here because this never flushes to bgScanCredit and + // gcw.dispose will flush any remaining work to scanWork. + + return workFlushed + gcw.scanWork +} + +// scanblock scans b as scanobject would, but using an explicit +// pointer bitmap instead of the heap bitmap. +// +// This is used to scan non-heap roots, so it does not update +// gcw.bytesMarked or gcw.scanWork. +// +//go:nowritebarrier +func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) { + // Use local copies of original parameters, so that a stack trace + // due to one of the throws below shows the original block + // base and extent. + b := b0 + n := n0 + + arena_start := mheap_.arena_start + arena_used := mheap_.arena_used + + for i := uintptr(0); i < n; { + // Find bits for the next word. + bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8))) + if bits == 0 { + i += sys.PtrSize * 8 + continue + } + for j := 0; j < 8 && i < n; j++ { + if bits&1 != 0 { + // Same work as in scanobject; see comments there. + obj := *(*uintptr)(unsafe.Pointer(b + i)) + if obj != 0 && arena_start <= obj && obj < arena_used { + if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + } + } + } + bits >>= 1 + i += sys.PtrSize + } + } +} + +// scanobject scans the object starting at b, adding pointers to gcw. +// b must point to the beginning of a heap object or an oblet. +// scanobject consults the GC bitmap for the pointer mask and the +// spans for the size of the object. +// +//go:nowritebarrier +func scanobject(b uintptr, gcw *gcWork) { + // Note that arena_used may change concurrently during + // scanobject and hence scanobject may encounter a pointer to + // a newly allocated heap object that is *not* in + // [start,used). It will not mark this object; however, we + // know that it was just installed by a mutator, which means + // that mutator will execute a write barrier and take care of + // marking it. This is even more pronounced on relaxed memory + // architectures since we access arena_used without barriers + // or synchronization, but the same logic applies. + arena_start := mheap_.arena_start + arena_used := mheap_.arena_used + + // Find the bits for b and the size of the object at b. + // + // b is either the beginning of an object, in which case this + // is the size of the object to scan, or it points to an + // oblet, in which case we compute the size to scan below. + hbits := heapBitsForAddr(b) + s := spanOfUnchecked(b) + n := s.elemsize + if n == 0 { + throw("scanobject n == 0") + } + + if n > maxObletBytes { + // Large object. Break into oblets for better + // parallelism and lower latency. + if b == s.base() { + // It's possible this is a noscan object (not + // from greyobject, but from other code + // paths), in which case we must *not* enqueue + // oblets since their bitmaps will be + // uninitialized. + if !hbits.hasPointers(n) { + // Bypass the whole scan. + gcw.bytesMarked += uint64(n) + return + } + + // Enqueue the other oblets to scan later. + // Some oblets may be in b's scalar tail, but + // these will be marked as "no more pointers", + // so we'll drop out immediately when we go to + // scan those. + for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes { + if !gcw.putFast(oblet) { + gcw.put(oblet) + } + } + } + + // Compute the size of the oblet. Since this object + // must be a large object, s.base() is the beginning + // of the object. + n = s.base() + s.elemsize - b + if n > maxObletBytes { + n = maxObletBytes + } + } + + var i uintptr + for i = 0; i < n; i += sys.PtrSize { + // Find bits for this word. + if i != 0 { + // Avoid needless hbits.next() on last iteration. + hbits = hbits.next() + } + // Load bits once. See CL 22712 and issue 16973 for discussion. + bits := hbits.bits() + // During checkmarking, 1-word objects store the checkmark + // in the type bit for the one word. The only one-word objects + // are pointers, or else they'd be merged with other non-pointer + // data into larger allocations. + if i != 1*sys.PtrSize && bits&bitScan == 0 { + break // no more pointers in this object + } + if bits&bitPointer == 0 { + continue // not a pointer + } + + // Work here is duplicated in scanblock and above. + // If you make changes here, make changes there too. + obj := *(*uintptr)(unsafe.Pointer(b + i)) + + // At this point we have extracted the next potential pointer. + // Check if it points into heap and not back at the current object. + if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n { + // Mark the object. + if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 { + greyobject(obj, b, i, hbits, span, gcw, objIndex, false) + } + } + } + gcw.bytesMarked += uint64(n) + gcw.scanWork += int64(i) +} + +//go:linkname scanstackblock runtime.scanstackblock + +// scanstackblock is called by the stack scanning code in C to +// actually find and mark pointers in the stack block. This is like +// scanblock, but we scan the stack conservatively, so there is no +// bitmask of pointers. +func scanstackblock(b, n uintptr, gcw *gcWork) { + arena_start := mheap_.arena_start + arena_used := mheap_.arena_used + + for i := uintptr(0); i < n; i += sys.PtrSize { + // Same work as in scanobject; see comments there. + obj := *(*uintptr)(unsafe.Pointer(b + i)) + if obj != 0 && arena_start <= obj && obj < arena_used { + if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 { + greyobject(obj, b, i, hbits, span, gcw, objIndex, true) + } + } + } +} + +// Shade the object if it isn't already. +// The object is not nil and known to be in the heap. +// Preemption must be disabled. +//go:nowritebarrier +func shade(b uintptr) { + // shade can be called to shade a pointer found on the stack, + // so pass forStack as true to heapBitsForObject and greyobject. + if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 { + gcw := &getg().m.p.ptr().gcw + greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true) + if gcphase == _GCmarktermination || gcBlackenPromptly { + // Ps aren't allowed to cache work during mark + // termination. + gcw.dispose() + } + } +} + +// obj is the start of an object with mark mbits. +// If it isn't already marked, mark it and enqueue into gcw. +// base and off are for debugging only and could be removed. +//go:nowritebarrierrec +func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) { + // obj should be start of allocation, and so must be at least pointer-aligned. + if obj&(sys.PtrSize-1) != 0 { + throw("greyobject: obj not pointer-aligned") + } + mbits := span.markBitsForIndex(objIndex) + + if useCheckmark { + if !mbits.isMarked() { + // Stack scanning is conservative, so we can + // see a reference to an object not previously + // found. Assume the object was correctly not + // marked and ignore the pointer. + if forStack { + return + } + printlock() + print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n") + print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n") + + // Dump the source (base) object + gcDumpObject("base", base, off) + + // Dump the object + gcDumpObject("obj", obj, ^uintptr(0)) + + throw("checkmark found unmarked object") + } + if hbits.isCheckmarked(span.elemsize) { + return + } + hbits.setCheckmarked(span.elemsize) + if !hbits.isCheckmarked(span.elemsize) { + throw("setCheckmarked and isCheckmarked disagree") + } + } else { + // Stack scanning is conservative, so we can see a + // pointer to a free object. Assume the object was + // correctly freed and we must ignore the pointer. + if forStack && span.isFree(objIndex) { + return + } + + if debug.gccheckmark > 0 && span.isFree(objIndex) { + print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n") + gcDumpObject("base", base, off) + gcDumpObject("obj", obj, ^uintptr(0)) + throw("marking free object") + } + + // If marked we have nothing to do. + if mbits.isMarked() { + return + } + // mbits.setMarked() // Avoid extra call overhead with manual inlining. + atomic.Or8(mbits.bytep, mbits.mask) + // If this is a noscan object, fast-track it to black + // instead of greying it. + if !hbits.hasPointers(span.elemsize) { + gcw.bytesMarked += uint64(span.elemsize) + return + } + } + + // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but + // seems like a nice optimization that can be added back in. + // There needs to be time between the PREFETCH and the use. + // Previously we put the obj in an 8 element buffer that is drained at a rate + // to give the PREFETCH time to do its work. + // Use of PREFETCHNTA might be more appropriate than PREFETCH + if !gcw.putFast(obj) { + gcw.put(obj) + } +} + +// gcDumpObject dumps the contents of obj for debugging and marks the +// field at byte offset off in obj. +func gcDumpObject(label string, obj, off uintptr) { + if obj < mheap_.arena_start || obj >= mheap_.arena_used { + print(label, "=", hex(obj), " is not in the Go heap\n") + return + } + k := obj >> _PageShift + x := k + x -= mheap_.arena_start >> _PageShift + s := mheap_.spans[x] + print(label, "=", hex(obj), " k=", hex(k)) + if s == nil { + print(" s=nil\n") + return + } + print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, " s.state=") + if 0 <= s.state && int(s.state) < len(mSpanStateNames) { + print(mSpanStateNames[s.state], "\n") + } else { + print("unknown(", s.state, ")\n") + } + + skipped := false + size := s.elemsize + if s.state == _MSpanStack && size == 0 { + // We're printing something from a stack frame. We + // don't know how big it is, so just show up to an + // including off. + size = off + sys.PtrSize + } + for i := uintptr(0); i < size; i += sys.PtrSize { + // For big objects, just print the beginning (because + // that usually hints at the object's type) and the + // fields around off. + if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) { + skipped = true + continue + } + if skipped { + print(" ...\n") + skipped = false + } + print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i)))) + if i == off { + print(" <==") + } + print("\n") + } + if skipped { + print(" ...\n") + } +} + +// gcmarknewobject marks a newly allocated object black. obj must +// not contain any non-nil pointers. +// +// This is nosplit so it can manipulate a gcWork without preemption. +// +//go:nowritebarrier +//go:nosplit +func gcmarknewobject(obj, size, scanSize uintptr) { + if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen. + throw("gcmarknewobject called while doing checkmark") + } + markBitsForAddr(obj).setMarked() + gcw := &getg().m.p.ptr().gcw + gcw.bytesMarked += uint64(size) + gcw.scanWork += int64(scanSize) + if gcBlackenPromptly { + // There shouldn't be anything in the work queue, but + // we still need to flush stats. + gcw.dispose() + } +} + +// gcMarkTinyAllocs greys all active tiny alloc blocks. +// +// The world must be stopped. +func gcMarkTinyAllocs() { + for _, p := range &allp { + if p == nil || p.status == _Pdead { + break + } + c := p.mcache + if c == nil || c.tiny == 0 { + continue + } + _, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false) + gcw := &p.gcw + greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false) + if gcBlackenPromptly { + gcw.dispose() + } + } +} + +// Checkmarking + +// To help debug the concurrent GC we remark with the world +// stopped ensuring that any object encountered has their normal +// mark bit set. To do this we use an orthogonal bit +// pattern to indicate the object is marked. The following pattern +// uses the upper two bits in the object's boundary nibble. +// 01: scalar not marked +// 10: pointer not marked +// 11: pointer marked +// 00: scalar marked +// Xoring with 01 will flip the pattern from marked to unmarked and vica versa. +// The higher bit is 1 for pointers and 0 for scalars, whether the object +// is marked or not. +// The first nibble no longer holds the typeDead pattern indicating that the +// there are no more pointers in the object. This information is held +// in the second nibble. + +// If useCheckmark is true, marking of an object uses the +// checkmark bits (encoding above) instead of the standard +// mark bits. +var useCheckmark = false + +//go:nowritebarrier +func initCheckmarks() { + useCheckmark = true + for _, s := range mheap_.allspans { + if s.state == _MSpanInUse { + heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout()) + } + } +} + +func clearCheckmarks() { + useCheckmark = false + for _, s := range mheap_.allspans { + if s.state == _MSpanInUse { + heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout()) + } + } +} diff --git a/libgo/go/runtime/mgcsweep.go b/libgo/go/runtime/mgcsweep.go new file mode 100644 index 0000000..9f24fb1 --- /dev/null +++ b/libgo/go/runtime/mgcsweep.go @@ -0,0 +1,428 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Garbage collector: sweeping + +package runtime + +import ( + "runtime/internal/atomic" + "unsafe" +) + +var sweep sweepdata + +// State of background sweep. +type sweepdata struct { + lock mutex + g *g + parked bool + started bool + + nbgsweep uint32 + npausesweep uint32 + + // pacertracegen is the sweepgen at which the last pacer trace + // "sweep finished" message was printed. + pacertracegen uint32 +} + +// finishsweep_m ensures that all spans are swept. +// +// The world must be stopped. This ensures there are no sweeps in +// progress. +// +//go:nowritebarrier +func finishsweep_m() { + // Sweeping must be complete before marking commences, so + // sweep any unswept spans. If this is a concurrent GC, there + // shouldn't be any spans left to sweep, so this should finish + // instantly. If GC was forced before the concurrent sweep + // finished, there may be spans to sweep. + for sweepone() != ^uintptr(0) { + sweep.npausesweep++ + } + + nextMarkBitArenaEpoch() +} + +func bgsweep(c chan int) { + sweep.g = getg() + + lock(&sweep.lock) + sweep.parked = true + c <- 1 + goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + + for { + for gosweepone() != ^uintptr(0) { + sweep.nbgsweep++ + Gosched() + } + lock(&sweep.lock) + if !gosweepdone() { + // This can happen if a GC runs between + // gosweepone returning ^0 above + // and the lock being acquired. + unlock(&sweep.lock) + continue + } + sweep.parked = true + goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) + } +} + +// sweeps one span +// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep +//go:nowritebarrier +func sweepone() uintptr { + _g_ := getg() + + // increment locks to ensure that the goroutine is not preempted + // in the middle of sweep thus leaving the span in an inconsistent state for next GC + _g_.m.locks++ + sg := mheap_.sweepgen + for { + s := mheap_.sweepSpans[1-sg/2%2].pop() + if s == nil { + mheap_.sweepdone = 1 + _g_.m.locks-- + if debug.gcpacertrace > 0 && atomic.Cas(&sweep.pacertracegen, sg-2, sg) { + print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n") + } + return ^uintptr(0) + } + if s.state != mSpanInUse { + // This can happen if direct sweeping already + // swept this span, but in that case the sweep + // generation should always be up-to-date. + if s.sweepgen != sg { + print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "\n") + throw("non in-use span in unswept list") + } + continue + } + if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1) { + continue + } + npages := s.npages + if !s.sweep(false) { + // Span is still in-use, so this returned no + // pages to the heap and the span needs to + // move to the swept in-use list. + npages = 0 + } + _g_.m.locks-- + return npages + } +} + +//go:nowritebarrier +func gosweepone() uintptr { + var ret uintptr + systemstack(func() { + ret = sweepone() + }) + return ret +} + +//go:nowritebarrier +func gosweepdone() bool { + return mheap_.sweepdone != 0 +} + +// Returns only when span s has been swept. +//go:nowritebarrier +func (s *mspan) ensureSwept() { + // Caller must disable preemption. + // Otherwise when this function returns the span can become unswept again + // (if GC is triggered on another goroutine). + _g_ := getg() + if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { + throw("MSpan_EnsureSwept: m is not locked") + } + + sg := mheap_.sweepgen + if atomic.Load(&s.sweepgen) == sg { + return + } + // The caller must be sure that the span is a MSpanInUse span. + if atomic.Cas(&s.sweepgen, sg-2, sg-1) { + s.sweep(false) + return + } + // unfortunate condition, and we don't have efficient means to wait + for atomic.Load(&s.sweepgen) != sg { + osyield() + } +} + +// Sweep frees or collects finalizers for blocks not marked in the mark phase. +// It clears the mark bits in preparation for the next GC round. +// Returns true if the span was returned to heap. +// If preserve=true, don't return it to heap nor relink in MCentral lists; +// caller takes care of it. +//TODO go:nowritebarrier +func (s *mspan) sweep(preserve bool) bool { + // It's critical that we enter this function with preemption disabled, + // GC must not start while we are in the middle of this function. + _g_ := getg() + if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { + throw("MSpan_Sweep: m is not locked") + } + sweepgen := mheap_.sweepgen + if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("MSpan_Sweep: bad span state") + } + + if trace.enabled { + traceGCSweepStart() + } + + atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) + + cl := s.sizeclass + size := s.elemsize + res := false + nfree := 0 + + c := _g_.m.mcache + freeToHeap := false + + // The allocBits indicate which unmarked objects don't need to be + // processed since they were free at the end of the last GC cycle + // and were not allocated since then. + // If the allocBits index is >= s.freeindex and the bit + // is not marked then the object remains unallocated + // since the last GC. + // This situation is analogous to being on a freelist. + + // Unlink & free special records for any objects we're about to free. + // Two complications here: + // 1. An object can have both finalizer and profile special records. + // In such case we need to queue finalizer for execution, + // mark the object as live and preserve the profile special. + // 2. A tiny object can have several finalizers setup for different offsets. + // If such object is not marked, we need to queue all finalizers at once. + // Both 1 and 2 are possible at the same time. + specialp := &s.specials + special := *specialp + for special != nil { + // A finalizer can be set for an inner byte of an object, find object beginning. + objIndex := uintptr(special.offset) / size + p := s.base() + objIndex*size + mbits := s.markBitsForIndex(objIndex) + if !mbits.isMarked() { + // This object is not marked and has at least one special record. + // Pass 1: see if it has at least one finalizer. + hasFin := false + endOffset := p - s.base() + size + for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { + if tmp.kind == _KindSpecialFinalizer { + // Stop freeing of object if it has a finalizer. + mbits.setMarkedNonAtomic() + hasFin = true + break + } + } + // Pass 2: queue all finalizers _or_ handle profile record. + for special != nil && uintptr(special.offset) < endOffset { + // Find the exact byte for which the special was setup + // (as opposed to object beginning). + p := s.base() + uintptr(special.offset) + if special.kind == _KindSpecialFinalizer || !hasFin { + // Splice out special record. + y := special + special = special.next + *specialp = special + freespecial(y, unsafe.Pointer(p), size) + } else { + // This is profile record, but the object has finalizers (so kept alive). + // Keep special record. + specialp = &special.next + special = *specialp + } + } + } else { + // object is still live: keep special record + specialp = &special.next + special = *specialp + } + } + + if debug.allocfreetrace != 0 || raceenabled || msanenabled { + // Find all newly freed objects. This doesn't have to + // efficient; allocfreetrace has massive overhead. + mbits := s.markBitsForBase() + abits := s.allocBitsForIndex(0) + for i := uintptr(0); i < s.nelems; i++ { + if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) { + x := s.base() + i*s.elemsize + if debug.allocfreetrace != 0 { + tracefree(unsafe.Pointer(x), size) + } + if raceenabled { + racefree(unsafe.Pointer(x), size) + } + if msanenabled { + msanfree(unsafe.Pointer(x), size) + } + } + mbits.advance() + abits.advance() + } + } + + // Count the number of free objects in this span. + nfree = s.countFree() + if cl == 0 && nfree != 0 { + s.needzero = 1 + freeToHeap = true + } + nalloc := uint16(s.nelems) - uint16(nfree) + nfreed := s.allocCount - nalloc + + // This test is not reliable with gccgo, because of + // conservative stack scanning. The test boils down to + // checking that no new bits have been set in gcmarkBits since + // the span was added to the sweep count. New bits are set by + // greyobject. Seeing a new bit means that a live pointer has + // appeared that was not found during the mark phase. That can + // not happen when pointers are followed strictly. However, + // with conservative checking, it is possible for a pointer + // that will never be used to appear live and to cause a mark + // to be added. That is unfortunate in that it causes this + // check to be inaccurate, and it will keep an object live + // unnecessarily, but provided the pointer is not really live + // it is not otherwise a problem. So we disable the test for gccgo. + if false && nalloc > s.allocCount { + print("runtime: nelems=", s.nelems, " nfree=", nfree, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") + throw("sweep increased allocation count") + } + + s.allocCount = nalloc + wasempty := s.nextFreeIndex() == s.nelems + s.freeindex = 0 // reset allocation index to start of span. + + // gcmarkBits becomes the allocBits. + // get a fresh cleared gcmarkBits in preparation for next GC + s.allocBits = s.gcmarkBits + s.gcmarkBits = newMarkBits(s.nelems) + + // Initialize alloc bits cache. + s.refillAllocCache(0) + + // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, + // because of the potential for a concurrent free/SetFinalizer. + // But we need to set it before we make the span available for allocation + // (return it to heap or mcentral), because allocation code assumes that a + // span is already swept if available for allocation. + if freeToHeap || nfreed == 0 { + // The span must be in our exclusive ownership until we update sweepgen, + // check for potential races. + if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { + print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("MSpan_Sweep: bad span state after sweep") + } + // Serialization point. + // At this point the mark bits are cleared and allocation ready + // to go so release the span. + atomic.Store(&s.sweepgen, sweepgen) + } + + if nfreed > 0 && cl != 0 { + c.local_nsmallfree[cl] += uintptr(nfreed) + res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty) + // MCentral_FreeSpan updates sweepgen + } else if freeToHeap { + // Free large span to heap + + // NOTE(rsc,dvyukov): The original implementation of efence + // in CL 22060046 used SysFree instead of SysFault, so that + // the operating system would eventually give the memory + // back to us again, so that an efence program could run + // longer without running out of memory. Unfortunately, + // calling SysFree here without any kind of adjustment of the + // heap data structures means that when the memory does + // come back to us, we have the wrong metadata for it, either in + // the MSpan structures or in the garbage collection bitmap. + // Using SysFault here means that the program will run out of + // memory fairly quickly in efence mode, but at least it won't + // have mysterious crashes due to confused memory reuse. + // It should be possible to switch back to SysFree if we also + // implement and then call some kind of MHeap_DeleteSpan. + if debug.efence > 0 { + s.limit = 0 // prevent mlookup from finding this span + sysFault(unsafe.Pointer(s.base()), size) + } else { + mheap_.freeSpan(s, 1) + } + c.local_nlargefree++ + c.local_largefree += size + res = true + } + if !res { + // The span has been swept and is still in-use, so put + // it on the swept in-use list. + mheap_.sweepSpans[sweepgen/2%2].push(s) + } + if trace.enabled { + traceGCSweepDone() + } + return res +} + +// deductSweepCredit deducts sweep credit for allocating a span of +// size spanBytes. This must be performed *before* the span is +// allocated to ensure the system has enough credit. If necessary, it +// performs sweeping to prevent going in to debt. If the caller will +// also sweep pages (e.g., for a large allocation), it can pass a +// non-zero callerSweepPages to leave that many pages unswept. +// +// deductSweepCredit makes a worst-case assumption that all spanBytes +// bytes of the ultimately allocated span will be available for object +// allocation. The caller should call reimburseSweepCredit if that +// turns out not to be the case once the span is allocated. +// +// deductSweepCredit is the core of the "proportional sweep" system. +// It uses statistics gathered by the garbage collector to perform +// enough sweeping so that all pages are swept during the concurrent +// sweep phase between GC cycles. +// +// mheap_ must NOT be locked. +func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) { + if mheap_.sweepPagesPerByte == 0 { + // Proportional sweep is done or disabled. + return + } + + // Account for this span allocation. + spanBytesAlloc := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes)) + + // Fix debt if necessary. + pagesOwed := int64(mheap_.sweepPagesPerByte * float64(spanBytesAlloc)) + for pagesOwed-int64(atomic.Load64(&mheap_.pagesSwept)) > int64(callerSweepPages) { + if gosweepone() == ^uintptr(0) { + mheap_.sweepPagesPerByte = 0 + break + } + } +} + +// reimburseSweepCredit records that unusableBytes bytes of a +// just-allocated span are not available for object allocation. This +// offsets the worst-case charge performed by deductSweepCredit. +func reimburseSweepCredit(unusableBytes uintptr) { + if mheap_.sweepPagesPerByte == 0 { + // Nobody cares about the credit. Avoid the atomic. + return + } + nval := atomic.Xadd64(&mheap_.spanBytesAlloc, -int64(unusableBytes)) + if int64(nval) < 0 { + // Debugging for #18043. + print("runtime: bad spanBytesAlloc=", nval, " (was ", nval+uint64(unusableBytes), ") unusableBytes=", unusableBytes, " sweepPagesPerByte=", mheap_.sweepPagesPerByte, "\n") + throw("spanBytesAlloc underflow") + } +} diff --git a/libgo/go/runtime/mgcsweepbuf.go b/libgo/go/runtime/mgcsweepbuf.go new file mode 100644 index 0000000..6c1118e --- /dev/null +++ b/libgo/go/runtime/mgcsweepbuf.go @@ -0,0 +1,178 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// A gcSweepBuf is a set of *mspans. +// +// gcSweepBuf is safe for concurrent push operations *or* concurrent +// pop operations, but not both simultaneously. +type gcSweepBuf struct { + // A gcSweepBuf is a two-level data structure consisting of a + // growable spine that points to fixed-sized blocks. The spine + // can be accessed without locks, but adding a block or + // growing it requires taking the spine lock. + // + // Because each mspan covers at least 8K of heap and takes at + // most 8 bytes in the gcSweepBuf, the growth of the spine is + // quite limited. + // + // The spine and all blocks are allocated off-heap, which + // allows this to be used in the memory manager and avoids the + // need for write barriers on all of these. We never release + // this memory because there could be concurrent lock-free + // access and we're likely to reuse it anyway. (In principle, + // we could do this during STW.) + + spineLock mutex + spine unsafe.Pointer // *[N]*gcSweepBlock, accessed atomically + spineLen uintptr // Spine array length, accessed atomically + spineCap uintptr // Spine array cap, accessed under lock + + // index is the first unused slot in the logical concatenation + // of all blocks. It is accessed atomically. + index uint32 +} + +const ( + gcSweepBlockEntries = 512 // 4KB on 64-bit + gcSweepBufInitSpineCap = 256 // Enough for 1GB heap on 64-bit +) + +type gcSweepBlock struct { + spans [gcSweepBlockEntries]*mspan +} + +// push adds span s to buffer b. push is safe to call concurrently +// with other push operations, but NOT to call concurrently with pop. +func (b *gcSweepBuf) push(s *mspan) { + // Obtain our slot. + cursor := uintptr(atomic.Xadd(&b.index, +1) - 1) + top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries + + // Do we need to add a block? + spineLen := atomic.Loaduintptr(&b.spineLen) + var block *gcSweepBlock +retry: + if top < spineLen { + spine := atomic.Loadp(unsafe.Pointer(&b.spine)) + blockp := add(spine, sys.PtrSize*top) + block = (*gcSweepBlock)(atomic.Loadp(blockp)) + } else { + // Add a new block to the spine, potentially growing + // the spine. + lock(&b.spineLock) + // spineLen cannot change until we release the lock, + // but may have changed while we were waiting. + spineLen = atomic.Loaduintptr(&b.spineLen) + if top < spineLen { + unlock(&b.spineLock) + goto retry + } + + if spineLen == b.spineCap { + // Grow the spine. + newCap := b.spineCap * 2 + if newCap == 0 { + newCap = gcSweepBufInitSpineCap + } + newSpine := persistentalloc(newCap*sys.PtrSize, sys.CacheLineSize, &memstats.gc_sys) + if b.spineCap != 0 { + // Blocks are allocated off-heap, so + // no write barriers. + memmove(newSpine, b.spine, b.spineCap*sys.PtrSize) + } + // Spine is allocated off-heap, so no write barrier. + atomic.StorepNoWB(unsafe.Pointer(&b.spine), newSpine) + b.spineCap = newCap + // We can't immediately free the old spine + // since a concurrent push with a lower index + // could still be reading from it. We let it + // leak because even a 1TB heap would waste + // less than 2MB of memory on old spines. If + // this is a problem, we could free old spines + // during STW. + } + + // Allocate a new block and add it to the spine. + block = (*gcSweepBlock)(persistentalloc(unsafe.Sizeof(gcSweepBlock{}), sys.CacheLineSize, &memstats.gc_sys)) + blockp := add(b.spine, sys.PtrSize*top) + // Blocks are allocated off-heap, so no write barrier. + atomic.StorepNoWB(blockp, unsafe.Pointer(block)) + atomic.Storeuintptr(&b.spineLen, spineLen+1) + unlock(&b.spineLock) + } + + // We have a block. Insert the span. + block.spans[bottom] = s +} + +// pop removes and returns a span from buffer b, or nil if b is empty. +// pop is safe to call concurrently with other pop operations, but NOT +// to call concurrently with push. +func (b *gcSweepBuf) pop() *mspan { + cursor := atomic.Xadd(&b.index, -1) + if int32(cursor) < 0 { + atomic.Xadd(&b.index, +1) + return nil + } + + // There are no concurrent spine or block modifications during + // pop, so we can omit the atomics. + top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries + blockp := (**gcSweepBlock)(add(b.spine, sys.PtrSize*uintptr(top))) + block := *blockp + s := block.spans[bottom] + // Clear the pointer for block(i). + block.spans[bottom] = nil + return s +} + +// numBlocks returns the number of blocks in buffer b. numBlocks is +// safe to call concurrently with any other operation. Spans that have +// been pushed prior to the call to numBlocks are guaranteed to appear +// in some block in the range [0, numBlocks()), assuming there are no +// intervening pops. Spans that are pushed after the call may also +// appear in these blocks. +func (b *gcSweepBuf) numBlocks() int { + return int((atomic.Load(&b.index) + gcSweepBlockEntries - 1) / gcSweepBlockEntries) +} + +// block returns the spans in the i'th block of buffer b. block is +// safe to call concurrently with push. +func (b *gcSweepBuf) block(i int) []*mspan { + // Perform bounds check before loading spine address since + // push ensures the allocated length is at least spineLen. + if i < 0 || uintptr(i) >= atomic.Loaduintptr(&b.spineLen) { + throw("block index out of range") + } + + // Get block i. + spine := atomic.Loadp(unsafe.Pointer(&b.spine)) + blockp := add(spine, sys.PtrSize*uintptr(i)) + block := (*gcSweepBlock)(atomic.Loadp(blockp)) + + // Slice the block if necessary. + cursor := uintptr(atomic.Load(&b.index)) + top, bottom := cursor/gcSweepBlockEntries, cursor%gcSweepBlockEntries + var spans []*mspan + if uintptr(i) < top { + spans = block.spans[:] + } else { + spans = block.spans[:bottom] + } + + // push may have reserved a slot but not filled it yet, so + // trim away unused entries. + for len(spans) > 0 && spans[len(spans)-1] == nil { + spans = spans[:len(spans)-1] + } + return spans +} diff --git a/libgo/go/runtime/mgcwork.go b/libgo/go/runtime/mgcwork.go new file mode 100644 index 0000000..5eb05a7 --- /dev/null +++ b/libgo/go/runtime/mgcwork.go @@ -0,0 +1,444 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +const ( + _WorkbufSize = 2048 // in bytes; larger values result in less contention +) + +// Garbage collector work pool abstraction. +// +// This implements a producer/consumer model for pointers to grey +// objects. A grey object is one that is marked and on a work +// queue. A black object is marked and not on a work queue. +// +// Write barriers, root discovery, stack scanning, and object scanning +// produce pointers to grey objects. Scanning consumes pointers to +// grey objects, thus blackening them, and then scans them, +// potentially producing new pointers to grey objects. + +// A wbufptr holds a workbuf*, but protects it from write barriers. +// workbufs never live on the heap, so write barriers are unnecessary. +// Write barriers on workbuf pointers may also be dangerous in the GC. +// +// TODO: Since workbuf is now go:notinheap, this isn't necessary. +type wbufptr uintptr + +func wbufptrOf(w *workbuf) wbufptr { + return wbufptr(unsafe.Pointer(w)) +} + +func (wp wbufptr) ptr() *workbuf { + return (*workbuf)(unsafe.Pointer(wp)) +} + +// A gcWork provides the interface to produce and consume work for the +// garbage collector. +// +// A gcWork can be used on the stack as follows: +// +// (preemption must be disabled) +// gcw := &getg().m.p.ptr().gcw +// .. call gcw.put() to produce and gcw.get() to consume .. +// if gcBlackenPromptly { +// gcw.dispose() +// } +// +// It's important that any use of gcWork during the mark phase prevent +// the garbage collector from transitioning to mark termination since +// gcWork may locally hold GC work buffers. This can be done by +// disabling preemption (systemstack or acquirem). +type gcWork struct { + // wbuf1 and wbuf2 are the primary and secondary work buffers. + // + // This can be thought of as a stack of both work buffers' + // pointers concatenated. When we pop the last pointer, we + // shift the stack up by one work buffer by bringing in a new + // full buffer and discarding an empty one. When we fill both + // buffers, we shift the stack down by one work buffer by + // bringing in a new empty buffer and discarding a full one. + // This way we have one buffer's worth of hysteresis, which + // amortizes the cost of getting or putting a work buffer over + // at least one buffer of work and reduces contention on the + // global work lists. + // + // wbuf1 is always the buffer we're currently pushing to and + // popping from and wbuf2 is the buffer that will be discarded + // next. + // + // Invariant: Both wbuf1 and wbuf2 are nil or neither are. + wbuf1, wbuf2 wbufptr + + // Bytes marked (blackened) on this gcWork. This is aggregated + // into work.bytesMarked by dispose. + bytesMarked uint64 + + // Scan work performed on this gcWork. This is aggregated into + // gcController by dispose and may also be flushed by callers. + scanWork int64 +} + +func (w *gcWork) init() { + w.wbuf1 = wbufptrOf(getempty()) + wbuf2 := trygetfull() + if wbuf2 == nil { + wbuf2 = getempty() + } + w.wbuf2 = wbufptrOf(wbuf2) +} + +// put enqueues a pointer for the garbage collector to trace. +// obj must point to the beginning of a heap object or an oblet. +//go:nowritebarrier +func (w *gcWork) put(obj uintptr) { + flushed := false + wbuf := w.wbuf1.ptr() + if wbuf == nil { + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } else if wbuf.nobj == len(wbuf.obj) { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == len(wbuf.obj) { + putfull(wbuf) + wbuf = getempty() + w.wbuf1 = wbufptrOf(wbuf) + flushed = true + } + } + + wbuf.obj[wbuf.nobj] = obj + wbuf.nobj++ + + // If we put a buffer on full, let the GC controller know so + // it can encourage more workers to run. We delay this until + // the end of put so that w is in a consistent state, since + // enlistWorker may itself manipulate w. + if flushed && gcphase == _GCmark { + gcController.enlistWorker() + } +} + +// putFast does a put and returns true if it can be done quickly +// otherwise it returns false and the caller needs to call put. +//go:nowritebarrier +func (w *gcWork) putFast(obj uintptr) bool { + wbuf := w.wbuf1.ptr() + if wbuf == nil { + return false + } else if wbuf.nobj == len(wbuf.obj) { + return false + } + + wbuf.obj[wbuf.nobj] = obj + wbuf.nobj++ + return true +} + +// tryGet dequeues a pointer for the garbage collector to trace. +// +// If there are no pointers remaining in this gcWork or in the global +// queue, tryGet returns 0. Note that there may still be pointers in +// other gcWork instances or other caches. +//go:nowritebarrier +func (w *gcWork) tryGet() uintptr { + wbuf := w.wbuf1.ptr() + if wbuf == nil { + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } + if wbuf.nobj == 0 { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == 0 { + owbuf := wbuf + wbuf = trygetfull() + if wbuf == nil { + return 0 + } + putempty(owbuf) + w.wbuf1 = wbufptrOf(wbuf) + } + } + + wbuf.nobj-- + return wbuf.obj[wbuf.nobj] +} + +// tryGetFast dequeues a pointer for the garbage collector to trace +// if one is readily available. Otherwise it returns 0 and +// the caller is expected to call tryGet(). +//go:nowritebarrier +func (w *gcWork) tryGetFast() uintptr { + wbuf := w.wbuf1.ptr() + if wbuf == nil { + return 0 + } + if wbuf.nobj == 0 { + return 0 + } + + wbuf.nobj-- + return wbuf.obj[wbuf.nobj] +} + +// get dequeues a pointer for the garbage collector to trace, blocking +// if necessary to ensure all pointers from all queues and caches have +// been retrieved. get returns 0 if there are no pointers remaining. +//go:nowritebarrier +func (w *gcWork) get() uintptr { + wbuf := w.wbuf1.ptr() + if wbuf == nil { + w.init() + wbuf = w.wbuf1.ptr() + // wbuf is empty at this point. + } + if wbuf.nobj == 0 { + w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1 + wbuf = w.wbuf1.ptr() + if wbuf.nobj == 0 { + owbuf := wbuf + wbuf = getfull() + if wbuf == nil { + return 0 + } + putempty(owbuf) + w.wbuf1 = wbufptrOf(wbuf) + } + } + + // TODO: This might be a good place to add prefetch code + + wbuf.nobj-- + return wbuf.obj[wbuf.nobj] +} + +// dispose returns any cached pointers to the global queue. +// The buffers are being put on the full queue so that the +// write barriers will not simply reacquire them before the +// GC can inspect them. This helps reduce the mutator's +// ability to hide pointers during the concurrent mark phase. +// +//go:nowritebarrier +func (w *gcWork) dispose() { + if wbuf := w.wbuf1.ptr(); wbuf != nil { + if wbuf.nobj == 0 { + putempty(wbuf) + } else { + putfull(wbuf) + } + w.wbuf1 = 0 + + wbuf = w.wbuf2.ptr() + if wbuf.nobj == 0 { + putempty(wbuf) + } else { + putfull(wbuf) + } + w.wbuf2 = 0 + } + if w.bytesMarked != 0 { + // dispose happens relatively infrequently. If this + // atomic becomes a problem, we should first try to + // dispose less and if necessary aggregate in a per-P + // counter. + atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked)) + w.bytesMarked = 0 + } + if w.scanWork != 0 { + atomic.Xaddint64(&gcController.scanWork, w.scanWork) + w.scanWork = 0 + } +} + +// balance moves some work that's cached in this gcWork back on the +// global queue. +//go:nowritebarrier +func (w *gcWork) balance() { + if w.wbuf1 == 0 { + return + } + if wbuf := w.wbuf2.ptr(); wbuf.nobj != 0 { + putfull(wbuf) + w.wbuf2 = wbufptrOf(getempty()) + } else if wbuf := w.wbuf1.ptr(); wbuf.nobj > 4 { + w.wbuf1 = wbufptrOf(handoff(wbuf)) + } else { + return + } + // We flushed a buffer to the full list, so wake a worker. + if gcphase == _GCmark { + gcController.enlistWorker() + } +} + +// empty returns true if w has no mark work available. +//go:nowritebarrier +func (w *gcWork) empty() bool { + return w.wbuf1 == 0 || (w.wbuf1.ptr().nobj == 0 && w.wbuf2.ptr().nobj == 0) +} + +// Internally, the GC work pool is kept in arrays in work buffers. +// The gcWork interface caches a work buffer until full (or empty) to +// avoid contending on the global work buffer lists. + +type workbufhdr struct { + node lfnode // must be first + nobj int +} + +//go:notinheap +type workbuf struct { + workbufhdr + // account for the above fields + obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr +} + +// workbuf factory routines. These funcs are used to manage the +// workbufs. +// If the GC asks for some work these are the only routines that +// make wbufs available to the GC. + +func (b *workbuf) checknonempty() { + if b.nobj == 0 { + throw("workbuf is empty") + } +} + +func (b *workbuf) checkempty() { + if b.nobj != 0 { + throw("workbuf is not empty") + } +} + +// getempty pops an empty work buffer off the work.empty list, +// allocating new buffers if none are available. +//go:nowritebarrier +func getempty() *workbuf { + var b *workbuf + if work.empty != 0 { + b = (*workbuf)(lfstackpop(&work.empty)) + if b != nil { + b.checkempty() + } + } + if b == nil { + b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), sys.CacheLineSize, &memstats.gc_sys)) + } + return b +} + +// putempty puts a workbuf onto the work.empty list. +// Upon entry this go routine owns b. The lfstackpush relinquishes ownership. +//go:nowritebarrier +func putempty(b *workbuf) { + b.checkempty() + lfstackpush(&work.empty, &b.node) +} + +// putfull puts the workbuf on the work.full list for the GC. +// putfull accepts partially full buffers so the GC can avoid competing +// with the mutators for ownership of partially full buffers. +//go:nowritebarrier +func putfull(b *workbuf) { + b.checknonempty() + lfstackpush(&work.full, &b.node) +} + +// trygetfull tries to get a full or partially empty workbuffer. +// If one is not immediately available return nil +//go:nowritebarrier +func trygetfull() *workbuf { + b := (*workbuf)(lfstackpop(&work.full)) + if b != nil { + b.checknonempty() + return b + } + return b +} + +// Get a full work buffer off the work.full list. +// If nothing is available wait until all the other gc helpers have +// finished and then return nil. +// getfull acts as a barrier for work.nproc helpers. As long as one +// gchelper is actively marking objects it +// may create a workbuffer that the other helpers can work on. +// The for loop either exits when a work buffer is found +// or when _all_ of the work.nproc GC helpers are in the loop +// looking for work and thus not capable of creating new work. +// This is in fact the termination condition for the STW mark +// phase. +//go:nowritebarrier +func getfull() *workbuf { + b := (*workbuf)(lfstackpop(&work.full)) + if b != nil { + b.checknonempty() + return b + } + + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") + } + for i := 0; ; i++ { + if work.full != 0 { + decnwait := atomic.Xadd(&work.nwait, -1) + if decnwait == work.nproc { + println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") + } + b = (*workbuf)(lfstackpop(&work.full)) + if b != nil { + b.checknonempty() + return b + } + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") + } + } + if work.nwait == work.nproc && work.markrootNext >= work.markrootJobs { + return nil + } + _g_ := getg() + if i < 10 { + _g_.m.gcstats.nprocyield++ + procyield(20) + } else if i < 20 { + _g_.m.gcstats.nosyield++ + osyield() + } else { + _g_.m.gcstats.nsleep++ + usleep(100) + } + } +} + +//go:nowritebarrier +func handoff(b *workbuf) *workbuf { + // Make new buffer with half of b's pointers. + b1 := getempty() + n := b.nobj / 2 + b.nobj -= n + b1.nobj = n + memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0])) + _g_ := getg() + _g_.m.gcstats.nhandoff++ + _g_.m.gcstats.nhandoffcnt += uint64(n) + + // Put b on full list - let first half of b get stolen. + putfull(b) + return b1 +} diff --git a/libgo/go/runtime/mheap.go b/libgo/go/runtime/mheap.go new file mode 100644 index 0000000..7262748 --- /dev/null +++ b/libgo/go/runtime/mheap.go @@ -0,0 +1,1427 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Page heap. +// +// See malloc.go for overview. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" + "unsafe" +) + +// minPhysPageSize is a lower-bound on the physical page size. The +// true physical page size may be larger than this. In contrast, +// sys.PhysPageSize is an upper-bound on the physical page size. +const minPhysPageSize = 4096 + +// Main malloc heap. +// The heap itself is the "free[]" and "large" arrays, +// but all the other global data is here too. +// +// mheap must not be heap-allocated because it contains mSpanLists, +// which must not be heap-allocated. +// +//go:notinheap +type mheap struct { + lock mutex + free [_MaxMHeapList]mSpanList // free lists of given length + freelarge mSpanList // free lists length >= _MaxMHeapList + busy [_MaxMHeapList]mSpanList // busy lists of large objects of given length + busylarge mSpanList // busy lists of large objects length >= _MaxMHeapList + sweepgen uint32 // sweep generation, see comment in mspan + sweepdone uint32 // all spans are swept + + // allspans is a slice of all mspans ever created. Each mspan + // appears exactly once. + // + // The memory for allspans is manually managed and can be + // reallocated and move as the heap grows. + // + // In general, allspans is protected by mheap_.lock, which + // prevents concurrent access as well as freeing the backing + // store. Accesses during STW might not hold the lock, but + // must ensure that allocation cannot happen around the + // access (since that may free the backing store). + allspans []*mspan // all spans out there + + // spans is a lookup table to map virtual address page IDs to *mspan. + // For allocated spans, their pages map to the span itself. + // For free spans, only the lowest and highest pages map to the span itself. + // Internal pages map to an arbitrary span. + // For pages that have never been allocated, spans entries are nil. + // + // This is backed by a reserved region of the address space so + // it can grow without moving. The memory up to len(spans) is + // mapped. cap(spans) indicates the total reserved memory. + spans []*mspan + + // sweepSpans contains two mspan stacks: one of swept in-use + // spans, and one of unswept in-use spans. These two trade + // roles on each GC cycle. Since the sweepgen increases by 2 + // on each cycle, this means the swept spans are in + // sweepSpans[sweepgen/2%2] and the unswept spans are in + // sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the + // unswept stack and pushes spans that are still in-use on the + // swept stack. Likewise, allocating an in-use span pushes it + // on the swept stack. + sweepSpans [2]gcSweepBuf + + _ uint32 // align uint64 fields on 32-bit for atomics + + // Proportional sweep + pagesInUse uint64 // pages of spans in stats _MSpanInUse; R/W with mheap.lock + spanBytesAlloc uint64 // bytes of spans allocated this cycle; updated atomically + pagesSwept uint64 // pages swept this cycle; updated atomically + sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without + // TODO(austin): pagesInUse should be a uintptr, but the 386 + // compiler can't 8-byte align fields. + + // Malloc stats. + largefree uint64 // bytes freed for large objects (>maxsmallsize) + nlargefree uint64 // number of frees for large objects (>maxsmallsize) + nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) + + // range of addresses we might see in the heap + bitmap uintptr // Points to one byte past the end of the bitmap + bitmap_mapped uintptr + arena_start uintptr + arena_used uintptr // always mHeap_Map{Bits,Spans} before updating + arena_end uintptr + arena_reserved bool + + // central free lists for small size classes. + // the padding makes sure that the MCentrals are + // spaced CacheLineSize bytes apart, so that each MCentral.lock + // gets its own cache line. + central [_NumSizeClasses]struct { + mcentral mcentral + pad [sys.CacheLineSize]byte + } + + spanalloc fixalloc // allocator for span* + cachealloc fixalloc // allocator for mcache* + specialfinalizeralloc fixalloc // allocator for specialfinalizer* + specialprofilealloc fixalloc // allocator for specialprofile* + speciallock mutex // lock for special record allocators. +} + +var mheap_ mheap + +// An MSpan is a run of pages. +// +// When a MSpan is in the heap free list, state == MSpanFree +// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span. +// +// When a MSpan is allocated, state == MSpanInUse or MSpanStack +// and heapmap(i) == span for all s->start <= i < s->start+s->npages. + +// Every MSpan is in one doubly-linked list, +// either one of the MHeap's free lists or one of the +// MCentral's span lists. + +// An MSpan representing actual memory has state _MSpanInUse, +// _MSpanStack, or _MSpanFree. Transitions between these states are +// constrained as follows: +// +// * A span may transition from free to in-use or stack during any GC +// phase. +// +// * During sweeping (gcphase == _GCoff), a span may transition from +// in-use to free (as a result of sweeping) or stack to free (as a +// result of stacks being freed). +// +// * During GC (gcphase != _GCoff), a span *must not* transition from +// stack or in-use to free. Because concurrent GC may read a pointer +// and then look up its span, the span state must be monotonic. +type mSpanState uint8 + +const ( + _MSpanDead mSpanState = iota + _MSpanInUse // allocated for garbage collected heap + _MSpanStack // allocated for use by stack allocator + _MSpanFree +) + +// mSpanStateNames are the names of the span states, indexed by +// mSpanState. +var mSpanStateNames = []string{ + "_MSpanDead", + "_MSpanInUse", + "_MSpanStack", + "_MSpanFree", +} + +// mSpanList heads a linked list of spans. +// +//go:notinheap +type mSpanList struct { + first *mspan // first span in list, or nil if none + last *mspan // last span in list, or nil if none +} + +//go:notinheap +type mspan struct { + next *mspan // next span in list, or nil if none + prev *mspan // previous span in list, or nil if none + list *mSpanList // For debugging. TODO: Remove. + + startAddr uintptr // address of first byte of span aka s.base() + npages uintptr // number of pages in span + stackfreelist gclinkptr // list of free stacks, avoids overloading freelist + + // freeindex is the slot index between 0 and nelems at which to begin scanning + // for the next free object in this span. + // Each allocation scans allocBits starting at freeindex until it encounters a 0 + // indicating a free object. freeindex is then adjusted so that subsequent scans begin + // just past the the newly discovered free object. + // + // If freeindex == nelem, this span has no free objects. + // + // allocBits is a bitmap of objects in this span. + // If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0 + // then object n is free; + // otherwise, object n is allocated. Bits starting at nelem are + // undefined and should never be referenced. + // + // Object n starts at address n*elemsize + (start << pageShift). + freeindex uintptr + // TODO: Look up nelems from sizeclass and remove this field if it + // helps performance. + nelems uintptr // number of object in the span. + + // Cache of the allocBits at freeindex. allocCache is shifted + // such that the lowest bit corresponds to the bit freeindex. + // allocCache holds the complement of allocBits, thus allowing + // ctz (count trailing zero) to use it directly. + // allocCache may contain bits beyond s.nelems; the caller must ignore + // these. + allocCache uint64 + + // allocBits and gcmarkBits hold pointers to a span's mark and + // allocation bits. The pointers are 8 byte aligned. + // There are three arenas where this data is held. + // free: Dirty arenas that are no longer accessed + // and can be reused. + // next: Holds information to be used in the next GC cycle. + // current: Information being used during this GC cycle. + // previous: Information being used during the last GC cycle. + // A new GC cycle starts with the call to finishsweep_m. + // finishsweep_m moves the previous arena to the free arena, + // the current arena to the previous arena, and + // the next arena to the current arena. + // The next arena is populated as the spans request + // memory to hold gcmarkBits for the next GC cycle as well + // as allocBits for newly allocated spans. + // + // The pointer arithmetic is done "by hand" instead of using + // arrays to avoid bounds checks along critical performance + // paths. + // The sweep will free the old allocBits and set allocBits to the + // gcmarkBits. The gcmarkBits are replaced with a fresh zeroed + // out memory. + allocBits *uint8 + gcmarkBits *uint8 + + // sweep generation: + // if sweepgen == h->sweepgen - 2, the span needs sweeping + // if sweepgen == h->sweepgen - 1, the span is currently being swept + // if sweepgen == h->sweepgen, the span is swept and ready to use + // h->sweepgen is incremented by 2 after every GC + + sweepgen uint32 + divMul uint16 // for divide by elemsize - divMagic.mul + baseMask uint16 // if non-0, elemsize is a power of 2, & this will get object allocation base + allocCount uint16 // capacity - number of objects in freelist + sizeclass uint8 // size class + incache bool // being used by an mcache + state mSpanState // mspaninuse etc + needzero uint8 // needs to be zeroed before allocation + divShift uint8 // for divide by elemsize - divMagic.shift + divShift2 uint8 // for divide by elemsize - divMagic.shift2 + elemsize uintptr // computed from sizeclass or from npages + unusedsince int64 // first time spotted by gc in mspanfree state + npreleased uintptr // number of pages released to the os + limit uintptr // end of data in span + speciallock mutex // guards specials list + specials *special // linked list of special records sorted by offset. +} + +func (s *mspan) base() uintptr { + return s.startAddr +} + +func (s *mspan) layout() (size, n, total uintptr) { + total = s.npages << _PageShift + size = s.elemsize + if size > 0 { + n = total / size + } + return +} + +func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { + h := (*mheap)(vh) + s := (*mspan)(p) + if len(h.allspans) >= cap(h.allspans) { + n := 64 * 1024 / sys.PtrSize + if n < cap(h.allspans)*3/2 { + n = cap(h.allspans) * 3 / 2 + } + var new []*mspan + sp := (*slice)(unsafe.Pointer(&new)) + sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys) + if sp.array == nil { + throw("runtime: cannot allocate memory") + } + sp.len = len(h.allspans) + sp.cap = n + if len(h.allspans) > 0 { + copy(new, h.allspans) + } + oldAllspans := h.allspans + h.allspans = new + if len(oldAllspans) != 0 { + sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys) + } + } + h.allspans = append(h.allspans, s) +} + +// inheap reports whether b is a pointer into a (potentially dead) heap object. +// It returns false for pointers into stack spans. +// Non-preemptible because it is used by write barriers. +//go:nowritebarrier +//go:nosplit +func inheap(b uintptr) bool { + if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { + return false + } + // Not a beginning of a block, consult span table to find the block beginning. + s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] + if s == nil || b < s.base() || b >= s.limit || s.state != mSpanInUse { + return false + } + return true +} + +// inHeapOrStack is a variant of inheap that returns true for pointers into stack spans. +//go:nowritebarrier +//go:nosplit +func inHeapOrStack(b uintptr) bool { + if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used { + return false + } + // Not a beginning of a block, consult span table to find the block beginning. + s := mheap_.spans[(b-mheap_.arena_start)>>_PageShift] + if s == nil || b < s.base() { + return false + } + switch s.state { + case mSpanInUse: + return b < s.limit + case _MSpanStack: + return b < s.base()+s.npages<<_PageShift + default: + return false + } +} + +// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places. +// Use the functions instead. + +// spanOf returns the span of p. If p does not point into the heap or +// no span contains p, spanOf returns nil. +func spanOf(p uintptr) *mspan { + if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used { + return nil + } + return spanOfUnchecked(p) +} + +// spanOfUnchecked is equivalent to spanOf, but the caller must ensure +// that p points into the heap (that is, mheap_.arena_start <= p < +// mheap_.arena_used). +func spanOfUnchecked(p uintptr) *mspan { + return mheap_.spans[(p-mheap_.arena_start)>>_PageShift] +} + +func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 { + _g_ := getg() + + _g_.m.mcache.local_nlookup++ + if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 { + // purge cache stats to prevent overflow + lock(&mheap_.lock) + purgecachedstats(_g_.m.mcache) + unlock(&mheap_.lock) + } + + s := mheap_.lookupMaybe(unsafe.Pointer(v)) + if sp != nil { + *sp = s + } + if s == nil { + if base != nil { + *base = 0 + } + if size != nil { + *size = 0 + } + return 0 + } + + p := s.base() + if s.sizeclass == 0 { + // Large object. + if base != nil { + *base = p + } + if size != nil { + *size = s.npages << _PageShift + } + return 1 + } + + n := s.elemsize + if base != nil { + i := (v - p) / n + *base = p + i*n + } + if size != nil { + *size = n + } + + return 1 +} + +// Initialize the heap. +func (h *mheap) init(spansStart, spansBytes uintptr) { + h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) + h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) + h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) + h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys) + + // Don't zero mspan allocations. Background sweeping can + // inspect a span concurrently with allocating it, so it's + // important that the span's sweepgen survive across freeing + // and re-allocating a span to prevent background sweeping + // from improperly cas'ing it from 0. + // + // This is safe because mspan contains no heap pointers. + h.spanalloc.zero = false + + // h->mapcache needs no init + for i := range h.free { + h.free[i].init() + h.busy[i].init() + } + + h.freelarge.init() + h.busylarge.init() + for i := range h.central { + h.central[i].mcentral.init(int32(i)) + } + + sp := (*slice)(unsafe.Pointer(&h.spans)) + sp.array = unsafe.Pointer(spansStart) + sp.len = 0 + sp.cap = int(spansBytes / sys.PtrSize) +} + +// mHeap_MapSpans makes sure that the spans are mapped +// up to the new value of arena_used. +// +// It must be called with the expected new value of arena_used, +// *before* h.arena_used has been updated. +// Waiting to update arena_used until after the memory has been mapped +// avoids faults when other threads try access the bitmap immediately +// after observing the change to arena_used. +func (h *mheap) mapSpans(arena_used uintptr) { + // Map spans array, PageSize at a time. + n := arena_used + n -= h.arena_start + n = n / _PageSize * sys.PtrSize + n = round(n, physPageSize) + need := n / unsafe.Sizeof(h.spans[0]) + have := uintptr(len(h.spans)) + if have >= need { + return + } + h.spans = h.spans[:need] + sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys) +} + +// Sweeps spans in list until reclaims at least npages into heap. +// Returns the actual number of pages reclaimed. +func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr { + n := uintptr(0) + sg := mheap_.sweepgen +retry: + for s := list.first; s != nil; s = s.next { + if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) { + list.remove(s) + // swept spans are at the end of the list + list.insertBack(s) + unlock(&h.lock) + snpages := s.npages + if s.sweep(false) { + n += snpages + } + lock(&h.lock) + if n >= npages { + return n + } + // the span could have been moved elsewhere + goto retry + } + if s.sweepgen == sg-1 { + // the span is being sweept by background sweeper, skip + continue + } + // already swept empty span, + // all subsequent ones must also be either swept or in process of sweeping + break + } + return n +} + +// Sweeps and reclaims at least npage pages into heap. +// Called before allocating npage pages. +func (h *mheap) reclaim(npage uintptr) { + // First try to sweep busy spans with large objects of size >= npage, + // this has good chances of reclaiming the necessary space. + for i := int(npage); i < len(h.busy); i++ { + if h.reclaimList(&h.busy[i], npage) != 0 { + return // Bingo! + } + } + + // Then -- even larger objects. + if h.reclaimList(&h.busylarge, npage) != 0 { + return // Bingo! + } + + // Now try smaller objects. + // One such object is not enough, so we need to reclaim several of them. + reclaimed := uintptr(0) + for i := 0; i < int(npage) && i < len(h.busy); i++ { + reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed) + if reclaimed >= npage { + return + } + } + + // Now sweep everything that is not yet swept. + unlock(&h.lock) + for { + n := sweepone() + if n == ^uintptr(0) { // all spans are swept + break + } + reclaimed += n + if reclaimed >= npage { + break + } + } + lock(&h.lock) +} + +// Allocate a new span of npage pages from the heap for GC'd memory +// and record its size class in the HeapMap and HeapMapCache. +func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan { + _g_ := getg() + lock(&h.lock) + + // To prevent excessive heap growth, before allocating n pages + // we need to sweep and reclaim at least n pages. + if h.sweepdone == 0 { + // TODO(austin): This tends to sweep a large number of + // spans in order to find a few completely free spans + // (for example, in the garbage benchmark, this sweeps + // ~30x the number of pages its trying to allocate). + // If GC kept a bit for whether there were any marks + // in a span, we could release these free spans + // at the end of GC and eliminate this entirely. + h.reclaim(npage) + } + + // transfer stats from cache to global + memstats.heap_scan += uint64(_g_.m.mcache.local_scan) + _g_.m.mcache.local_scan = 0 + memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs) + _g_.m.mcache.local_tinyallocs = 0 + + s := h.allocSpanLocked(npage) + if s != nil { + // Record span info, because gc needs to be + // able to map interior pointer to containing span. + atomic.Store(&s.sweepgen, h.sweepgen) + h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list. + s.state = _MSpanInUse + s.allocCount = 0 + s.sizeclass = uint8(sizeclass) + if sizeclass == 0 { + s.elemsize = s.npages << _PageShift + s.divShift = 0 + s.divMul = 0 + s.divShift2 = 0 + s.baseMask = 0 + } else { + s.elemsize = uintptr(class_to_size[sizeclass]) + m := &class_to_divmagic[sizeclass] + s.divShift = m.shift + s.divMul = m.mul + s.divShift2 = m.shift2 + s.baseMask = m.baseMask + } + + // update stats, sweep lists + h.pagesInUse += uint64(npage) + if large { + memstats.heap_objects++ + atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift)) + // Swept spans are at the end of lists. + if s.npages < uintptr(len(h.free)) { + h.busy[s.npages].insertBack(s) + } else { + h.busylarge.insertBack(s) + } + } + } + // heap_scan and heap_live were updated. + if gcBlackenEnabled != 0 { + gcController.revise() + } + + if trace.enabled { + traceHeapAlloc() + } + + // h.spans is accessed concurrently without synchronization + // from other threads. Hence, there must be a store/store + // barrier here to ensure the writes to h.spans above happen + // before the caller can publish a pointer p to an object + // allocated from s. As soon as this happens, the garbage + // collector running on another processor could read p and + // look up s in h.spans. The unlock acts as the barrier to + // order these writes. On the read side, the data dependency + // between p and the index in h.spans orders the reads. + unlock(&h.lock) + return s +} + +func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool) *mspan { + // Don't do any operations that lock the heap on the G stack. + // It might trigger stack growth, and the stack growth code needs + // to be able to allocate heap. + var s *mspan + systemstack(func() { + s = h.alloc_m(npage, sizeclass, large) + }) + + if s != nil { + if needzero && s.needzero != 0 { + memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift) + } + s.needzero = 0 + } + return s +} + +func (h *mheap) allocStack(npage uintptr) *mspan { + _g_ := getg() + if _g_ != _g_.m.g0 { + throw("mheap_allocstack not on g0 stack") + } + lock(&h.lock) + s := h.allocSpanLocked(npage) + if s != nil { + s.state = _MSpanStack + s.stackfreelist = 0 + s.allocCount = 0 + memstats.stacks_inuse += uint64(s.npages << _PageShift) + } + + // This unlock acts as a release barrier. See mHeap_Alloc_m. + unlock(&h.lock) + return s +} + +// Allocates a span of the given size. h must be locked. +// The returned span has been removed from the +// free list, but its state is still MSpanFree. +func (h *mheap) allocSpanLocked(npage uintptr) *mspan { + var list *mSpanList + var s *mspan + + // Try in fixed-size lists up to max. + for i := int(npage); i < len(h.free); i++ { + list = &h.free[i] + if !list.isEmpty() { + s = list.first + goto HaveSpan + } + } + + // Best fit in list of large spans. + list = &h.freelarge + s = h.allocLarge(npage) + if s == nil { + if !h.grow(npage) { + return nil + } + s = h.allocLarge(npage) + if s == nil { + return nil + } + } + +HaveSpan: + // Mark span in use. + if s.state != _MSpanFree { + throw("MHeap_AllocLocked - MSpan not free") + } + if s.npages < npage { + throw("MHeap_AllocLocked - bad npages") + } + list.remove(s) + if s.inList() { + throw("still in list") + } + if s.npreleased > 0 { + sysUsed(unsafe.Pointer(s.base()), s.npages<<_PageShift) + memstats.heap_released -= uint64(s.npreleased << _PageShift) + s.npreleased = 0 + } + + if s.npages > npage { + // Trim extra and put it back in the heap. + t := (*mspan)(h.spanalloc.alloc()) + t.init(s.base()+npage<<_PageShift, s.npages-npage) + s.npages = npage + p := (t.base() - h.arena_start) >> _PageShift + if p > 0 { + h.spans[p-1] = s + } + h.spans[p] = t + h.spans[p+t.npages-1] = t + t.needzero = s.needzero + s.state = _MSpanStack // prevent coalescing with s + t.state = _MSpanStack + h.freeSpanLocked(t, false, false, s.unusedsince) + s.state = _MSpanFree + } + s.unusedsince = 0 + + p := (s.base() - h.arena_start) >> _PageShift + for n := uintptr(0); n < npage; n++ { + h.spans[p+n] = s + } + + memstats.heap_inuse += uint64(npage << _PageShift) + memstats.heap_idle -= uint64(npage << _PageShift) + + //println("spanalloc", hex(s.start<<_PageShift)) + if s.inList() { + throw("still in list") + } + return s +} + +// Allocate a span of exactly npage pages from the list of large spans. +func (h *mheap) allocLarge(npage uintptr) *mspan { + return bestFit(&h.freelarge, npage, nil) +} + +// Search list for smallest span with >= npage pages. +// If there are multiple smallest spans, take the one +// with the earliest starting address. +func bestFit(list *mSpanList, npage uintptr, best *mspan) *mspan { + for s := list.first; s != nil; s = s.next { + if s.npages < npage { + continue + } + if best == nil || s.npages < best.npages || (s.npages == best.npages && s.base() < best.base()) { + best = s + } + } + return best +} + +// Try to add at least npage pages of memory to the heap, +// returning whether it worked. +// +// h must be locked. +func (h *mheap) grow(npage uintptr) bool { + // Ask for a big chunk, to reduce the number of mappings + // the operating system needs to track; also amortizes + // the overhead of an operating system mapping. + // Allocate a multiple of 64kB. + npage = round(npage, (64<<10)/_PageSize) + ask := npage << _PageShift + if ask < _HeapAllocChunk { + ask = _HeapAllocChunk + } + + v := h.sysAlloc(ask) + if v == nil { + if ask > npage<<_PageShift { + ask = npage << _PageShift + v = h.sysAlloc(ask) + } + if v == nil { + print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n") + return false + } + } + + // Create a fake "in use" span and free it, so that the + // right coalescing happens. + s := (*mspan)(h.spanalloc.alloc()) + s.init(uintptr(v), ask>>_PageShift) + p := (s.base() - h.arena_start) >> _PageShift + for i := p; i < p+s.npages; i++ { + h.spans[i] = s + } + atomic.Store(&s.sweepgen, h.sweepgen) + s.state = _MSpanInUse + h.pagesInUse += uint64(s.npages) + h.freeSpanLocked(s, false, true, 0) + return true +} + +// Look up the span at the given address. +// Address is guaranteed to be in map +// and is guaranteed to be start or end of span. +func (h *mheap) lookup(v unsafe.Pointer) *mspan { + p := uintptr(v) + p -= h.arena_start + return h.spans[p>>_PageShift] +} + +// Look up the span at the given address. +// Address is *not* guaranteed to be in map +// and may be anywhere in the span. +// Map entries for the middle of a span are only +// valid for allocated spans. Free spans may have +// other garbage in their middles, so we have to +// check for that. +func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan { + if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used { + return nil + } + s := h.spans[(uintptr(v)-h.arena_start)>>_PageShift] + if s == nil || uintptr(v) < s.base() || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse { + return nil + } + return s +} + +// Free the span back into the heap. +func (h *mheap) freeSpan(s *mspan, acct int32) { + systemstack(func() { + mp := getg().m + lock(&h.lock) + memstats.heap_scan += uint64(mp.mcache.local_scan) + mp.mcache.local_scan = 0 + memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs) + mp.mcache.local_tinyallocs = 0 + if msanenabled { + // Tell msan that this entire span is no longer in use. + base := unsafe.Pointer(s.base()) + bytes := s.npages << _PageShift + msanfree(base, bytes) + } + if acct != 0 { + memstats.heap_objects-- + } + if gcBlackenEnabled != 0 { + // heap_scan changed. + gcController.revise() + } + h.freeSpanLocked(s, true, true, 0) + unlock(&h.lock) + }) +} + +func (h *mheap) freeStack(s *mspan) { + _g_ := getg() + if _g_ != _g_.m.g0 { + throw("mheap_freestack not on g0 stack") + } + s.needzero = 1 + lock(&h.lock) + memstats.stacks_inuse -= uint64(s.npages << _PageShift) + h.freeSpanLocked(s, true, true, 0) + unlock(&h.lock) +} + +// s must be on a busy list (h.busy or h.busylarge) or unlinked. +func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) { + switch s.state { + case _MSpanStack: + if s.allocCount != 0 { + throw("MHeap_FreeSpanLocked - invalid stack free") + } + case _MSpanInUse: + if s.allocCount != 0 || s.sweepgen != h.sweepgen { + print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") + throw("MHeap_FreeSpanLocked - invalid free") + } + h.pagesInUse -= uint64(s.npages) + default: + throw("MHeap_FreeSpanLocked - invalid span state") + } + + if acctinuse { + memstats.heap_inuse -= uint64(s.npages << _PageShift) + } + if acctidle { + memstats.heap_idle += uint64(s.npages << _PageShift) + } + s.state = _MSpanFree + if s.inList() { + h.busyList(s.npages).remove(s) + } + + // Stamp newly unused spans. The scavenger will use that + // info to potentially give back some pages to the OS. + s.unusedsince = unusedsince + if unusedsince == 0 { + s.unusedsince = nanotime() + } + s.npreleased = 0 + + // Coalesce with earlier, later spans. + p := (s.base() - h.arena_start) >> _PageShift + if p > 0 { + t := h.spans[p-1] + if t != nil && t.state == _MSpanFree { + s.startAddr = t.startAddr + s.npages += t.npages + s.npreleased = t.npreleased // absorb released pages + s.needzero |= t.needzero + p -= t.npages + h.spans[p] = s + h.freeList(t.npages).remove(t) + t.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(t)) + } + } + if (p + s.npages) < uintptr(len(h.spans)) { + t := h.spans[p+s.npages] + if t != nil && t.state == _MSpanFree { + s.npages += t.npages + s.npreleased += t.npreleased + s.needzero |= t.needzero + h.spans[p+s.npages-1] = s + h.freeList(t.npages).remove(t) + t.state = _MSpanDead + h.spanalloc.free(unsafe.Pointer(t)) + } + } + + // Insert s into appropriate list. + h.freeList(s.npages).insert(s) +} + +func (h *mheap) freeList(npages uintptr) *mSpanList { + if npages < uintptr(len(h.free)) { + return &h.free[npages] + } + return &h.freelarge +} + +func (h *mheap) busyList(npages uintptr) *mSpanList { + if npages < uintptr(len(h.free)) { + return &h.busy[npages] + } + return &h.busylarge +} + +func scavengelist(list *mSpanList, now, limit uint64) uintptr { + if list.isEmpty() { + return 0 + } + + var sumreleased uintptr + for s := list.first; s != nil; s = s.next { + if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages { + start := s.base() + end := start + s.npages<<_PageShift + if physPageSize > _PageSize { + // We can only release pages in + // physPageSize blocks, so round start + // and end in. (Otherwise, madvise + // will round them *out* and release + // more memory than we want.) + start = (start + physPageSize - 1) &^ (physPageSize - 1) + end &^= physPageSize - 1 + if end <= start { + // start and end don't span a + // whole physical page. + continue + } + } + len := end - start + + released := len - (s.npreleased << _PageShift) + if physPageSize > _PageSize && released == 0 { + continue + } + memstats.heap_released += uint64(released) + sumreleased += released + s.npreleased = len >> _PageShift + sysUnused(unsafe.Pointer(start), len) + } + } + return sumreleased +} + +func (h *mheap) scavenge(k int32, now, limit uint64) { + lock(&h.lock) + var sumreleased uintptr + for i := 0; i < len(h.free); i++ { + sumreleased += scavengelist(&h.free[i], now, limit) + } + sumreleased += scavengelist(&h.freelarge, now, limit) + unlock(&h.lock) + + if debug.gctrace > 0 { + if sumreleased > 0 { + print("scvg", k, ": ", sumreleased>>20, " MB released\n") + } + // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap. + // But we can't call ReadMemStats on g0 holding locks. + print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n") + } +} + +//go:linkname runtime_debug_freeOSMemory runtime_debug.freeOSMemory +func runtime_debug_freeOSMemory() { + gcStart(gcForceBlockMode, false) + systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) }) +} + +// Initialize a new span with the given start and npages. +func (span *mspan) init(base uintptr, npages uintptr) { + // span is *not* zeroed. + span.next = nil + span.prev = nil + span.list = nil + span.startAddr = base + span.npages = npages + span.allocCount = 0 + span.sizeclass = 0 + span.incache = false + span.elemsize = 0 + span.state = _MSpanDead + span.unusedsince = 0 + span.npreleased = 0 + span.speciallock.key = 0 + span.specials = nil + span.needzero = 0 + span.freeindex = 0 + span.allocBits = nil + span.gcmarkBits = nil +} + +func (span *mspan) inList() bool { + return span.list != nil +} + +// Initialize an empty doubly-linked list. +func (list *mSpanList) init() { + list.first = nil + list.last = nil +} + +func (list *mSpanList) remove(span *mspan) { + if span.list != list { + println("runtime: failed MSpanList_Remove", span, span.prev, span.list, list) + throw("MSpanList_Remove") + } + if list.first == span { + list.first = span.next + } else { + span.prev.next = span.next + } + if list.last == span { + list.last = span.prev + } else { + span.next.prev = span.prev + } + span.next = nil + span.prev = nil + span.list = nil +} + +func (list *mSpanList) isEmpty() bool { + return list.first == nil +} + +func (list *mSpanList) insert(span *mspan) { + if span.next != nil || span.prev != nil || span.list != nil { + println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list) + throw("MSpanList_Insert") + } + span.next = list.first + if list.first != nil { + // The list contains at least one span; link it in. + // The last span in the list doesn't change. + list.first.prev = span + } else { + // The list contains no spans, so this is also the last span. + list.last = span + } + list.first = span + span.list = list +} + +func (list *mSpanList) insertBack(span *mspan) { + if span.next != nil || span.prev != nil || span.list != nil { + println("failed MSpanList_InsertBack", span, span.next, span.prev, span.list) + throw("MSpanList_InsertBack") + } + span.prev = list.last + if list.last != nil { + // The list contains at least one span. + list.last.next = span + } else { + // The list contains no spans, so this is also the first span. + list.first = span + } + list.last = span + span.list = list +} + +const ( + _KindSpecialFinalizer = 1 + _KindSpecialProfile = 2 + // Note: The finalizer special must be first because if we're freeing + // an object, a finalizer special will cause the freeing operation + // to abort, and we want to keep the other special records around + // if that happens. +) + +//go:notinheap +type special struct { + next *special // linked list in span + offset uint16 // span offset of object + kind byte // kind of special +} + +// Adds the special record s to the list of special records for +// the object p. All fields of s should be filled in except for +// offset & next, which this routine will fill in. +// Returns true if the special was successfully added, false otherwise. +// (The add will fail only if a record with the same p and s->kind +// already exists.) +func addspecial(p unsafe.Pointer, s *special) bool { + span := mheap_.lookupMaybe(p) + if span == nil { + throw("addspecial on invalid pointer") + } + + // Ensure that the span is swept. + // Sweeping accesses the specials list w/o locks, so we have + // to synchronize with it. And it's just much safer. + mp := acquirem() + span.ensureSwept() + + offset := uintptr(p) - span.base() + kind := s.kind + + lock(&span.speciallock) + + // Find splice point, check for existing record. + t := &span.specials + for { + x := *t + if x == nil { + break + } + if offset == uintptr(x.offset) && kind == x.kind { + unlock(&span.speciallock) + releasem(mp) + return false // already exists + } + if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) { + break + } + t = &x.next + } + + // Splice in record, fill in offset. + s.offset = uint16(offset) + s.next = *t + *t = s + unlock(&span.speciallock) + releasem(mp) + + return true +} + +// Removes the Special record of the given kind for the object p. +// Returns the record if the record existed, nil otherwise. +// The caller must FixAlloc_Free the result. +func removespecial(p unsafe.Pointer, kind uint8) *special { + span := mheap_.lookupMaybe(p) + if span == nil { + throw("removespecial on invalid pointer") + } + + // Ensure that the span is swept. + // Sweeping accesses the specials list w/o locks, so we have + // to synchronize with it. And it's just much safer. + mp := acquirem() + span.ensureSwept() + + offset := uintptr(p) - span.base() + + lock(&span.speciallock) + t := &span.specials + for { + s := *t + if s == nil { + break + } + // This function is used for finalizers only, so we don't check for + // "interior" specials (p must be exactly equal to s->offset). + if offset == uintptr(s.offset) && kind == s.kind { + *t = s.next + unlock(&span.speciallock) + releasem(mp) + return s + } + t = &s.next + } + unlock(&span.speciallock) + releasem(mp) + return nil +} + +// The described object has a finalizer set for it. +// +// specialfinalizer is allocated from non-GC'd memory, so any heap +// pointers must be specially handled. +// +//go:notinheap +type specialfinalizer struct { + special special + fn *funcval // May be a heap pointer. + ft *functype // May be a heap pointer, but always live. + ot *ptrtype // May be a heap pointer, but always live. +} + +// Adds a finalizer to the object p. Returns true if it succeeded. +func addfinalizer(p unsafe.Pointer, f *funcval, ft *functype, ot *ptrtype) bool { + lock(&mheap_.speciallock) + s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc()) + unlock(&mheap_.speciallock) + s.special.kind = _KindSpecialFinalizer + s.fn = f + s.ft = ft + s.ot = ot + if addspecial(p, &s.special) { + // This is responsible for maintaining the same + // GC-related invariants as markrootSpans in any + // situation where it's possible that markrootSpans + // has already run but mark termination hasn't yet. + if gcphase != _GCoff { + _, base, _ := findObject(p) + mp := acquirem() + gcw := &mp.p.ptr().gcw + // Mark everything reachable from the object + // so it's retained for the finalizer. + scanobject(uintptr(base), gcw) + // Mark the finalizer itself, since the + // special isn't part of the GC'd heap. + scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw) + if gcBlackenPromptly { + gcw.dispose() + } + releasem(mp) + } + return true + } + + // There was an old finalizer + lock(&mheap_.speciallock) + mheap_.specialfinalizeralloc.free(unsafe.Pointer(s)) + unlock(&mheap_.speciallock) + return false +} + +// Removes the finalizer (if any) from the object p. +func removefinalizer(p unsafe.Pointer) { + s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer))) + if s == nil { + return // there wasn't a finalizer to remove + } + lock(&mheap_.speciallock) + mheap_.specialfinalizeralloc.free(unsafe.Pointer(s)) + unlock(&mheap_.speciallock) +} + +// The described object is being heap profiled. +// +//go:notinheap +type specialprofile struct { + special special + b *bucket +} + +// Set the heap profile bucket associated with addr to b. +func setprofilebucket(p unsafe.Pointer, b *bucket) { + lock(&mheap_.speciallock) + s := (*specialprofile)(mheap_.specialprofilealloc.alloc()) + unlock(&mheap_.speciallock) + s.special.kind = _KindSpecialProfile + s.b = b + if !addspecial(p, &s.special) { + throw("setprofilebucket: profile already set") + } +} + +// Do whatever cleanup needs to be done to deallocate s. It has +// already been unlinked from the MSpan specials list. +func freespecial(s *special, p unsafe.Pointer, size uintptr) { + switch s.kind { + case _KindSpecialFinalizer: + sf := (*specialfinalizer)(unsafe.Pointer(s)) + queuefinalizer(p, sf.fn, sf.ft, sf.ot) + lock(&mheap_.speciallock) + mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf)) + unlock(&mheap_.speciallock) + case _KindSpecialProfile: + sp := (*specialprofile)(unsafe.Pointer(s)) + mProf_Free(sp.b, size) + lock(&mheap_.speciallock) + mheap_.specialprofilealloc.free(unsafe.Pointer(sp)) + unlock(&mheap_.speciallock) + default: + throw("bad special kind") + panic("not reached") + } +} + +const gcBitsChunkBytes = uintptr(64 << 10) +const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{}) + +type gcBitsHeader struct { + free uintptr // free is the index into bits of the next free byte. + next uintptr // *gcBits triggers recursive type bug. (issue 14620) +} + +//go:notinheap +type gcBits struct { + // gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand. + free uintptr // free is the index into bits of the next free byte. + next *gcBits + bits [gcBitsChunkBytes - gcBitsHeaderBytes]uint8 +} + +var gcBitsArenas struct { + lock mutex + free *gcBits + next *gcBits + current *gcBits + previous *gcBits +} + +// newMarkBits returns a pointer to 8 byte aligned bytes +// to be used for a span's mark bits. +func newMarkBits(nelems uintptr) *uint8 { + lock(&gcBitsArenas.lock) + blocksNeeded := uintptr((nelems + 63) / 64) + bytesNeeded := blocksNeeded * 8 + if gcBitsArenas.next == nil || + gcBitsArenas.next.free+bytesNeeded > uintptr(len(gcBits{}.bits)) { + // Allocate a new arena. + fresh := newArena() + fresh.next = gcBitsArenas.next + gcBitsArenas.next = fresh + } + if gcBitsArenas.next.free >= gcBitsChunkBytes { + println("runtime: gcBitsArenas.next.free=", gcBitsArenas.next.free, gcBitsChunkBytes) + throw("markBits overflow") + } + result := &gcBitsArenas.next.bits[gcBitsArenas.next.free] + gcBitsArenas.next.free += bytesNeeded + unlock(&gcBitsArenas.lock) + return result +} + +// newAllocBits returns a pointer to 8 byte aligned bytes +// to be used for this span's alloc bits. +// newAllocBits is used to provide newly initialized spans +// allocation bits. For spans not being initialized the +// the mark bits are repurposed as allocation bits when +// the span is swept. +func newAllocBits(nelems uintptr) *uint8 { + return newMarkBits(nelems) +} + +// nextMarkBitArenaEpoch establishes a new epoch for the arenas +// holding the mark bits. The arenas are named relative to the +// current GC cycle which is demarcated by the call to finishweep_m. +// +// All current spans have been swept. +// During that sweep each span allocated room for its gcmarkBits in +// gcBitsArenas.next block. gcBitsArenas.next becomes the gcBitsArenas.current +// where the GC will mark objects and after each span is swept these bits +// will be used to allocate objects. +// gcBitsArenas.current becomes gcBitsArenas.previous where the span's +// gcAllocBits live until all the spans have been swept during this GC cycle. +// The span's sweep extinguishes all the references to gcBitsArenas.previous +// by pointing gcAllocBits into the gcBitsArenas.current. +// The gcBitsArenas.previous is released to the gcBitsArenas.free list. +func nextMarkBitArenaEpoch() { + lock(&gcBitsArenas.lock) + if gcBitsArenas.previous != nil { + if gcBitsArenas.free == nil { + gcBitsArenas.free = gcBitsArenas.previous + } else { + // Find end of previous arenas. + last := gcBitsArenas.previous + for last = gcBitsArenas.previous; last.next != nil; last = last.next { + } + last.next = gcBitsArenas.free + gcBitsArenas.free = gcBitsArenas.previous + } + } + gcBitsArenas.previous = gcBitsArenas.current + gcBitsArenas.current = gcBitsArenas.next + gcBitsArenas.next = nil // newMarkBits calls newArena when needed + unlock(&gcBitsArenas.lock) +} + +// newArena allocates and zeroes a gcBits arena. +func newArena() *gcBits { + var result *gcBits + if gcBitsArenas.free == nil { + result = (*gcBits)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys)) + if result == nil { + throw("runtime: cannot allocate memory") + } + } else { + result = gcBitsArenas.free + gcBitsArenas.free = gcBitsArenas.free.next + memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes) + } + result.next = nil + // If result.bits is not 8 byte aligned adjust index so + // that &result.bits[result.free] is 8 byte aligned. + if uintptr(unsafe.Offsetof(gcBits{}.bits))&7 == 0 { + result.free = 0 + } else { + result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7) + } + return result +} diff --git a/libgo/go/runtime/mprof.go b/libgo/go/runtime/mprof.go index 1bfdc39..87f84a7 100644 --- a/libgo/go/runtime/mprof.go +++ b/libgo/go/runtime/mprof.go @@ -12,15 +12,6 @@ import ( "unsafe" ) -// Export temporarily for gccgo's C code to call: -//go:linkname mProf_Malloc runtime.mProf_Malloc -//go:linkname mProf_Free runtime.mProf_Free -//go:linkname mProf_GC runtime.mProf_GC -//go:linkname tracealloc runtime.tracealloc -//go:linkname tracefree runtime.tracefree -//go:linkname tracegc runtime.tracegc -//go:linkname iterate_memprof runtime.iterate_memprof - // NOTE(rsc): Everything here could use cas if contention became an issue. var proflock mutex diff --git a/libgo/go/runtime/msize.go b/libgo/go/runtime/msize.go new file mode 100644 index 0000000..438c987 --- /dev/null +++ b/libgo/go/runtime/msize.go @@ -0,0 +1,47 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Malloc small size classes. +// +// See malloc.go for overview. +// See also mksizeclasses.go for how we decide what size classes to use. + +package runtime + +// sizeToClass(0 <= n <= MaxSmallSize) returns the size class, +// 1 <= sizeclass < NumSizeClasses, for n. +// Size class 0 is reserved to mean "not small". +// +// The sizeToClass lookup is implemented using two arrays, +// one mapping sizes <= 1024 to their class and one mapping +// sizes >= 1024 and <= MaxSmallSize to their class. +// All objects are 8-aligned, so the first array is indexed by +// the size divided by 8 (rounded up). Objects >= 1024 bytes +// are 128-aligned, so the second array is indexed by the +// size divided by 128 (rounded up). The arrays are constants +// in sizeclass.go generated by mksizeclass.go. +func sizeToClass(size uint32) uint32 { + if size > _MaxSmallSize { + throw("invalid size") + } + if size > smallSizeMax-8 { + return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]) + } + return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) +} + +// Returns size of the memory block that mallocgc will allocate if you ask for the size. +func roundupsize(size uintptr) uintptr { + if size < _MaxSmallSize { + if size <= smallSizeMax-8 { + return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]]) + } else { + return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]]) + } + } + if size+_PageSize < size { + return size + } + return round(size, _PageSize) +} diff --git a/libgo/go/runtime/mstats.go b/libgo/go/runtime/mstats.go index 178c32c..aa3cfef 100644 --- a/libgo/go/runtime/mstats.go +++ b/libgo/go/runtime/mstats.go @@ -467,10 +467,7 @@ func readmemstats_m(stats *MemStats) { // For gccgo this is in runtime/mgc0.c. func updatememstats(stats *gcstats) -/* -For gccgo these are still in runtime/mgc0.c. - -//go:linkname readGCStats runtime/debug.readGCStats +//go:linkname readGCStats runtime_debug.readGCStats func readGCStats(pauses *[]uint64) { systemstack(func() { readGCStats_m(pauses) @@ -618,7 +615,6 @@ func flushmcache(i int) { return } c.releaseAll() - stackcache_clear(c) } // flushallmcaches flushes the mcaches of all Ps. @@ -652,8 +648,6 @@ func purgecachedstats(c *mcache) { } } -*/ - // Atomically increases a given *system* memory stat. We are counting on this // stat never overflowing a uintptr, so this function must only be used for // system memory stats. diff --git a/libgo/go/runtime/netpoll.go b/libgo/go/runtime/netpoll.go index 876eaea..8932455 100644 --- a/libgo/go/runtime/netpoll.go +++ b/libgo/go/runtime/netpoll.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows package runtime diff --git a/libgo/go/runtime/netpoll_aix.go b/libgo/go/runtime/netpoll_aix.go new file mode 100644 index 0000000..e40dfb6 --- /dev/null +++ b/libgo/go/runtime/netpoll_aix.go @@ -0,0 +1,173 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +// This is based on the former libgo/runtime/netpoll_select.c implementation +// except that it uses poll instead of select and is written in Go. + +// These definitions should come from sysinfo.go as they may be OS-dependent. +// These are the definitions for the AIX operating system. +type pollfd struct { + fd int32 + events int16 + revents int16 +} + +const _POLLIN = 0x0001 +const _POLLOUT = 0x0002 +const _POLLHUP = 0x2000 +const _POLLERR = 0x4000 + +//extern poll +func libc_poll(pfds *pollfd, npfds uintptr, timeout uintptr) int32 + +//extern pipe +func libc_pipe(fd *int32) int32 + +//extern __go_fcntl_uintptr +func fcntlUintptr(fd, cmd, arg uintptr) (uintptr, uintptr) + +func closeonexec(fd int32) { + fcntlUintptr(uintptr(fd), _F_SETFD, _FD_CLOEXEC) +} + +var ( + allocated int + pfds []pollfd + mpfds map[uintptr]*pollDesc + pmtx mutex + rdwake int32 + wrwake int32 +) + +func netpollinit() { + var p [2]int32 + + // Create the pipe we use to wakeup poll. + if err := libc_pipe(&p[0]); err < 0 { + throw("netpollinit: failed to create pipe") + } + rdwake = p[0] + wrwake = p[1] + + closeonexec(rdwake) + closeonexec(wrwake) + + // Pre-allocate array of pollfd structures for poll. + allocated = 128 + pfds = make([]pollfd, allocated) + + mpfds = make(map[uintptr]*pollDesc) +} + +func netpollopen(fd uintptr, pd *pollDesc) int32 { + lock(&pmtx) + mpfds[fd] = pd + unlock(&pmtx) + + // Wakeup poll. + b := [1]byte{0} + write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) + + return 0 +} + +func netpollclose(fd uintptr) int32 { + lock(&pmtx) + delete(mpfds, fd) + unlock(&pmtx) + + // Wakeup poll. + b := [1]byte{0} + write(uintptr(wrwake), unsafe.Pointer(&b[0]), 1) + + return 0 +} + +func netpollarm(pd *pollDesc, mode int) { + throw("unused") +} + +func netpoll(block bool) *g { + if allocated == 0 { + return nil + } + timeout := ^uintptr(0) + if !block { + timeout = 0 + } +retry: + lock(&pmtx) + npfds := len(mpfds) + 1 + unlock(&pmtx) + + if npfds > allocated { + for npfds > allocated { + allocated *= 2 + } + pfds = make([]pollfd, allocated) + } + + // Poll the read side of the pipe. + pfds[0].fd = rdwake + pfds[0].events = _POLLIN + lock(&pmtx) + // Notice that npfds may have changed since we released the lock. + // Just copy what we can, new descriptors will be added at next + // iteration. + i := 1 + for fd := range mpfds { + if i >= allocated { + break + } + pfds[i].fd = int32(fd) + pfds[i].events = _POLLIN | _POLLOUT + i++ + } + npfds = i + unlock(&pmtx) + + n := libc_poll(&pfds[0], uintptr(npfds), timeout) + if n < 0 { + e := errno() + if e != _EINTR { + throw("poll failed") + } + goto retry + } + var gp guintptr + for i = 0; i < npfds && n > 0; i++ { + pfd := pfds[i] + + var mode int32 + if pfd.revents&(_POLLIN|_POLLHUP|_POLLERR) != 0 { + if i == 0 { + var b [1]byte + read(pfd.fd, unsafe.Pointer(&b[0]), 1) + n-- + continue + } + mode += 'r' + } + if pfd.revents&(_POLLOUT|_POLLHUP|_POLLERR) != 0 { + mode += 'w' + } + if mode != 0 { + lock(&pmtx) + pd := mpfds[uintptr(pfd.fd)] + unlock(&pmtx) + if pd != nil { + netpollready(&gp, pd, mode) + } + n-- + } + } + if block && gp == 0 { + goto retry + } + return gp.ptr() +} diff --git a/libgo/go/runtime/os_aix.go b/libgo/go/runtime/os_aix.go new file mode 100644 index 0000000..246b9c3 --- /dev/null +++ b/libgo/go/runtime/os_aix.go @@ -0,0 +1,98 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +type mOS struct { + waitsema uintptr // semaphore for parking on locks +} + +//extern malloc +func libc_malloc(uintptr) unsafe.Pointer + +//go:noescape +//extern sem_init +func sem_init(sem *semt, pshared int32, value uint32) int32 + +//go:noescape +//extern sem_wait +func sem_wait(sem *semt) int32 + +//go:noescape +//extern sem_post +func sem_post(sem *semt) int32 + +//go:noescape +//extern sem_timedwait +func sem_timedwait(sem *semt, timeout *timespec) int32 + +//go:noescape +//extern clock_gettime +func clock_gettime(clock_id int64, timeout *timespec) int32 + +//go:nosplit +func semacreate(mp *m) { + if mp.mos.waitsema != 0 { + return + } + + var sem *semt + + // Call libc's malloc rather than malloc. This will + // allocate space on the C heap. We can't call malloc + // here because it could cause a deadlock. + sem = (*semt)(libc_malloc(unsafe.Sizeof(*sem))) + if sem_init(sem, 0, 0) != 0 { + throw("sem_init") + } + mp.mos.waitsema = uintptr(unsafe.Pointer(sem)) +} + +//go:nosplit +func semasleep(ns int64) int32 { + _m_ := getg().m + if ns >= 0 { + const CLOCK_REALTIME int64 = 9 + var ts timespec + + if clock_gettime(CLOCK_REALTIME, &ts) != 0 { + throw("clock_gettime") + } + ts.tv_sec += timespec_sec_t(ns / 1000000000) + ts.tv_nsec += timespec_nsec_t(ns % 1000000000) + if ts.tv_nsec >= 1000000000 { + ts.tv_sec += timespec_sec_t(1) + ts.tv_nsec -= timespec_nsec_t(1000000000) + } + + if sem_timedwait((*semt)(unsafe.Pointer(_m_.mos.waitsema)), &ts) != 0 { + err := errno() + if err == _ETIMEDOUT || err == _EAGAIN || err == _EINTR { + return -1 + } + throw("sem_timedwait") + } + return 0 + } + for { + r1 := sem_wait((*semt)(unsafe.Pointer(_m_.mos.waitsema))) + if r1 == 0 { + break + } + if errno() == _EINTR { + continue + } + throw("sem_wait") + } + return 0 +} + +//go:nosplit +func semawakeup(mp *m) { + if sem_post((*semt)(unsafe.Pointer(mp.mos.waitsema))) != 0 { + throw("sem_post") + } +} diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go index a8f05a4..358a38b 100644 --- a/libgo/go/runtime/os_gccgo.go +++ b/libgo/go/runtime/os_gccgo.go @@ -11,6 +11,10 @@ import ( // Temporary for C code to call: //go:linkname minit runtime.minit +func goenvs() { + goenvs_unix() +} + // Called to initialize a new m (including the bootstrap m). // Called on the parent thread (main thread in case of bootstrap), can allocate memory. func mpreinit(mp *m) { diff --git a/libgo/go/runtime/os_linux.go b/libgo/go/runtime/os_linux.go index ad33486..e1a6a30 100644 --- a/libgo/go/runtime/os_linux.go +++ b/libgo/go/runtime/os_linux.go @@ -166,6 +166,3 @@ func sysauxv(auxv []uintptr) int { } return i / 2 } - -// Temporary for gccgo until we port mem_GOOS.go. -var addrspace_vec [1]byte diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go index b76bb21..aa196ae 100644 --- a/libgo/go/runtime/panic.go +++ b/libgo/go/runtime/panic.go @@ -97,7 +97,6 @@ func deferproc(frame *bool, pfn uintptr, arg unsafe.Pointer) { n.arg = arg n.retaddr = 0 n.makefunccanrecover = false - n.special = false } // Allocate a Defer, usually using per-P pool. @@ -141,10 +140,6 @@ func newdefer() *_defer { // //go:nosplit func freedefer(d *_defer) { - if d.special { - return - } - // When C code calls a Go function on a non-Go thread, the // deferred call to cgocallBackDone will set g to nil. // Don't crash trying to put d on the free list; just let it diff --git a/libgo/go/runtime/pprof/mprof_test.go b/libgo/go/runtime/pprof/mprof_test.go index 079af15..5ebd46b 100644 --- a/libgo/go/runtime/pprof/mprof_test.go +++ b/libgo/go/runtime/pprof/mprof_test.go @@ -103,9 +103,11 @@ func TestMemoryProfiler(t *testing.T) { # 0x[0-9,a-f]+ runtime_pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/mprof_test.go:74 `, memoryProfilerRun, (2<<20)*memoryProfilerRun, memoryProfilerRun, (2<<20)*memoryProfilerRun), - fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+ + // This should start with "0: 0" but gccgo's imprecise + // GC means that sometimes the value is not collected. + fmt.Sprintf(`(0|%v): (0|%v) \[%v: %v\] @( 0x[0-9,a-f]+)+ # 0x[0-9,a-f]+ pprof_test\.allocateReflectTransient\+0x[0-9,a-f]+ .*/mprof_test.go:49 -`, memoryProfilerRun, (2<<20)*memoryProfilerRun), +`, memoryProfilerRun, (2<<20)*memoryProfilerRun, memoryProfilerRun, (2<<20)*memoryProfilerRun), } for _, test := range tests { diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index ea7f84e..b28e26b 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -6,61 +6,128 @@ package runtime import ( "runtime/internal/atomic" + "runtime/internal/sys" "unsafe" ) -// Functions temporarily called by C code. +// Functions called by C code. +//go:linkname main runtime.main +//go:linkname goparkunlock runtime.goparkunlock //go:linkname newextram runtime.newextram //go:linkname acquirep runtime.acquirep //go:linkname releasep runtime.releasep //go:linkname incidlelocked runtime.incidlelocked -//go:linkname checkdead runtime.checkdead -//go:linkname sysmon runtime.sysmon -//go:linkname schedtrace runtime.schedtrace -//go:linkname allgadd runtime.allgadd -//go:linkname mcommoninit runtime.mcommoninit +//go:linkname schedinit runtime.schedinit //go:linkname ready runtime.ready //go:linkname gcprocs runtime.gcprocs -//go:linkname needaddgcproc runtime.needaddgcproc //go:linkname stopm runtime.stopm //go:linkname handoffp runtime.handoffp //go:linkname wakep runtime.wakep //go:linkname stoplockedm runtime.stoplockedm //go:linkname schedule runtime.schedule //go:linkname execute runtime.execute -//go:linkname gfput runtime.gfput +//go:linkname goexit1 runtime.goexit1 +//go:linkname reentersyscall runtime.reentersyscall +//go:linkname reentersyscallblock runtime.reentersyscallblock +//go:linkname exitsyscall runtime.exitsyscall //go:linkname gfget runtime.gfget -//go:linkname lockOSThread runtime.lockOSThread -//go:linkname unlockOSThread runtime.unlockOSThread -//go:linkname procresize runtime.procresize //go:linkname helpgc runtime.helpgc -//go:linkname stopTheWorldWithSema runtime.stopTheWorldWithSema -//go:linkname startTheWorldWithSema runtime.startTheWorldWithSema -//go:linkname mput runtime.mput -//go:linkname mget runtime.mget +//go:linkname kickoff runtime.kickoff +//go:linkname mstart1 runtime.mstart1 //go:linkname globrunqput runtime.globrunqput //go:linkname pidleget runtime.pidleget -//go:linkname runqempty runtime.runqempty -//go:linkname runqput runtime.runqput // Function called by misc/cgo/test. //go:linkname lockedOSThread runtime.lockedOSThread -// Functions temporarily in C that have not yet been ported. -func allocm(*p, bool, *unsafe.Pointer, *uintptr) *m +// C functions for thread and context management. +func newosproc(*m) func malg(bool, bool, *unsafe.Pointer, *uintptr) *g -func startm(*p, bool) -func newm(unsafe.Pointer, *p) -func gchelper() -func getfingwait() bool -func getfingwake() bool -func wakefing() *g - -// C functions for ucontext management. +func resetNewG(*g, *unsafe.Pointer, *uintptr) func gogo(*g) func setGContext() func makeGContext(*g, unsafe.Pointer, uintptr) func getTraceback(me, gp *g) +func gtraceback(*g) +func _cgo_notify_runtime_init_done() +func alreadyInCallers() bool + +// Functions created by the compiler. +//extern __go_init_main +func main_init() + +//extern main.main +func main_main() + +var buildVersion = sys.TheVersion + +// Goroutine scheduler +// The scheduler's job is to distribute ready-to-run goroutines over worker threads. +// +// The main concepts are: +// G - goroutine. +// M - worker thread, or machine. +// P - processor, a resource that is required to execute Go code. +// M must have an associated P to execute Go code, however it can be +// blocked or in a syscall w/o an associated P. +// +// Design doc at https://golang.org/s/go11sched. + +// Worker thread parking/unparking. +// We need to balance between keeping enough running worker threads to utilize +// available hardware parallelism and parking excessive running worker threads +// to conserve CPU resources and power. This is not simple for two reasons: +// (1) scheduler state is intentionally distributed (in particular, per-P work +// queues), so it is not possible to compute global predicates on fast paths; +// (2) for optimal thread management we would need to know the future (don't park +// a worker thread when a new goroutine will be readied in near future). +// +// Three rejected approaches that would work badly: +// 1. Centralize all scheduler state (would inhibit scalability). +// 2. Direct goroutine handoff. That is, when we ready a new goroutine and there +// is a spare P, unpark a thread and handoff it the thread and the goroutine. +// This would lead to thread state thrashing, as the thread that readied the +// goroutine can be out of work the very next moment, we will need to park it. +// Also, it would destroy locality of computation as we want to preserve +// dependent goroutines on the same thread; and introduce additional latency. +// 3. Unpark an additional thread whenever we ready a goroutine and there is an +// idle P, but don't do handoff. This would lead to excessive thread parking/ +// unparking as the additional threads will instantly park without discovering +// any work to do. +// +// The current approach: +// We unpark an additional thread when we ready a goroutine if (1) there is an +// idle P and there are no "spinning" worker threads. A worker thread is considered +// spinning if it is out of local work and did not find work in global run queue/ +// netpoller; the spinning state is denoted in m.spinning and in sched.nmspinning. +// Threads unparked this way are also considered spinning; we don't do goroutine +// handoff so such threads are out of work initially. Spinning threads do some +// spinning looking for work in per-P run queues before parking. If a spinning +// thread finds work it takes itself out of the spinning state and proceeds to +// execution. If it does not find work it takes itself out of the spinning state +// and then parks. +// If there is at least one spinning thread (sched.nmspinning>1), we don't unpark +// new threads when readying goroutines. To compensate for that, if the last spinning +// thread finds work and stops spinning, it must unpark a new spinning thread. +// This approach smooths out unjustified spikes of thread unparking, +// but at the same time guarantees eventual maximal CPU parallelism utilization. +// +// The main implementation complication is that we need to be very careful during +// spinning->non-spinning thread transition. This transition can race with submission +// of a new goroutine, and either one part or another needs to unpark another worker +// thread. If they both fail to do that, we can end up with semi-persistent CPU +// underutilization. The general pattern for goroutine readying is: submit a goroutine +// to local work queue, #StoreLoad-style memory barrier, check sched.nmspinning. +// The general pattern for spinning->non-spinning transition is: decrement nmspinning, +// #StoreLoad-style memory barrier, check all per-P work queues for new work. +// Note that all this complexity does not apply to global run queue as we are not +// sloppy about thread unparking when submitting to global queue. Also see comments +// for nmspinning manipulation. + +var ( + m0 m + g0 g +) // main_init_done is a signal used by cgocallbackg that initialization // has been completed. It is made before _cgo_notify_runtime_init_done, @@ -68,6 +135,159 @@ func getTraceback(me, gp *g) // it is closed, meaning cgocallbackg can reliably receive from it. var main_init_done chan bool +// runtimeInitTime is the nanotime() at which the runtime started. +var runtimeInitTime int64 + +// Value to use for signal mask for newly created M's. +var initSigmask sigset + +// The main goroutine. +func main() { + g := getg() + + // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit. + // Using decimal instead of binary GB and MB because + // they look nicer in the stack overflow failure message. + if sys.PtrSize == 8 { + maxstacksize = 1000000000 + } else { + maxstacksize = 250000000 + } + + // Record when the world started. + runtimeInitTime = nanotime() + + systemstack(func() { + newm(sysmon, nil) + }) + + // Lock the main goroutine onto this, the main OS thread, + // during initialization. Most programs won't care, but a few + // do require certain calls to be made by the main thread. + // Those can arrange for main.main to run in the main thread + // by calling runtime.LockOSThread during initialization + // to preserve the lock. + lockOSThread() + + if g.m != &m0 { + throw("runtime.main not on m0") + } + + // Defer unlock so that runtime.Goexit during init does the unlock too. + needUnlock := true + defer func() { + if needUnlock { + unlockOSThread() + } + }() + + main_init_done = make(chan bool) + if iscgo { + _cgo_notify_runtime_init_done() + } + + fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime + fn() + close(main_init_done) + + needUnlock = false + unlockOSThread() + + // For gccgo we have to wait until after main is initialized + // to enable GC, because initializing main registers the GC roots. + gcenable() + + if isarchive || islibrary { + // A program compiled with -buildmode=c-archive or c-shared + // has a main, but it is not executed. + return + } + fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime + fn() + if raceenabled { + racefini() + } + + // Make racy client program work: if panicking on + // another goroutine at the same time as main returns, + // let the other goroutine finish printing the panic trace. + // Once it does, it will exit. See issue 3934. + if panicking != 0 { + gopark(nil, nil, "panicwait", traceEvGoStop, 1) + } + + exit(0) + for { + var x *int32 + *x = 0 + } +} + +// os_beforeExit is called from os.Exit(0). +//go:linkname os_beforeExit os.runtime_beforeExit +func os_beforeExit() { + if raceenabled { + racefini() + } +} + +// start forcegc helper goroutine +func init() { + go forcegchelper() +} + +func forcegchelper() { + forcegc.g = getg() + for { + lock(&forcegc.lock) + if forcegc.idle != 0 { + throw("forcegc: phase error") + } + atomic.Store(&forcegc.idle, 1) + goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock, 1) + // this goroutine is explicitly resumed by sysmon + if debug.gctrace > 0 { + println("GC forced") + } + gcStart(gcBackgroundMode, true) + } +} + +//go:nosplit + +// Gosched yields the processor, allowing other goroutines to run. It does not +// suspend the current goroutine, so execution resumes automatically. +func Gosched() { + mcall(gosched_m) +} + +// Puts the current goroutine into a waiting state and calls unlockf. +// If unlockf returns false, the goroutine is resumed. +// unlockf must not access this G's stack, as it may be moved between +// the call to gopark and the call to unlockf. +func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte, traceskip int) { + mp := acquirem() + gp := mp.curg + status := readgstatus(gp) + if status != _Grunning && status != _Gscanrunning { + throw("gopark: bad g status") + } + mp.waitlock = lock + mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf)) + gp.waitreason = reason + mp.waittraceev = traceEv + mp.waittraceskip = traceskip + releasem(mp) + // can't do anything that might move the G between Ms here. + mcall(park_m) +} + +// Puts the current goroutine into a waiting state and unlocks the lock. +// The goroutine can be made runnable again by calling goready(gp). +func goparkunlock(lock *mutex, reason string, traceEv byte, traceskip int) { + gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) +} + func goready(gp *g, traceskip int) { systemstack(func() { ready(gp, traceskip, true) @@ -164,12 +384,11 @@ func releaseSudog(s *sudog) { // funcPC returns the entry PC of the function f. // It assumes that f is a func value. Otherwise the behavior is undefined. -// For gccgo here unless and until we port proc.go. -// Note that this differs from the gc implementation; the gc implementation -// adds sys.PtrSize to the address of the interface value, but GCC's -// alias analysis decides that that can not be a reference to the second -// field of the interface, and in some cases it drops the initialization -// of the second field as a dead store. +// For gccgo note that this differs from the gc implementation; the gc +// implementation adds sys.PtrSize to the address of the interface +// value, but GCC's alias analysis decides that that can not be a +// reference to the second field of the interface, and in some cases +// it drops the initialization of the second field as a dead store. //go:nosplit func funcPC(f interface{}) uintptr { i := (*iface)(unsafe.Pointer(&f)) @@ -207,6 +426,62 @@ func allgadd(gp *g) { unlock(&allglock) } +const ( + // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. + // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. + _GoidCacheBatch = 16 +) + +// The bootstrap sequence is: +// +// call osinit +// call schedinit +// make & queue new G +// call runtime·mstart +// +// The new G calls runtime·main. +func schedinit() { + _m_ := &m0 + _g_ := &g0 + _m_.g0 = _g_ + _m_.curg = _g_ + _g_.m = _m_ + setg(_g_) + + sched.maxmcount = 10000 + + tracebackinit() + mallocinit() + mcommoninit(_g_.m) + alginit() // maps must not be used before this call + + msigsave(_g_.m) + initSigmask = _g_.m.sigmask + + goargs() + goenvs() + parsedebugvars() + gcinit() + + sched.lastpoll = uint64(nanotime()) + procs := ncpu + if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { + procs = n + } + if procs > _MaxGomaxprocs { + procs = _MaxGomaxprocs + } + if procresize(procs) != nil { + throw("unknown runnable goroutine during bootstrap") + } + + if buildVersion == "" { + // Condition should never trigger. This code just serves + // to ensure runtime·buildVersion is kept in the resulting binary. + buildVersion = "unknown" + } +} + func dumpgstatus(gp *g) { _g_ := getg() print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") @@ -491,6 +766,122 @@ func casgstatus(gp *g, oldval, newval uint32) { } } +// scang blocks until gp's stack has been scanned. +// It might be scanned by scang or it might be scanned by the goroutine itself. +// Either way, the stack scan has completed when scang returns. +func scang(gp *g, gcw *gcWork) { + // Invariant; we (the caller, markroot for a specific goroutine) own gp.gcscandone. + // Nothing is racing with us now, but gcscandone might be set to true left over + // from an earlier round of stack scanning (we scan twice per GC). + // We use gcscandone to record whether the scan has been done during this round. + // It is important that the scan happens exactly once: if called twice, + // the installation of stack barriers will detect the double scan and die. + + gp.gcscandone = false + + // See http://golang.org/cl/21503 for justification of the yield delay. + const yieldDelay = 10 * 1000 + var nextYield int64 + + // Endeavor to get gcscandone set to true, + // either by doing the stack scan ourselves or by coercing gp to scan itself. + // gp.gcscandone can transition from false to true when we're not looking + // (if we asked for preemption), so any time we lock the status using + // castogscanstatus we have to double-check that the scan is still not done. +loop: + for i := 0; !gp.gcscandone; i++ { + switch s := readgstatus(gp); s { + default: + dumpgstatus(gp) + throw("stopg: invalid status") + + case _Gdead: + // No stack. + gp.gcscandone = true + break loop + + case _Gcopystack: + // Stack being switched. Go around again. + + case _Grunnable, _Gsyscall, _Gwaiting: + // Claim goroutine by setting scan bit. + // Racing with execution or readying of gp. + // The scan bit keeps them from running + // the goroutine until we're done. + if castogscanstatus(gp, s, s|_Gscan) { + if gp.scanningself { + // Don't try to scan the stack + // if the goroutine is going to do + // it itself. + restartg(gp) + break + } + if !gp.gcscandone { + scanstack(gp, gcw) + gp.gcscandone = true + } + restartg(gp) + break loop + } + + case _Gscanwaiting: + // newstack is doing a scan for us right now. Wait. + + case _Gscanrunning: + // checkPreempt is scanning. Wait. + + case _Grunning: + // Goroutine running. Try to preempt execution so it can scan itself. + // The preemption handler (in newstack) does the actual scan. + + // Optimization: if there is already a pending preemption request + // (from the previous loop iteration), don't bother with the atomics. + if gp.preemptscan && gp.preempt { + break + } + + // Ask for preemption and self scan. + if castogscanstatus(gp, _Grunning, _Gscanrunning) { + if !gp.gcscandone { + gp.preemptscan = true + gp.preempt = true + } + casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning) + } + } + + if i == 0 { + nextYield = nanotime() + yieldDelay + } + if nanotime() < nextYield { + procyield(10) + } else { + osyield() + nextYield = nanotime() + yieldDelay/2 + } + } + + gp.preemptscan = false // cancel scan request if no longer needed +} + +// The GC requests that this routine be moved from a scanmumble state to a mumble state. +func restartg(gp *g) { + s := readgstatus(gp) + switch s { + default: + dumpgstatus(gp) + throw("restartg: unexpected status") + + case _Gdead: + // ok + + case _Gscanrunnable, + _Gscanwaiting, + _Gscansyscall: + casfrom_Gscanstatus(gp, s, s&^_Gscan) + } +} + // stopTheWorld stops all P's from executing goroutines, interrupting // all goroutines at GC safe points and records reason as the reason // for the stop. On return, only the current goroutine's P is running. @@ -684,11 +1075,64 @@ func startTheWorldWithSema() { // coordinate. This lazy approach works out in practice: // we don't mind if the first couple gc rounds don't have quite // the maximum number of procs. - newm(unsafe.Pointer(funcPC(mhelpgc)), nil) + newm(mhelpgc, nil) } _g_.m.locks-- } +// First function run by a new goroutine. +// This is passed to makecontext. +func kickoff() { + gp := getg() + + if gp.traceback != nil { + gtraceback(gp) + } + + fv := gp.entry + param := gp.param + gp.entry = nil + gp.param = nil + fv(param) + goexit1() +} + +// This is called from mstart. +func mstart1() { + _g_ := getg() + + if _g_ != _g_.m.g0 { + throw("bad runtime·mstart") + } + + asminit() + minit() + + // Install signal handlers; after minit so that minit can + // prepare the thread to be able to handle the signals. + if _g_.m == &m0 { + // Create an extra M for callbacks on threads not created by Go. + if iscgo && !cgoHasExtraM { + cgoHasExtraM = true + newextram() + } + initsig(false) + } + + if fn := _g_.m.mstartfn; fn != nil { + fn() + } + + if _g_.m.helpgc != 0 { + _g_.m.helpgc = 0 + stopm() + } else if _g_.m != &m0 { + acquirep(_g_.m.nextp.ptr()) + _g_.m.nextp = 0 + } + schedule() +} + // forEachP calls fn(p) for every P p when p reaches a GC safe point. // If a P is currently executing code, this will bring the P to a GC // safe point and execute fn on that P. If the P is not executing code @@ -811,6 +1255,35 @@ func runSafePointFn() { unlock(&sched.lock) } +// Allocate a new m unassociated with any thread. +// Can use p for allocation context if needed. +// fn is recorded as the new m's m.mstartfn. +// +// This function is allowed to have write barriers even if the caller +// isn't because it borrows _p_. +// +//go:yeswritebarrierrec +func allocm(_p_ *p, fn func(), allocatestack bool) (mp *m, g0Stack unsafe.Pointer, g0StackSize uintptr) { + _g_ := getg() + _g_.m.locks++ // disable GC because it can be called from sysmon + if _g_.m.p == 0 { + acquirep(_p_) // temporarily borrow p for mallocs in this function + } + mp = new(m) + mp.mstartfn = fn + mcommoninit(mp) + + mp.g0 = malg(allocatestack, false, &g0Stack, &g0StackSize) + mp.g0.m = mp + + if _p_ == _g_.m.p.ptr() { + releasep() + } + _g_.m.locks-- + + return mp, g0Stack, g0StackSize +} + // needm is called when a cgo callback happens on a // thread without an m (a thread not created by Go). // In this case, needm is expected to find an m to use @@ -884,6 +1357,7 @@ func needm(x byte) { setGContext() // Initialize this thread to use the m. + asminit() minit() } @@ -915,9 +1389,7 @@ func oneNewExtraM() { // The sched.pc will never be returned to, but setting it to // goexit makes clear to the traceback routines where // the goroutine stack ends. - var g0SP unsafe.Pointer - var g0SPSize uintptr - mp := allocm(nil, true, &g0SP, &g0SPSize) + mp, g0SP, g0SPSize := allocm(nil, nil, true) gp := malg(true, false, nil, nil) gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary gp.gcscandone = true @@ -1051,6 +1523,17 @@ func unlockextra(mp *m) { atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp))) } +// Create a new m. It will start off with a call to fn, or else the scheduler. +// fn needs to be static and not a heap allocated closure. +// May run with m.p==nil, so write barriers are not allowed. +//go:nowritebarrierrec +func newm(fn func(), _p_ *p) { + mp, _, _ := allocm(_p_, fn, false) + mp.nextp.set(_p_) + mp.sigmask = initSigmask + newosproc(mp) +} + // Stops execution of the current m until new work is available. // Returns with acquired P. func stopm() { @@ -1083,6 +1566,59 @@ retry: _g_.m.nextp = 0 } +func mspinning() { + // startm's caller incremented nmspinning. Set the new M's spinning. + getg().m.spinning = true +} + +// Schedules some M to run the p (creates an M if necessary). +// If p==nil, tries to get an idle P, if no idle P's does nothing. +// May run with m.p==nil, so write barriers are not allowed. +// If spinning is set, the caller has incremented nmspinning and startm will +// either decrement nmspinning or set m.spinning in the newly started M. +//go:nowritebarrierrec +func startm(_p_ *p, spinning bool) { + lock(&sched.lock) + if _p_ == nil { + _p_ = pidleget() + if _p_ == nil { + unlock(&sched.lock) + if spinning { + // The caller incremented nmspinning, but there are no idle Ps, + // so it's okay to just undo the increment and give up. + if int32(atomic.Xadd(&sched.nmspinning, -1)) < 0 { + throw("startm: negative nmspinning") + } + } + return + } + } + mp := mget() + unlock(&sched.lock) + if mp == nil { + var fn func() + if spinning { + // The caller incremented nmspinning, so set m.spinning in the new M. + fn = mspinning + } + newm(fn, _p_) + return + } + if mp.spinning { + throw("startm: m is spinning") + } + if mp.nextp != 0 { + throw("startm: m has p") + } + if spinning && !runqempty(_p_) { + throw("startm: p has runnable gs") + } + // The caller incremented nmspinning, so set m.spinning in the new M. + mp.spinning = spinning + mp.nextp.set(_p_) + notewakeup(&mp.park) +} + // Hands off P from syscall or locked M. // Always runs without a P, so write barriers are not allowed. //go:nowritebarrierrec @@ -1281,7 +1817,7 @@ top: if _p_.runSafePointFn != 0 { runSafePointFn() } - if getfingwait() && getfingwake() { + if fingwait && fingwake { if gp := wakefing(); gp != nil { ready(gp, 0, true) } @@ -1593,6 +2129,7 @@ top: // goroutines on the global queue. // Since we preempt by storing the goroutine on the global // queue, this is the only place we need to check preempt. + // This does not call checkPreempt because gp is not running. if gp != nil && gp.preempt { gp.preempt = false lock(&sched.lock) @@ -1636,6 +2173,442 @@ func dropg() { setGNoWB(&_g_.m.curg, nil) } +func parkunlock_c(gp *g, lock unsafe.Pointer) bool { + unlock((*mutex)(lock)) + return true +} + +// park continuation on g0. +func park_m(gp *g) { + _g_ := getg() + + if trace.enabled { + traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip, gp) + } + + casgstatus(gp, _Grunning, _Gwaiting) + dropg() + + if _g_.m.waitunlockf != nil { + fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf)) + ok := fn(gp, _g_.m.waitlock) + _g_.m.waitunlockf = nil + _g_.m.waitlock = nil + if !ok { + if trace.enabled { + traceGoUnpark(gp, 2) + } + casgstatus(gp, _Gwaiting, _Grunnable) + execute(gp, true) // Schedule it back, never returns. + } + } + schedule() +} + +func goschedImpl(gp *g) { + status := readgstatus(gp) + if status&^_Gscan != _Grunning { + dumpgstatus(gp) + throw("bad g status") + } + casgstatus(gp, _Grunning, _Grunnable) + dropg() + lock(&sched.lock) + globrunqput(gp) + unlock(&sched.lock) + + schedule() +} + +// Gosched continuation on g0. +func gosched_m(gp *g) { + if trace.enabled { + traceGoSched() + } + goschedImpl(gp) +} + +func gopreempt_m(gp *g) { + if trace.enabled { + traceGoPreempt() + } + goschedImpl(gp) +} + +// Finishes execution of the current goroutine. +func goexit1() { + if trace.enabled { + traceGoEnd() + } + mcall(goexit0) +} + +// goexit continuation on g0. +func goexit0(gp *g) { + _g_ := getg() + + casgstatus(gp, _Grunning, _Gdead) + if isSystemGoroutine(gp) { + atomic.Xadd(&sched.ngsys, -1) + } + gp.m = nil + gp.lockedm = nil + _g_.m.lockedg = nil + gp.entry = nil + gp.paniconfault = false + gp._defer = nil // should be true already but just in case. + gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data. + gp.writebuf = nil + gp.waitreason = "" + gp.param = nil + + // Note that gp's stack scan is now "valid" because it has no + // stack. We could dequeueRescan, but that takes a lock and + // isn't really necessary. + gp.gcscanvalid = true + dropg() + + if _g_.m.locked&^_LockExternal != 0 { + print("invalid m->locked = ", _g_.m.locked, "\n") + throw("internal lockOSThread error") + } + _g_.m.locked = 0 + gfput(_g_.m.p.ptr(), gp) + schedule() +} + +// The goroutine g is about to enter a system call. +// Record that it's not using the cpu anymore. +// This is called only from the go syscall library and cgocall, +// not from the low-level system calls used by the runtime. +// +// The entersyscall function is written in C, so that it can save the +// current register context so that the GC will see them. +// It calls reentersyscall. +// +// Syscall tracing: +// At the start of a syscall we emit traceGoSysCall to capture the stack trace. +// If the syscall does not block, that is it, we do not emit any other events. +// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock; +// when syscall returns we emit traceGoSysExit and when the goroutine starts running +// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart. +// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock, +// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick), +// whoever emits traceGoSysBlock increments p.syscalltick afterwards; +// and we wait for the increment before emitting traceGoSysExit. +// Note that the increment is done even if tracing is not enabled, +// because tracing can be enabled in the middle of syscall. We don't want the wait to hang. +// +//go:nosplit +//go:noinline +func reentersyscall(pc, sp uintptr) { + _g_ := getg() + + // Disable preemption because during this function g is in Gsyscall status, + // but can have inconsistent g->sched, do not let GC observe it. + _g_.m.locks++ + + _g_.syscallsp = sp + _g_.syscallpc = pc + casgstatus(_g_, _Grunning, _Gsyscall) + + if trace.enabled { + systemstack(traceGoSysCall) + } + + if atomic.Load(&sched.sysmonwait) != 0 { + systemstack(entersyscall_sysmon) + } + + if _g_.m.p.ptr().runSafePointFn != 0 { + // runSafePointFn may stack split if run on this stack + systemstack(runSafePointFn) + } + + _g_.m.syscalltick = _g_.m.p.ptr().syscalltick + _g_.sysblocktraced = true + _g_.m.mcache = nil + _g_.m.p.ptr().m = 0 + atomic.Store(&_g_.m.p.ptr().status, _Psyscall) + if sched.gcwaiting != 0 { + systemstack(entersyscall_gcwait) + } + + _g_.m.locks-- +} + +func entersyscall_sysmon() { + lock(&sched.lock) + if atomic.Load(&sched.sysmonwait) != 0 { + atomic.Store(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) +} + +func entersyscall_gcwait() { + _g_ := getg() + _p_ := _g_.m.p.ptr() + + lock(&sched.lock) + if sched.stopwait > 0 && atomic.Cas(&_p_.status, _Psyscall, _Pgcstop) { + if trace.enabled { + traceGoSysBlock(_p_) + traceProcStop(_p_) + } + _p_.syscalltick++ + if sched.stopwait--; sched.stopwait == 0 { + notewakeup(&sched.stopnote) + } + } + unlock(&sched.lock) +} + +// The same as reentersyscall(), but with a hint that the syscall is blocking. +//go:nosplit +func reentersyscallblock(pc, sp uintptr) { + _g_ := getg() + + _g_.m.locks++ // see comment in entersyscall + _g_.throwsplit = true + _g_.m.syscalltick = _g_.m.p.ptr().syscalltick + _g_.sysblocktraced = true + _g_.m.p.ptr().syscalltick++ + + // Leave SP around for GC and traceback. + _g_.syscallsp = sp + _g_.syscallpc = pc + casgstatus(_g_, _Grunning, _Gsyscall) + systemstack(entersyscallblock_handoff) + + _g_.m.locks-- +} + +func entersyscallblock_handoff() { + if trace.enabled { + traceGoSysCall() + traceGoSysBlock(getg().m.p.ptr()) + } + handoffp(releasep()) +} + +// The goroutine g exited its system call. +// Arrange for it to run on a cpu again. +// This is called only from the go syscall library, not +// from the low-level system calls used by the runtime. +// +// Write barriers are not allowed because our P may have been stolen. +// +//go:nosplit +//go:nowritebarrierrec +func exitsyscall(dummy int32) { + _g_ := getg() + + _g_.m.locks++ // see comment in entersyscall + + _g_.waitsince = 0 + oldp := _g_.m.p.ptr() + if exitsyscallfast() { + if _g_.m.mcache == nil { + throw("lost mcache") + } + if trace.enabled { + if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { + systemstack(traceGoStart) + } + } + // There's a cpu for us, so we can run. + _g_.m.p.ptr().syscalltick++ + // We need to cas the status and scan before resuming... + casgstatus(_g_, _Gsyscall, _Grunning) + + exitsyscallclear(_g_) + _g_.m.locks-- + _g_.throwsplit = false + return + } + + _g_.sysexitticks = 0 + if trace.enabled { + // Wait till traceGoSysBlock event is emitted. + // This ensures consistency of the trace (the goroutine is started after it is blocked). + for oldp != nil && oldp.syscalltick == _g_.m.syscalltick { + osyield() + } + // We can't trace syscall exit right now because we don't have a P. + // Tracing code can invoke write barriers that cannot run without a P. + // So instead we remember the syscall exit time and emit the event + // in execute when we have a P. + _g_.sysexitticks = cputicks() + } + + _g_.m.locks-- + + // Call the scheduler. + mcall(exitsyscall0) + + if _g_.m.mcache == nil { + throw("lost mcache") + } + + // Scheduler returned, so we're allowed to run now. + // Delete the syscallsp information that we left for + // the garbage collector during the system call. + // Must wait until now because until gosched returns + // we don't know for sure that the garbage collector + // is not running. + exitsyscallclear(_g_) + + _g_.m.p.ptr().syscalltick++ + _g_.throwsplit = false +} + +//go:nosplit +func exitsyscallfast() bool { + _g_ := getg() + + // Freezetheworld sets stopwait but does not retake P's. + if sched.stopwait == freezeStopWait { + _g_.m.mcache = nil + _g_.m.p = 0 + return false + } + + // Try to re-acquire the last P. + if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && atomic.Cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) { + // There's a cpu for us, so we can run. + exitsyscallfast_reacquired() + return true + } + + // Try to get any other idle P. + oldp := _g_.m.p.ptr() + _g_.m.mcache = nil + _g_.m.p = 0 + if sched.pidle != 0 { + var ok bool + systemstack(func() { + ok = exitsyscallfast_pidle() + if ok && trace.enabled { + if oldp != nil { + // Wait till traceGoSysBlock event is emitted. + // This ensures consistency of the trace (the goroutine is started after it is blocked). + for oldp.syscalltick == _g_.m.syscalltick { + osyield() + } + } + traceGoSysExit(0) + } + }) + if ok { + return true + } + } + return false +} + +// exitsyscallfast_reacquired is the exitsyscall path on which this G +// has successfully reacquired the P it was running on before the +// syscall. +// +// This function is allowed to have write barriers because exitsyscall +// has acquired a P at this point. +// +//go:yeswritebarrierrec +//go:nosplit +func exitsyscallfast_reacquired() { + _g_ := getg() + _g_.m.mcache = _g_.m.p.ptr().mcache + _g_.m.p.ptr().m.set(_g_.m) + if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick { + if trace.enabled { + // The p was retaken and then enter into syscall again (since _g_.m.syscalltick has changed). + // traceGoSysBlock for this syscall was already emitted, + // but here we effectively retake the p from the new syscall running on the same p. + systemstack(func() { + // Denote blocking of the new syscall. + traceGoSysBlock(_g_.m.p.ptr()) + // Denote completion of the current syscall. + traceGoSysExit(0) + }) + } + _g_.m.p.ptr().syscalltick++ + } +} + +func exitsyscallfast_pidle() bool { + lock(&sched.lock) + _p_ := pidleget() + if _p_ != nil && atomic.Load(&sched.sysmonwait) != 0 { + atomic.Store(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + return true + } + return false +} + +// exitsyscall slow path on g0. +// Failed to acquire P, enqueue gp as runnable. +// +//go:nowritebarrierrec +func exitsyscall0(gp *g) { + _g_ := getg() + + casgstatus(gp, _Gsyscall, _Grunnable) + dropg() + lock(&sched.lock) + _p_ := pidleget() + if _p_ == nil { + globrunqput(gp) + } else if atomic.Load(&sched.sysmonwait) != 0 { + atomic.Store(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + if _p_ != nil { + acquirep(_p_) + execute(gp, false) // Never returns. + } + if _g_.m.lockedg != nil { + // Wait until another thread schedules gp and so m again. + stoplockedm() + execute(gp, false) // Never returns. + } + stopm() + schedule() // Never returns. +} + +// exitsyscallclear clears GC-related information that we only track +// during a syscall. +func exitsyscallclear(gp *g) { + // Garbage collector isn't running (since we are), so okay to + // clear syscallsp. + gp.syscallsp = 0 + + gp.gcstack = nil + gp.gcnextsp = nil + memclrNoHeapPointers(unsafe.Pointer(&gp.gcregs), unsafe.Sizeof(gp.gcregs)) +} + +// Code generated by cgo, and some library code, calls syscall.Entersyscall +// and syscall.Exitsyscall. + +//go:linkname syscall_entersyscall syscall.Entersyscall +//go:nosplit +func syscall_entersyscall() { + entersyscall(0) +} + +//go:linkname syscall_exitsyscall syscall.Exitsyscall +//go:nosplit +func syscall_exitsyscall() { + exitsyscall(0) +} + func beforefork() { gp := getg().m.curg @@ -1671,6 +2644,91 @@ func syscall_runtime_AfterFork() { systemstack(afterfork) } +// Create a new g running fn passing arg as the single argument. +// Put it on the queue of g's waiting to run. +// The compiler turns a go statement into a call to this. +//go:linkname newproc __go_go +func newproc(fn uintptr, arg unsafe.Pointer) *g { + _g_ := getg() + + if fn == 0 { + _g_.m.throwing = -1 // do not dump full stacks + throw("go of nil func value") + } + _g_.m.locks++ // disable preemption because it can be holding p in a local var + + _p_ := _g_.m.p.ptr() + newg := gfget(_p_) + var ( + sp unsafe.Pointer + spsize uintptr + ) + if newg == nil { + newg = malg(true, false, &sp, &spsize) + casgstatus(newg, _Gidle, _Gdead) + newg.gcRescan = -1 + allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack. + } else { + resetNewG(newg, &sp, &spsize) + } + newg.traceback = nil + + if readgstatus(newg) != _Gdead { + throw("newproc1: new g is not Gdead") + } + + // Store the C function pointer into entryfn, take the address + // of entryfn, convert it to a Go function value, and store + // that in entry. + newg.entryfn = fn + var entry func(unsafe.Pointer) + *(*unsafe.Pointer)(unsafe.Pointer(&entry)) = unsafe.Pointer(&newg.entryfn) + newg.entry = entry + + newg.param = arg + newg.gopc = getcallerpc(unsafe.Pointer(&fn)) + newg.startpc = fn + if isSystemGoroutine(newg) { + atomic.Xadd(&sched.ngsys, +1) + } + // The stack is dirty from the argument frame, so queue it for + // scanning. Do this before setting it to runnable so we still + // own the G. If we're recycling a G, it may already be on the + // rescan list. + if newg.gcRescan == -1 { + queueRescan(newg) + } else { + // The recycled G is already on the rescan list. Just + // mark the stack dirty. + newg.gcscanvalid = false + } + casgstatus(newg, _Gdead, _Grunnable) + + if _p_.goidcache == _p_.goidcacheend { + // Sched.goidgen is the last allocated id, + // this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch]. + // At startup sched.goidgen=0, so main goroutine receives goid=1. + _p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch) + _p_.goidcache -= _GoidCacheBatch - 1 + _p_.goidcacheend = _p_.goidcache + _GoidCacheBatch + } + newg.goid = int64(_p_.goidcache) + _p_.goidcache++ + if trace.enabled { + traceGoCreate(newg, newg.startpc) + } + + makeGContext(newg, sp, spsize) + + runqput(_p_, newg, true) + + if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && runtimeInitTime != 0 { + wakep() + } + _g_.m.locks-- + return newg +} + // Put on gfree list. // If local list is too long, transfer a batch to the global list. func gfput(_p_ *p, gp *g) { @@ -1738,6 +2796,11 @@ func gfpurge(_p_ *p) { unlock(&sched.gflock) } +// Breakpoint executes a breakpoint trap. +func Breakpoint() { + breakpoint() +} + // dolockOSThread is called by LockOSThread and lockOSThread below // after they modify m.locked. Do not allow preemption during this call, // or else the m might be different in this function than in the caller. @@ -1822,6 +2885,152 @@ func mcount() int32 { return sched.mcount } +var prof struct { + lock uint32 + hz int32 +} + +func _System() { _System() } +func _ExternalCode() { _ExternalCode() } +func _GC() { _GC() } + +var _SystemPC = funcPC(_System) +var _ExternalCodePC = funcPC(_ExternalCode) +var _GCPC = funcPC(_GC) + +// Called if we receive a SIGPROF signal. +// Called by the signal handler, may run during STW. +//go:nowritebarrierrec +func sigprof(pc uintptr, gp *g, mp *m) { + if prof.hz == 0 { + return + } + + // Profiling runs concurrently with GC, so it must not allocate. + // Set a trap in case the code does allocate. + // Note that on windows, one thread takes profiles of all the + // other threads, so mp is usually not getg().m. + // In fact mp may not even be stopped. + // See golang.org/issue/17165. + getg().m.mallocing++ + + traceback := true + + // If SIGPROF arrived while already fetching runtime callers + // we can have trouble on older systems because the unwind + // library calls dl_iterate_phdr which was not reentrant in + // the past. alreadyInCallers checks for that. + if gp == nil || alreadyInCallers() { + traceback = false + } + + var stk [maxCPUProfStack]uintptr + n := 0 + if traceback { + var stklocs [maxCPUProfStack]location + n = callers(0, stklocs[:]) + + for i := 0; i < n; i++ { + stk[i] = stklocs[i].pc + } + } + + if n <= 0 { + // Normal traceback is impossible or has failed. + // Account it against abstract "System" or "GC". + n = 2 + stk[0] = pc + if mp.preemptoff != "" || mp.helpgc != 0 { + stk[1] = _GCPC + sys.PCQuantum + } else { + stk[1] = _SystemPC + sys.PCQuantum + } + } + + if prof.hz != 0 { + // Simple cas-lock to coordinate with setcpuprofilerate. + for !atomic.Cas(&prof.lock, 0, 1) { + osyield() + } + if prof.hz != 0 { + cpuprof.add(stk[:n]) + } + atomic.Store(&prof.lock, 0) + } + getg().m.mallocing-- +} + +// Use global arrays rather than using up lots of stack space in the +// signal handler. This is safe since while we are executing a SIGPROF +// signal other SIGPROF signals are blocked. +var nonprofGoStklocs [maxCPUProfStack]location +var nonprofGoStk [maxCPUProfStack]uintptr + +// sigprofNonGo is called if we receive a SIGPROF signal on a non-Go thread, +// and the signal handler collected a stack trace in sigprofCallers. +// When this is called, sigprofCallersUse will be non-zero. +// g is nil, and what we can do is very limited. +//go:nosplit +//go:nowritebarrierrec +func sigprofNonGo(pc uintptr) { + if prof.hz != 0 { + n := callers(0, nonprofGoStklocs[:]) + + for i := 0; i < n; i++ { + nonprofGoStk[i] = nonprofGoStklocs[i].pc + } + + if n <= 0 { + n = 2 + nonprofGoStk[0] = pc + nonprofGoStk[1] = _ExternalCodePC + sys.PCQuantum + } + + // Simple cas-lock to coordinate with setcpuprofilerate. + for !atomic.Cas(&prof.lock, 0, 1) { + osyield() + } + if prof.hz != 0 { + cpuprof.addNonGo(nonprofGoStk[:n]) + } + atomic.Store(&prof.lock, 0) + } +} + +// Arrange to call fn with a traceback hz times a second. +func setcpuprofilerate_m(hz int32) { + // Force sane arguments. + if hz < 0 { + hz = 0 + } + + // Disable preemption, otherwise we can be rescheduled to another thread + // that has profiling enabled. + _g_ := getg() + _g_.m.locks++ + + // Stop profiler on this thread so that it is safe to lock prof. + // if a profiling signal came in while we had prof locked, + // it would deadlock. + resetcpuprofiler(0) + + for !atomic.Cas(&prof.lock, 0, 1) { + osyield() + } + prof.hz = hz + atomic.Store(&prof.lock, 0) + + lock(&sched.lock) + sched.profilehz = hz + unlock(&sched.lock) + + if hz != 0 { + resetcpuprofiler(hz) + } + + _g_.m.locks-- +} + // Change number of processors. The world is stopped, sched is locked. // gcworkbufs are not being modified by either the GC or // the write barrier code. diff --git a/libgo/go/runtime/runtime.go b/libgo/go/runtime/runtime.go index e63130b..58710de 100644 --- a/libgo/go/runtime/runtime.go +++ b/libgo/go/runtime/runtime.go @@ -19,20 +19,17 @@ import ( // //go:linkname tickspersecond runtime.tickspersecond -var ticks struct { - lock mutex - pad uint32 // ensure 8-byte alignment of val on 386 - val uint64 -} +var ticksLock mutex +var ticksVal uint64 // Note: Called by runtime/pprof in addition to runtime code. func tickspersecond() int64 { - r := int64(atomic.Load64(&ticks.val)) + r := int64(atomic.Load64(&ticksVal)) if r != 0 { return r } - lock(&ticks.lock) - r = int64(ticks.val) + lock(&ticksLock) + r = int64(ticksVal) if r == 0 { t0 := nanotime() c0 := cputicks() @@ -46,9 +43,9 @@ func tickspersecond() int64 { if r == 0 { r++ } - atomic.Store64(&ticks.val, uint64(r)) + atomic.Store64(&ticksVal, uint64(r)) } - unlock(&ticks.lock) + unlock(&ticksLock) return r } diff --git a/libgo/go/runtime/runtime1.go b/libgo/go/runtime/runtime1.go index 99c0f11..dd3f7b2 100644 --- a/libgo/go/runtime/runtime1.go +++ b/libgo/go/runtime/runtime1.go @@ -112,10 +112,10 @@ var test_z64, test_x64 uint64 func testAtomic64() { test_z64 = 42 test_x64 = 0 - // prefetcht0(uintptr(unsafe.Pointer(&test_z64))) - // prefetcht1(uintptr(unsafe.Pointer(&test_z64))) - // prefetcht2(uintptr(unsafe.Pointer(&test_z64))) - // prefetchnta(uintptr(unsafe.Pointer(&test_z64))) + prefetcht0(uintptr(unsafe.Pointer(&test_z64))) + prefetcht1(uintptr(unsafe.Pointer(&test_z64))) + prefetcht2(uintptr(unsafe.Pointer(&test_z64))) + prefetchnta(uintptr(unsafe.Pointer(&test_z64))) if atomic.Cas64(&test_z64, test_x64, 1) { throw("cas64 failed") } @@ -151,14 +151,6 @@ func testAtomic64() { } func check() { - - // This doesn't currently work for gccgo. Because escape - // analysis is not turned on by default, the code below that - // takes the address of local variables causes memory - // allocation, but this function is called before the memory - // allocator has been initialized. - return - var ( a int8 b uint8 @@ -390,7 +382,18 @@ var dbgvars = []dbgVar{ func parsedebugvars() { // defaults debug.cgocheck = 1 - debug.invalidptr = 1 + + // Unfortunately, because gccgo uses conservative stack scanning, + // we can not enable invalid pointer checking. It is possible for + // memory block M1 to point to M2, and for both to be dead. + // We release M2, causing the entire span to be released. + // Before we release M1, a stack pointer appears that point into it. + // This stack pointer is presumably dead, but causes M1 to be marked. + // We scan M1 and see the pointer to M2 on a released span. + // At that point, if debug.invalidptr is set, we crash. + // This is not a problem, assuming that M1 really is dead and + // the pointer we discovered to it will not be used. + // debug.invalidptr = 1 for p := gogetenv("GODEBUG"); p != ""; { field := "" diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go index 195d65b..22847ea 100644 --- a/libgo/go/runtime/runtime2.go +++ b/libgo/go/runtime/runtime2.go @@ -409,16 +409,16 @@ type g struct { gcinitialsp unsafe.Pointer gcregs g_ucontext_t - entry unsafe.Pointer // goroutine entry point - fromgogo bool // whether entered from gogo function + entry func(unsafe.Pointer) // goroutine function to run + entryfn uintptr // function address passed to __go_go + fromgogo bool // whether entered from gogo function - issystem bool // do not output in stack dump - isbackground bool // ignore in deadlock detector + scanningself bool // whether goroutine is scanning its own stack traceback *tracebackg // stack traceback buffer - context g_ucontext_t // saved context for setcontext - stackcontext [10]unsafe.Pointer // split-stack context + context g_ucontext_t // saved context for setcontext + stackcontext [10]uintptr // split-stack context } type m struct { @@ -431,7 +431,7 @@ type m struct { gsignal *g // signal-handling g sigmask sigset // storage for saved signal mask // Not for gccgo: tls [6]uintptr // thread-local storage (for x86 extern register) - mstartfn uintptr + mstartfn func() curg *g // current running goroutine caughtsig guintptr // goroutine running during fatal signal p puintptr // attached p for executing go code (nil if not executing go code) @@ -541,7 +541,7 @@ type p struct { tracebuf traceBufPtr - // Not for gccgo for now: palloc persistentAlloc // per-P to avoid mutex + palloc persistentAlloc // per-P to avoid mutex // Per-P GC state gcAssistTime int64 // Nanoseconds in assistAlloc @@ -551,7 +551,7 @@ type p struct { // gcw is this P's GC work buffer cache. The work buffer is // filled by write barriers, drained by mutator assists, and // disposed on certain GC state transitions. - // Not for gccgo for now: gcw gcWork + gcw gcWork runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point @@ -714,10 +714,6 @@ type _defer struct { // function function will be somewhere in libffi, so __retaddr // is not useful. makefunccanrecover bool - - // Set to true if this defer stack entry is not part of the - // defer pool. - special bool } // panics @@ -790,7 +786,7 @@ var ( // aligned to a 16-byte boundary. We implement this by increasing the // required size and picking an appropriate offset when we use the // array. -type g_ucontext_t [(_sizeof_ucontext_t + 15) / unsafe.Sizeof(unsafe.Pointer(nil))]unsafe.Pointer +type g_ucontext_t [(_sizeof_ucontext_t + 15) / unsafe.Sizeof(uintptr(0))]uintptr // sigset is the Go version of the C type sigset_t. // _sigset_t is defined by the Makefile from <signal.h>. diff --git a/libgo/go/runtime/runtime_unix_test.go b/libgo/go/runtime/runtime_unix_test.go index e912163..b0cbbbe 100644 --- a/libgo/go/runtime/runtime_unix_test.go +++ b/libgo/go/runtime/runtime_unix_test.go @@ -6,7 +6,7 @@ // We need a fast system call to provoke the race, // and Close(-1) is nearly universally fast. -// +build darwin dragonfly freebsd linux netbsd openbsd plan9 +// +build aix darwin dragonfly freebsd linux netbsd openbsd plan9 package runtime_test diff --git a/libgo/go/runtime/signal_gccgo.go b/libgo/go/runtime/signal_gccgo.go index b4257c9..056be36 100644 --- a/libgo/go/runtime/signal_gccgo.go +++ b/libgo/go/runtime/signal_gccgo.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package runtime diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go index 279001b..b71b21e 100644 --- a/libgo/go/runtime/signal_sighandler.go +++ b/libgo/go/runtime/signal_sighandler.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package runtime @@ -29,13 +29,13 @@ func sighandler(sig uint32, info *_siginfo_t, ctxt unsafe.Pointer, gp *g) { _g_ := getg() c := sigctxt{info, ctxt} + sigfault, sigpc := getSiginfo(info, ctxt) + if sig == _SIGPROF { - sigprof() + sigprof(sigpc, gp, _g_.m) return } - sigfault, sigpc := getSiginfo(info, ctxt) - flags := int32(_SigThrow) if sig < uint32(len(sigtable)) { flags = sigtable[sig].flags diff --git a/libgo/go/runtime/signal_unix.go b/libgo/go/runtime/signal_unix.go index 13b7930..c8713b6 100644 --- a/libgo/go/runtime/signal_unix.go +++ b/libgo/go/runtime/signal_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package runtime @@ -216,7 +216,7 @@ func sigtrampgo(sig uint32, info *_siginfo_t, ctx unsafe.Pointer) { c := sigctxt{info, ctx} if sig == _SIGPROF { _, pc := getSiginfo(info, ctx) - sigprofNonGoPC(pc) + sigprofNonGo(pc) return } badsignal(uintptr(sig), &c) diff --git a/libgo/go/runtime/slice.go b/libgo/go/runtime/slice.go index 55f4454..f61f85e 100644 --- a/libgo/go/runtime/slice.go +++ b/libgo/go/runtime/slice.go @@ -60,10 +60,7 @@ func makeslice(et *_type, len, cap int) slice { panic(errorString("makeslice: cap out of range")) } - // gccgo's current garbage collector requires using newarray, - // not mallocgc here. This can change back to mallocgc when - // we port the garbage collector. - p := newarray(et, cap) + p := mallocgc(et.size*uintptr(cap), et, true) return slice{p, len, cap} } @@ -144,21 +141,14 @@ func growslice(et *_type, old slice, cap int) slice { var p unsafe.Pointer if et.kind&kindNoPointers != 0 { - // gccgo's current GC requires newarray, not mallocgc. - p = newarray(et, newcap) + p = mallocgc(capmem, nil, false) memmove(p, old.array, lenmem) - // The call to memclr is not needed for gccgo since - // the newarray function will zero the memory. - // Calling memclr is also wrong since we allocated - // newcap*et.size bytes, which is not the same as capmem. // The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length). // Only clear the part that will not be overwritten. - // memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem) - _ = newlenmem + memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem) } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. - // gccgo's current GC requires newarray, not mallocgc. - p = newarray(et, newcap) + p = mallocgc(capmem, et, true) if !writeBarrier.enabled { memmove(p, old.array, lenmem) } else { diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go index bf9f62e..a3d0918 100644 --- a/libgo/go/runtime/stubs.go +++ b/libgo/go/runtime/stubs.go @@ -51,11 +51,22 @@ func mcall(fn func(*g)) // // For the gc toolchain this permits running a function that requires // additional stack space in a context where the stack can not be -// split. For gccgo, however, stack splitting is not managed by the -// Go runtime. In effect, all stacks are system stacks. So this gccgo -// version just runs the function. +// split. We don't really need additional stack space in gccgo, since +// stack splitting is handled separately. But to keep things looking +// the same, we do switch to the g0 stack here if necessary. func systemstack(fn func()) { - fn() + gp := getg() + mp := gp.m + if gp == mp.g0 || gp == mp.gsignal { + fn() + } else if gp == mp.curg { + mcall(func(origg *g) { + fn() + gogo(origg) + }) + } else { + badsystemstack() + } } func badsystemstack() { @@ -119,26 +130,18 @@ func noescape(p unsafe.Pointer) unsafe.Pointer { return unsafe.Pointer(x ^ 0) } -//extern mincore -func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 - //go:noescape func jmpdefer(fv *funcval, argp uintptr) func exit1(code int32) -func asminit() func setg(gg *g) + +//extern __builtin_trap func breakpoint() -// reflectcall calls fn with a copy of the n argument bytes pointed at by arg. -// After fn returns, reflectcall copies n-retoffset result bytes -// back into arg+retoffset before returning. If copying result bytes back, -// the caller should pass the argument frame type as argtype, so that -// call can execute appropriate write barriers during the copy. -// Package reflect passes a frame type. In package runtime, there is only -// one call that copies results back, in cgocallbackg1, and it does NOT pass a -// frame type, meaning there are no write barriers invoked. See that call -// site for justification. -func reflectcall(argtype *_type, fn, arg unsafe.Pointer, argsize uint32, retoffset uint32) +func asminit() {} + +//go:linkname reflectcall reflect.call +func reflectcall(fntype *functype, fn *funcval, isInterface, isMethod bool, params, results *unsafe.Pointer) func procyield(cycles uint32) @@ -216,6 +219,25 @@ const _NoArgs = ^uintptr(0) //go:linkname time_now time.now func time_now() (sec int64, nsec int32) +//extern __builtin_prefetch +func prefetch(addr unsafe.Pointer, rw int32, locality int32) + +func prefetcht0(addr uintptr) { + prefetch(unsafe.Pointer(addr), 0, 3) +} + +func prefetcht1(addr uintptr) { + prefetch(unsafe.Pointer(addr), 0, 2) +} + +func prefetcht2(addr uintptr) { + prefetch(unsafe.Pointer(addr), 0, 1) +} + +func prefetchnta(addr uintptr) { + prefetch(unsafe.Pointer(addr), 0, 0) +} + // For gccgo, expose this for C callers. //go:linkname unixnanotime runtime.unixnanotime func unixnanotime() int64 { @@ -252,32 +274,12 @@ func osyield() //extern syscall func syscall(trap uintptr, a1, a2, a3, a4, a5, a6 uintptr) uintptr -// newobject allocates a new object. -// For gccgo unless and until we port malloc.go. -func newobject(*_type) unsafe.Pointer - -// newarray allocates a new array of objects. -// For gccgo unless and until we port malloc.go. -func newarray(*_type, int) unsafe.Pointer - // For gccgo, to communicate from the C code to the Go code. //go:linkname setIsCgo runtime.setIsCgo func setIsCgo() { iscgo = true } -// Temporary for gccgo until we port proc.go. -//go:linkname makeMainInitDone runtime.makeMainInitDone -func makeMainInitDone() { - main_init_done = make(chan bool) -} - -// Temporary for gccgo until we port proc.go. -//go:linkname closeMainInitDone runtime.closeMainInitDone -func closeMainInitDone() { - close(main_init_done) -} - // For gccgo, to communicate from the C code to the Go code. //go:linkname setCpuidECX runtime.setCpuidECX func setCpuidECX(v uint32) { @@ -290,82 +292,6 @@ func setSupportAES(v bool) { support_aes = v } -// typedmemmove copies a typed value. -// For gccgo for now. -//go:linkname typedmemmove runtime.typedmemmove -//go:nosplit -func typedmemmove(typ *_type, dst, src unsafe.Pointer) { - memmove(dst, src, typ.size) -} - -// Temporary for gccgo until we port mbarrier.go. -//go:linkname reflect_typedmemmove reflect.typedmemmove -func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) { - typedmemmove(typ, dst, src) -} - -// Temporary for gccgo until we port mbarrier.go. -//go:nosplit -func typedmemclr(typ *_type, ptr unsafe.Pointer) { - memclrNoHeapPointers(ptr, typ.size) -} - -// Temporary for gccgo until we port mbarrier.go. -//go:nosplit -func memclrHasPointers(ptr unsafe.Pointer, n uintptr) { - memclrNoHeapPointers(ptr, n) -} - -// Temporary for gccgo until we port mbarrier.go. -//go:linkname typedslicecopy runtime.typedslicecopy -func typedslicecopy(typ *_type, dst, src slice) int { - n := dst.len - if n > src.len { - n = src.len - } - if n == 0 { - return 0 - } - memmove(dst.array, src.array, uintptr(n)*typ.size) - return n -} - -// Temporary for gccgo until we port mbarrier.go. -//go:linkname reflect_typedslicecopy reflect.typedslicecopy -func reflect_typedslicecopy(elemType *_type, dst, src slice) int { - return typedslicecopy(elemType, dst, src) -} - -// Here for gccgo until we port malloc.go. -const ( - _64bit = 1 << (^uintptr(0) >> 63) / 2 - _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*32 - _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1) - _MaxGcproc = 32 -) - -// Here for gccgo until we port malloc.go. -//extern runtime_mallocgc -func c_mallocgc(size uintptr, typ uintptr, flag uint32) unsafe.Pointer -func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { - flag := uint32(0) - if !needzero { - flag = 1 << 3 - } - return c_mallocgc(size, uintptr(unsafe.Pointer(typ)), flag) -} - -// Here for gccgo until we port mgc.go. -var writeBarrier struct { - enabled bool // compiler emits a check of this before calling write barrier - needed bool // whether we need a write barrier for current GC phase - cgo bool // whether we need a write barrier for a cgo check - alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load -} - -func queueRescan(*g) { -} - // Here for gccgo until we port atomic_pointer.go and mgc.go. //go:nosplit func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { @@ -379,21 +305,12 @@ func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool { func lock(l *mutex) func unlock(l *mutex) -// Here for gccgo for netpoll and Solaris. +// Here for gccgo. func errno() int // Temporary for gccgo until we port proc.go. func entersyscall(int32) func entersyscallblock(int32) -func exitsyscall(int32) -func gopark(func(*g, unsafe.Pointer) bool, unsafe.Pointer, string, byte, int) -func goparkunlock(*mutex, string, byte, int) - -// Temporary hack for gccgo until we port the garbage collector. -func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {} - -// Here for gccgo until we port msize.go. -func roundupsize(uintptr) uintptr // Here for gccgo until we port mgc.go. func GC() @@ -417,64 +334,22 @@ func getMstats() *mstats { return &memstats } -// Temporary for gccgo until we port proc.go. -func setcpuprofilerate_m(hz int32) - // Temporary for gccgo until we port mem_GOOS.go. func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) -// Temporary for gccgo until we port proc.go, so that the C signal -// handler can call into cpuprof. -//go:linkname cpuprofAdd runtime.cpuprofAdd -func cpuprofAdd(stk []uintptr) { - cpuprof.add(stk) -} - -// For gccgo until we port proc.go. -func Breakpoint() -func LockOSThread() -func UnlockOSThread() -func lockOSThread() -func unlockOSThread() - // Temporary for gccgo until we port malloc.go func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer // Temporary for gccgo until we port mheap.go func setprofilebucket(p unsafe.Pointer, b *bucket) -// Temporary for gccgo until we port mgc.go. -func setgcpercent(int32) int32 - -//go:linkname setGCPercent runtime_debug.setGCPercent -func setGCPercent(in int32) (out int32) { - return setgcpercent(in) -} - // Temporary for gccgo until we port atomic_pointer.go. //go:nosplit func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) { atomic.StorepNoWB(noescape(ptr), new) } -// Temporary for gccgo until we port mbarrier.go -func writebarrierptr(dst *uintptr, src uintptr) { - *dst = src -} - -// Temporary for gccgo until we port malloc.go -var zerobase uintptr - -//go:linkname getZerobase runtime.getZerobase -func getZerobase() *uintptr { - return &zerobase -} - -// Temporary for gccgo until we port proc.go. -func sigprof() -func goexit1() - // Get signal trampoline, written in C. func getSigtramp() uintptr @@ -547,79 +422,12 @@ func getPanicking() uint32 { return panicking } -// Temporary for gccgo until we port mcache.go. -func allocmcache() *mcache -func freemcache(*mcache) - -// Temporary for gccgo until we port mgc.go. -// This is just so that allgadd will compile. -var work struct { - rescan struct { - lock mutex - list []guintptr - } -} - -// Temporary for gccgo until we port mgc.go. -var gcBlackenEnabled uint32 - -// Temporary for gccgo until we port mgc.go. -func gcMarkWorkAvailable(p *p) bool { - return false -} - -// Temporary for gccgo until we port mgc.go. -var gcController gcControllerState - -// Temporary for gccgo until we port mgc.go. -type gcControllerState struct { -} - -// Temporary for gccgo until we port mgc.go. -func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { - return nil -} - -// Temporary for gccgo until we port mgc.go. -var gcphase uint32 - -// Temporary for gccgo until we port mgc.go. -const ( - _GCoff = iota - _GCmark - _GCmarktermination -) - -// Temporary for gccgo until we port mgc.go. -type gcMarkWorkerMode int - -// Temporary for gccgo until we port mgc.go. -const ( - gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota - gcMarkWorkerFractionalMode - gcMarkWorkerIdleMode -) - -// Temporary for gccgo until we port mheap.go. -type mheap struct { -} - -// Temporary for gccgo until we port mheap.go. -var mheap_ mheap - -// Temporary for gccgo until we port mheap.go. -func (h *mheap) scavenge(k int32, now, limit uint64) { -} - // Temporary for gccgo until we initialize ncpu in Go. //go:linkname setncpu runtime.setncpu func setncpu(n int32) { ncpu = n } -// Temporary for gccgo until we port malloc.go. -var physPageSize uintptr - // Temporary for gccgo until we reliably initialize physPageSize in Go. //go:linkname setpagesize runtime.setpagesize func setpagesize(s uintptr) { @@ -633,10 +441,20 @@ func sigprofNonGoPC(pc uintptr) { } // Temporary for gccgo until we port mgc.go. -// gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes -// to use in execution traces. -var gcMarkWorkerModeStrings = [...]string{ - "GC (dedicated)", - "GC (fractional)", - "GC (idle)", +//go:linkname runtime_m0 runtime.runtime_m0 +func runtime_m0() *m { + return &m0 +} + +// Temporary for gccgo until we port mgc.go. +//go:linkname runtime_g0 runtime.runtime_g0 +func runtime_g0() *g { + return &g0 +} + +const uintptrMask = 1<<(8*sys.PtrSize) - 1 + +type bitvector struct { + n int32 // # of bits + bytedata *uint8 } diff --git a/libgo/go/runtime/stubs2.go b/libgo/go/runtime/stubs2.go index e891fe5..490405d 100644 --- a/libgo/go/runtime/stubs2.go +++ b/libgo/go/runtime/stubs2.go @@ -18,16 +18,8 @@ func exit(code int32) func nanotime() int64 func usleep(usec uint32) -//extern mmap -func mmap(addr unsafe.Pointer, length uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer - -//extern munmap -func munmap(addr unsafe.Pointer, n uintptr) int32 - //go:noescape func write(fd uintptr, p unsafe.Pointer, n int32) int32 //go:noescape func open(name *byte, mode, perm int32) int32 - -func madvise(addr unsafe.Pointer, n uintptr, flags int32) diff --git a/libgo/go/runtime/symtab.go b/libgo/go/runtime/symtab.go index 52e2d03..bad0347 100644 --- a/libgo/go/runtime/symtab.go +++ b/libgo/go/runtime/symtab.go @@ -115,11 +115,17 @@ func FuncForPC(pc uintptr) *Func { // Name returns the name of the function. func (f *Func) Name() string { + if f == nil { + return "" + } return f.name } // Entry returns the entry address of the function. func (f *Func) Entry() uintptr { + if f == nil { + return 0 + } return f.entry } diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go index 611aba9..d060e09 100644 --- a/libgo/go/runtime/traceback_gccgo.go +++ b/libgo/go/runtime/traceback_gccgo.go @@ -9,7 +9,7 @@ package runtime import ( "runtime/internal/sys" - _ "unsafe" // for go:linkname + "unsafe" ) // For gccgo, use go:linkname to rename compiler-called functions to @@ -20,6 +20,34 @@ import ( //go:linkname goroutineheader runtime.goroutineheader //go:linkname printcreatedby runtime.printcreatedby +var ( + // initialized in tracebackinit + runfinqPC uintptr + bgsweepPC uintptr + forcegchelperPC uintptr + timerprocPC uintptr + gcBgMarkWorkerPC uintptr +) + +func tracebackinit() { + // Go variable initialization happens late during runtime startup. + // Instead of initializing the variables above in the declarations, + // schedinit calls this function so that the variables are + // initialized and available earlier in the startup sequence. + // This doesn't use funcPC to avoid memory allocation. + // FIXME: We should be able to use funcPC when escape analysis is on. + f1 := runfinq + runfinqPC = **(**uintptr)(unsafe.Pointer(&f1)) + f2 := bgsweep + bgsweepPC = **(**uintptr)(unsafe.Pointer(&f2)) + f3 := forcegchelper + forcegchelperPC = **(**uintptr)(unsafe.Pointer(&f3)) + f4 := timerproc + timerprocPC = **(**uintptr)(unsafe.Pointer(&f4)) + f5 := gcBgMarkWorker + gcBgMarkWorkerPC = **(**uintptr)(unsafe.Pointer(&f5)) +} + func printcreatedby(gp *g) { // Show what created goroutine, except main goroutine (goid 1). pc := gp.gopc @@ -168,14 +196,26 @@ func goroutineheader(gp *g) { // isSystemGoroutine reports whether the goroutine g must be omitted in // stack dumps and deadlock detector. func isSystemGoroutine(gp *g) bool { - // FIXME. - return false + // FIXME: This doesn't work reliably for gccgo because in many + // cases the startpc field will be set to a thunk rather than + // to one of these addresses. + pc := gp.startpc + return pc == runfinqPC && !fingRunning || + pc == bgsweepPC || + pc == forcegchelperPC || + pc == timerprocPC || + pc == gcBgMarkWorkerPC } func tracebackothers(me *g) { var tb tracebackg tb.gp = me + // The getTraceback function will modify me's stack context. + // Preserve it in case we have been called via systemstack. + context := me.context + stackcontext := me.stackcontext + level, _, _ := gotraceback() // Show the current goroutine first, if we haven't already. @@ -225,4 +265,7 @@ func tracebackothers(me *g) { } } unlock(&allglock) + + me.context = context + me.stackcontext = stackcontext } diff --git a/libgo/go/runtime/type.go b/libgo/go/runtime/type.go index cfee35a..6788f24 100644 --- a/libgo/go/runtime/type.go +++ b/libgo/go/runtime/type.go @@ -9,17 +9,18 @@ package runtime import "unsafe" type _type struct { + size uintptr + ptrdata uintptr + hash uint32 kind uint8 align int8 fieldAlign uint8 _ uint8 - size uintptr - hash uint32 hashfn func(unsafe.Pointer, uintptr) uintptr equalfn func(unsafe.Pointer, unsafe.Pointer) bool - gc unsafe.Pointer + gcdata *byte string *string *uncommontype ptrToThis *_type diff --git a/libgo/go/syscall/env_unix.go b/libgo/go/syscall/env_unix.go index 5bf3336..eb93e2e 100644 --- a/libgo/go/syscall/env_unix.go +++ b/libgo/go/syscall/env_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris // Unix environment variables. diff --git a/libgo/go/syscall/exec_bsd.go b/libgo/go/syscall/exec_bsd.go index af025e4..80991ec 100644 --- a/libgo/go/syscall/exec_bsd.go +++ b/libgo/go/syscall/exec_bsd.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd netbsd openbsd solaris +// +build aix darwin dragonfly freebsd netbsd openbsd solaris package syscall @@ -235,6 +235,10 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr // Set the controlling TTY to Ctty if sys.Setctty { + if TIOCSCTTY == 0 { + err1 = ENOSYS + goto childerror + } _, err1 = raw_ioctl(sys.Ctty, TIOCSCTTY, 0) if err1 != 0 { goto childerror diff --git a/libgo/go/syscall/exec_unix.go b/libgo/go/syscall/exec_unix.go index c04005c..f2bc741 100644 --- a/libgo/go/syscall/exec_unix.go +++ b/libgo/go/syscall/exec_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Fork, exec, wait, etc. diff --git a/libgo/go/syscall/exec_unix_test.go b/libgo/go/syscall/exec_unix_test.go index 69c4a1f..58708da 100644 --- a/libgo/go/syscall/exec_unix_test.go +++ b/libgo/go/syscall/exec_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package syscall_test diff --git a/libgo/go/syscall/export_unix_test.go b/libgo/go/syscall/export_unix_test.go index 47ec544..120500c 100644 --- a/libgo/go/syscall/export_unix_test.go +++ b/libgo/go/syscall/export_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package syscall diff --git a/libgo/go/syscall/libcall_aix.go b/libgo/go/syscall/libcall_aix.go new file mode 100644 index 0000000..992eeb4 --- /dev/null +++ b/libgo/go/syscall/libcall_aix.go @@ -0,0 +1,11 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build aix + +package syscall + +func raw_ptrace(request int, pid int, addr *byte, data *byte) Errno { + return ENOSYS +} diff --git a/libgo/go/syscall/libcall_posix_largefile.go b/libgo/go/syscall/libcall_posix_largefile.go index 1f437b4..9b13735 100644 --- a/libgo/go/syscall/libcall_posix_largefile.go +++ b/libgo/go/syscall/libcall_posix_largefile.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build linux solaris,386 solaris,sparc +// +build aix linux solaris,386 solaris,sparc // POSIX library calls on systems which use the largefile interface. diff --git a/libgo/go/syscall/libcall_posix_regfile.go b/libgo/go/syscall/libcall_posix_regfile.go index d106a7b..5b8f75a 100644 --- a/libgo/go/syscall/libcall_posix_regfile.go +++ b/libgo/go/syscall/libcall_posix_regfile.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !aix // +build !linux // +build !solaris !386 // +build !solaris !sparc diff --git a/libgo/go/syscall/libcall_posix_utimesnano.go b/libgo/go/syscall/libcall_posix_utimesnano.go index 5d9d02e..372b0d7 100644 --- a/libgo/go/syscall/libcall_posix_utimesnano.go +++ b/libgo/go/syscall/libcall_posix_utimesnano.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd openbsd netbsd solaris +// +build aix darwin dragonfly freebsd openbsd netbsd solaris // General POSIX version of UtimesNano. diff --git a/libgo/go/syscall/libcall_wait4.go b/libgo/go/syscall/libcall_wait4.go index 559d780..00b6874 100644 --- a/libgo/go/syscall/libcall_wait4.go +++ b/libgo/go/syscall/libcall_wait4.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !aix + // For systems with the wait4 library call. package syscall diff --git a/libgo/go/syscall/libcall_wait4_aix.go b/libgo/go/syscall/libcall_wait4_aix.go new file mode 100644 index 0000000..9c25d04 --- /dev/null +++ b/libgo/go/syscall/libcall_wait4_aix.go @@ -0,0 +1,26 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Handle AIX's wait4 specific behavior + +package syscall + +//sys wait4(pid Pid_t, status *_C_int, options int, rusage *Rusage) (wpid Pid_t, err error) +//wait4(pid Pid_t, status *_C_int, options _C_int, rusage *Rusage) Pid_t + +func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) { + var status _C_int + var r Pid_t + err = ERESTART + // AIX wait4 may return with ERESTART errno, while the processus is still + // active. + for err == ERESTART { + r, err = wait4(Pid_t(pid), &status, options, rusage) + } + wpid = int(r) + if wstatus != nil { + *wstatus = WaitStatus(status) + } + return +} diff --git a/libgo/go/syscall/mmap_unix_test.go b/libgo/go/syscall/mmap_unix_test.go index 01f7783..d0b3644 100644 --- a/libgo/go/syscall/mmap_unix_test.go +++ b/libgo/go/syscall/mmap_unix_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd +// +build aix darwin dragonfly freebsd linux netbsd openbsd package syscall_test diff --git a/libgo/go/syscall/signame.c b/libgo/go/syscall/signame.c index 0453c06..dca92a9 100644 --- a/libgo/go/syscall/signame.c +++ b/libgo/go/syscall/signame.c @@ -31,7 +31,7 @@ Signame (intgo sig) s = buf; } len = __builtin_strlen (s); - data = runtime_mallocgc (len, 0, FlagNoScan); + data = runtime_mallocgc (len, nil, false); __builtin_memcpy (data, s, len); ret.str = data; ret.len = len; diff --git a/libgo/go/syscall/sockcmsg_unix.go b/libgo/go/syscall/sockcmsg_unix.go index 0161699..c01602f 100644 --- a/libgo/go/syscall/sockcmsg_unix.go +++ b/libgo/go/syscall/sockcmsg_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris // Socket control messages diff --git a/libgo/go/syscall/socket_bsd.go b/libgo/go/syscall/socket_bsd.go index 0f09627..ecdab06 100644 --- a/libgo/go/syscall/socket_bsd.go +++ b/libgo/go/syscall/socket_bsd.go @@ -4,10 +4,12 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd openbsd netbsd +// +build aix darwin dragonfly freebsd openbsd netbsd package syscall +import "unsafe" + const SizeofSockaddrInet4 = 16 const SizeofSockaddrInet6 = 28 const SizeofSockaddrUnix = 110 diff --git a/libgo/go/syscall/syscall_unix.go b/libgo/go/syscall/syscall_unix.go index ddf7303..61aa1c4 100644 --- a/libgo/go/syscall/syscall_unix.go +++ b/libgo/go/syscall/syscall_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris package syscall diff --git a/libgo/go/syscall/timestruct.go b/libgo/go/syscall/timestruct.go index 49c3383..6ece338 100644 --- a/libgo/go/syscall/timestruct.go +++ b/libgo/go/syscall/timestruct.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package syscall diff --git a/libgo/go/syscall/wait.c b/libgo/go/syscall/wait.c index 8c3b53f..a50f7d6 100644 --- a/libgo/go/syscall/wait.c +++ b/libgo/go/syscall/wait.c @@ -12,6 +12,10 @@ #include "runtime.h" +#ifndef WCOREDUMP +#define WCOREDUMP(status) (((status) & 0200) != 0) +#endif + extern _Bool Exited (uint32_t *w) __asm__ (GOSYM_PREFIX "syscall.Exited.N18_syscall.WaitStatus"); diff --git a/libgo/go/time/sys_unix.go b/libgo/go/time/sys_unix.go index 91d54c9..4c68bbd 100644 --- a/libgo/go/time/sys_unix.go +++ b/libgo/go/time/sys_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris +// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris package time diff --git a/libgo/go/time/zoneinfo_unix.go b/libgo/go/time/zoneinfo_unix.go index 7727488..a876e27 100644 --- a/libgo/go/time/zoneinfo_unix.go +++ b/libgo/go/time/zoneinfo_unix.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin,386 darwin,amd64 dragonfly freebsd linux,!android nacl netbsd openbsd solaris +// +build aix darwin,386 darwin,amd64 dragonfly freebsd linux,!android nacl netbsd openbsd solaris // Parse "zoneinfo" time zone file. // This is a fairly standard file format used on OS X, Linux, BSD, Sun, and others. |