From dd931d9b48647e898dc80927c532ae93cc09e192 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 24 Sep 2018 21:46:21 +0000 Subject: libgo: update to Go 1.11 Reviewed-on: https://go-review.googlesource.com/136435 gotools/: * Makefile.am (mostlyclean-local): Run chmod on check-go-dir to make sure it is writable. (check-go-tools): Likewise. (check-vet): Copy internal/objabi to check-vet-dir. * Makefile.in: Rebuild. From-SVN: r264546 --- libgo/go/runtime/malloc.go | 699 +++++++++++++++++++++++++++++---------------- 1 file changed, 460 insertions(+), 239 deletions(-) (limited to 'libgo/go/runtime/malloc.go') diff --git a/libgo/go/runtime/malloc.go b/libgo/go/runtime/malloc.go index 523989e..ac4759f 100644 --- a/libgo/go/runtime/malloc.go +++ b/libgo/go/runtime/malloc.go @@ -78,9 +78,34 @@ // // 3. We don't zero pages that never get reused. +// Virtual memory layout +// +// The heap consists of a set of arenas, which are 64MB on 64-bit and +// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also +// aligned to the arena size. +// +// Each arena has an associated heapArena object that stores the +// metadata for that arena: the heap bitmap for all words in the arena +// and the span map for all pages in the arena. heapArena objects are +// themselves allocated off-heap. +// +// Since arenas are aligned, the address space can be viewed as a +// series of arena frames. The arena map (mheap_.arenas) maps from +// arena frame number to *heapArena, or nil for parts of the address +// space not backed by the Go heap. The arena map is structured as a +// two-level array consisting of a "L1" arena map and many "L2" arena +// maps; however, since arenas are large, on many architectures, the +// arena map consists of a single, large L2 map. +// +// The arena map covers the entire possible address space, allowing +// the Go heap to use any part of the address space. The allocator +// attempts to keep arenas contiguous so that large spans (and hence +// large objects) can cross arenas. + package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -124,9 +149,8 @@ const ( _TinySize = 16 _TinySizeClass = int8(2) - _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc - _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. - _HeapAllocChunk = 1 << 20 // Chunk size for heap growth + _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc + _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap. // Per-P, per order stack segment cache size. _StackCacheSize = 32 * 1024 @@ -145,25 +169,144 @@ const ( // plan9 | 4KB | 3 _NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9 - // Number of bits in page to span calculations (4k pages). - // On Windows 64-bit we limit the arena to 32GB or 35 bits. - // Windows counts memory used by page table into committed memory - // of the process, so we can't reserve too much memory. - // See https://golang.org/issue/5402 and https://golang.org/issue/5236. - // On other 64-bit platforms, we limit the arena to 512GB, or 39 bits. - // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address. - // The only exception is mips32 which only has access to low 2GB of virtual memory. - // On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory, - // but as most devices have less than 4GB of physical memory anyway, we - // try to be conservative here, and only ask for a 2GB heap. 
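The two-level arena map described in the layout comment above can be pictured with a small standalone sketch. Everything below is illustrative rather than taken from this patch: the constants are assumed, simplified values, amd64's arenaBaseOffset is ignored, a 64-bit build is presumed, and the real runtime sizes the L1/L2 split per GOOS/GOARCH (often using a single large L2 map).

package main

import "fmt"

// Assumed, simplified constants: 64 MB arenas and a 48-bit address
// space, split here into a small L1 map and larger L2 maps.
const (
	logHeapArenaBytes = 26 // 64 MB arenas, as in the comment above
	heapArenaBytes    = 1 << logHeapArenaBytes
	arenaL1Bits       = 6
	arenaL2Bits       = 48 - logHeapArenaBytes - arenaL1Bits
)

// heapArena stands in for the per-arena metadata (heap bitmap, span map).
type heapArena struct{ spans [heapArenaBytes / 8192]uintptr }

// arenas models the two-level arena map; nil entries mean that part of
// the address space is not backed by the Go heap.
var arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena

// lookup turns an address into an arena frame number, splits that into
// L1 and L2 indexes, and returns the arena's metadata or nil.
func lookup(p uintptr) *heapArena {
	ai := p / heapArenaBytes // arena frame number
	l1 := ai >> arenaL2Bits
	l2 := ai & (1<<arenaL2Bits - 1)
	if arenas[l1] == nil {
		return nil
	}
	return arenas[l1][l2]
}

func main() {
	fmt.Println(lookup(0x00c000000000)) // nil: nothing is mapped in this sketch
}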
- _MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle)) - _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift - - // _MaxMem is the maximum heap arena size minus 1. + // heapAddrBits is the number of bits in a heap address. On + // amd64, addresses are sign-extended beyond heapAddrBits. On + // other arches, they are zero-extended. + // + // On 64-bit platforms, we limit this to 48 bits based on a + // combination of hardware and OS limitations. + // + // amd64 hardware limits addresses to 48 bits, sign-extended + // to 64 bits. Addresses where the top 16 bits are not either + // all 0 or all 1 are "non-canonical" and invalid. Because of + // these "negative" addresses, we offset addresses by 1<<47 + // (arenaBaseOffset) on amd64 before computing indexes into + // the heap arenas index. In 2017, amd64 hardware added + // support for 57 bit addresses; however, currently only Linux + // supports this extension and the kernel will never choose an + // address above 1<<47 unless mmap is called with a hint + // address above 1<<47 (which we never do). + // + // arm64 hardware (as of ARMv8) limits user addresses to 48 + // bits, in the range [0, 1<<48). + // + // ppc64, mips64, and s390x support arbitrary 64 bit addresses + // in hardware. However, since Go only supports Linux on + // these, we lean on OS limits. Based on Linux's processor.h, + // the user address space is limited as follows on 64-bit + // architectures: + // + // Architecture Name Maximum Value (exclusive) + // --------------------------------------------------------------------- + // amd64 TASK_SIZE_MAX 0x007ffffffff000 (47 bit addresses) + // arm64 TASK_SIZE_64 0x01000000000000 (48 bit addresses) + // ppc64{,le} TASK_SIZE_USER64 0x00400000000000 (46 bit addresses) + // mips64{,le} TASK_SIZE64 0x00010000000000 (40 bit addresses) + // s390x TASK_SIZE 1<<64 (64 bit addresses) + // + // These limits may increase over time, but are currently at + // most 48 bits except on s390x. On all architectures, Linux + // starts placing mmap'd regions at addresses that are + // significantly below 48 bits, so even if it's possible to + // exceed Go's 48 bit limit, it's extremely unlikely in + // practice. + // + // On 32-bit platforms, we accept the full 32-bit address + // space because doing so is cheap. + // mips32 only has access to the low 2GB of virtual memory, so + // we further limit it to 31 bits. + // + // WebAssembly currently has a limit of 4GB linear memory. + heapAddrBits = (_64bit*(1-sys.GoarchWasm))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + + // maxAlloc is the maximum size of an allocation. On 64-bit, + // it's theoretically possible to allocate 1<= 0; i-- { + var p uintptr switch { case GOARCH == "arm64" && GOOS == "darwin": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) @@ -312,225 +447,283 @@ func mallocinit() { } else { p = uintptr(i)<<42 | uintptrMask&(0x70<<52) } + case raceenabled: + // The TSAN runtime requires the heap + // to be in the range [0x00c000000000, + // 0x00e000000000). 
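As a concrete, hedged illustration of the arenaBaseOffset idea mentioned above: the standalone sketch below (64-bit build assumed, 64 MB arena size assumed, names not taken from this patch) shows how adding 1<<47 before indexing turns amd64's sign-extended canonical addresses into one contiguous, non-negative range of arena frame numbers.

package main

import "fmt"

// Illustrative constants: the 48-bit limit and the 1<<47 offset come
// from the comment above; the 64 MB arena size is assumed.
const (
	heapAddrBits      = 48
	logHeapArenaBytes = 26
	arenaBaseOffset   = uintptr(1) << 47
)

// frame maps an address to an arena frame number. Adding the offset
// first makes amd64's sign-extended "negative" half wrap around, so
// the lowest canonical address lands at frame 0 and the whole
// canonical range becomes a single contiguous index space.
func frame(p uintptr) uintptr {
	return (p + arenaBaseOffset) >> logHeapArenaBytes
}

func main() {
	// Needs a 64-bit build; these literals overflow a 32-bit uintptr.
	fmt.Printf("%#x\n", frame(0xffff800000000000)) // lowest canonical address: frame 0
	fmt.Printf("%#x\n", frame(0x00c000000000))     // a typical heap hint address
	fmt.Printf("%#x\n", frame(0x00007fffffffffff)) // top of the user half
	fmt.Printf("%#x possible frames\n", uintptr(1)<<(heapAddrBits-logHeapArenaBytes))
}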
+ p = uintptr(i)<<32 | uintptrMask&(0x00c0<<32) + if p >= uintptrMask&0x00e000000000 { + continue + } default: p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32) } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { - break - } + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - } + } else { + // On a 32-bit machine, we're much more concerned + // about keeping the usable heap contiguous. + // Hence: + // + // 1. We reserve space for all heapArenas up front so + // they don't get interleaved with the heap. They're + // ~258MB, so this isn't too bad. (We could reserve a + // smaller amount of space up front if this is a + // problem.) + // + // 2. We hint the heap to start right above the end of + // the binary so we have the best chance of keeping it + // contiguous. + // + // 3. We try to stake out a reasonably large initial + // heap reservation. - if p == 0 { - // On a 32-bit machine, we can't typically get away - // with a giant virtual address space reservation. - // Instead we map the memory information bitmap - // immediately after the data segment, large enough - // to handle the entire 4GB address space (256 MB), - // along with a reservation for an initial arena. - // When that gets used up, we'll start asking the kernel - // for any memory anywhere. + const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{}) + meta := uintptr(sysReserve(nil, arenaMetaSize)) + if meta != 0 { + mheap_.heapArenaAlloc.init(meta, arenaMetaSize) + } // We want to start the arena low, but if we're linked // against C code, it's possible global constructors // have called malloc and adjusted the process' brk. // Query the brk so we can avoid trying to map the - // arena over it (which will cause the kernel to put - // the arena somewhere else, likely at a high + // region over it (which will cause the kernel to put + // the region somewhere else, likely at a high // address). procBrk := sbrk0() - // If we fail to allocate, try again with a smaller arena. - // This is necessary on Android L where we share a process - // with ART, which reserves virtual memory aggressively. - // In the worst case, fall back to a 0-sized initial arena, - // in the hope that subsequent reservations will succeed. + // If we ask for the end of the data segment but the + // operating system requires a little more space + // before we can start allocating, it will give out a + // slightly higher pointer. Except QEMU, which is + // buggy, as usual: it won't adjust the pointer + // upward. So adjust it upward a little bit ourselves: + // 1/4 MB to get away from the running binary image. + p := getEnd() + if p < procBrk { + p = procBrk + } + if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end { + p = mheap_.heapArenaAlloc.end + } + p = round(p+(256<<10), heapArenaBytes) + // Because we're worried about fragmentation on + // 32-bit, we try to make a large initial reservation. arenaSizes := [...]uintptr{ 512 << 20, 256 << 20, 128 << 20, - 0, } - for _, arenaSize := range &arenaSizes { - // SysReserve treats the address we ask for, end, as a hint, - // not as an absolute requirement. If we ask for the end - // of the data segment but the operating system requires - // a little more space before we can start allocating, it will - // give out a slightly higher pointer. Except QEMU, which - // is buggy, as usual: it won't adjust the pointer upward. 
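The 32-bit setup above repeatedly rounds addresses up to power-of-two boundaries via the runtime's round helper. A minimal standalone version of that align-up step, using made-up numbers for the end of the binary image, looks like this:

package main

import "fmt"

// roundUp returns n rounded up to a multiple of align, which must be
// a power of two; this mirrors how round is used above.
func roundUp(n, align uintptr) uintptr {
	return (n + align - 1) &^ (align - 1)
}

func main() {
	const heapArenaBytes = 4 << 20 // 4 MB arenas on 32-bit, per the layout comment
	end := uintptr(0x08123456)     // stand-in for the end of the binary image
	// Step 1/4 MB past the binary, then snap to an arena boundary,
	// mirroring round(p+(256<<10), heapArenaBytes) above.
	p := roundUp(end+(256<<10), heapArenaBytes)
	fmt.Printf("%#x\n", p) // 0x8400000
}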
- // So adjust it upward a little bit ourselves: 1/4 MB to get - // away from the running binary image and then round up - // to a MB boundary. - p = round(getEnd()+(1<<18), 1<<20) - pSize = bitmapSize + spansSize + arenaSize + _PageSize - if p <= procBrk && procBrk < p+pSize { - // Move the start above the brk, - // leaving some room for future brk - // expansion. - p = round(procBrk+(1<<20), 1<<20) - } - p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved)) - if p != 0 { + a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes) + if a != nil { + mheap_.arena.init(uintptr(a), size) + p = uintptr(a) + size // For hint below break } } - if p == 0 { - throw("runtime: cannot reserve arena virtual address space") - } - } - - // PageSize can be larger than OS definition of page size, - // so SysReserve can give us a PageSize-unaligned pointer. - // To overcome this we ask for PageSize more and round up the pointer. - p1 := round(p, _PageSize) - pSize -= p1 - p - - spansStart := p1 - p1 += spansSize - mheap_.bitmap = p1 + bitmapSize - p1 += bitmapSize - if sys.PtrSize == 4 { - // Set arena_start such that we can accept memory - // reservations located anywhere in the 4GB virtual space. - mheap_.arena_start = 0 - } else { - mheap_.arena_start = p1 + hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc()) + hint.addr = p + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } - mheap_.arena_end = p + pSize - mheap_.arena_used = p1 - mheap_.arena_alloc = p1 - mheap_.arena_reserved = reserved - - if mheap_.arena_start&(_PageSize-1) != 0 { - println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) - throw("misrounded allocation in mallocinit") - } - - // Initialize the rest of the allocator. - mheap_.init(spansStart, spansSize) - _g_ := getg() - _g_.m.mcache = allocmcache() } -// sysAlloc allocates the next n bytes from the heap arena. The -// returned pointer is always _PageSize aligned and between -// h.arena_start and h.arena_end. sysAlloc returns nil on failure. +// sysAlloc allocates heap arena space for at least n bytes. The +// returned pointer is always heapArenaBytes-aligned and backed by +// h.arenas metadata. The returned size is always a multiple of +// heapArenaBytes. sysAlloc returns nil on failure. // There is no corresponding free function. -func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer { - // strandLimit is the maximum number of bytes to strand from - // the current arena block. If we would need to strand more - // than this, we fall back to sysAlloc'ing just enough for - // this allocation. - const strandLimit = 16 << 20 - - if n > h.arena_end-h.arena_alloc { - // If we haven't grown the arena to _MaxMem yet, try - // to reserve some more address space. - p_size := round(n+_PageSize, 256<<20) - new_end := h.arena_end + p_size // Careful: can overflow - if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem { - // TODO: It would be bad if part of the arena - // is reserved and part is not. - var reserved bool - p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved)) - if p == 0 { - // TODO: Try smaller reservation - // growths in case we're in a crowded - // 32-bit address space. - goto reservationFailed - } - // p can be just about anywhere in the address - // space, including before arena_end. - if p == h.arena_end { - // The new block is contiguous with - // the current block. Extend the - // current arena block. 
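The hint list consumed by sysAlloc above is a plain singly linked list of candidate addresses, some growing the heap upward and some downward. The simplified sketch below is not the runtime's code (the names and the assume-success path are illustrative); it only shows the direction handling and the p+n wraparound check:

package main

import "fmt"

// arenaHint mirrors the idea of a growth hint: a candidate address, a
// growth direction, and a link to the next hint to try.
type arenaHint struct {
	addr uintptr
	down bool // if set, the region grows toward lower addresses
	next *arenaHint
}

// nextRegion returns the base of the next n-byte region to try from a
// hint, pretends the reservation succeeded, and slides the hint past
// it. It returns 0 if the arithmetic would wrap around, the same
// "p+n < p" check used above.
func nextRegion(h *arenaHint, n uintptr) uintptr {
	p := h.addr
	if h.down {
		p -= n
	}
	if p+n < p {
		return 0
	}
	if h.down {
		h.addr = p
	} else {
		h.addr = p + n
	}
	return p
}

func main() {
	// One upward hint at 0x00c000000000, as in the 64-bit setup above
	// (64-bit build assumed). Two 64 MB grabs come back adjacent.
	h := &arenaHint{addr: 0x00c000000000}
	fmt.Printf("%#x\n", nextRegion(h, 64<<20))
	fmt.Printf("%#x\n", nextRegion(h, 64<<20))
}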
- h.arena_end = new_end - h.arena_reserved = reserved - } else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit { - // We were able to reserve more memory - // within the arena space, but it's - // not contiguous with our previous - // reservation. It could be before or - // after our current arena_used. - // - // Keep everything page-aligned. - // Our pages are bigger than hardware pages. - h.arena_end = p + p_size - p = round(p, _PageSize) - h.arena_alloc = p - h.arena_reserved = reserved - } else { - // We got a mapping, but either - // - // 1) It's not in the arena, so we - // can't use it. (This should never - // happen on 32-bit.) - // - // 2) We would need to discard too - // much of our current arena block to - // use it. - // - // We haven't added this allocation to - // the stats, so subtract it from a - // fake stat (but avoid underflow). - // - // We'll fall back to a small sysAlloc. - stat := uint64(p_size) - sysFree(unsafe.Pointer(p), p_size, &stat) +// +// h must be locked. +func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + n = round(n, heapArenaBytes) + + // First, try the arena pre-reservation. + v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys) + if v != nil { + size = n + goto mapped + } + + // Try to grow the heap at a hint address. + for h.arenaHints != nil { + hint := h.arenaHints + p := hint.addr + if hint.down { + p -= n + } + if p+n < p { + // We can't use this, so don't ask. + v = nil + } else if arenaIndex(p+n-1) >= 1< h.arena_used { - h.setArenaUsed(h.arena_alloc, true) + if size == 0 { + if raceenabled { + // The race detector assumes the heap lives in + // [0x00c000000000, 0x00e000000000), but we + // just ran out of hints in this region. Give + // a nice failure. + throw("too many address space collisions for -race mode") } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + // All of the hints failed, so we'll take any + // (sufficiently aligned) address the kernel will give + // us. + v, size = sysReserveAligned(nil, n, heapArenaBytes) + if v == nil { + return nil, 0 } - return unsafe.Pointer(p) + + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint } -reservationFailed: - // If using 64-bit, our reservation is all we have. - if sys.PtrSize != 4 { - return nil + // Check for bad pointers or pointers we can't use. + { + var bad string + p := uintptr(v) + if p+size < p { + bad = "region exceeds uintptr range" + } else if arenaIndex(p) >= 1<= 1< _MaxMem { - // This shouldn't be possible because _MaxMem is the - // whole address space on 32-bit. - top := uint64(h.arena_start) + _MaxMem - print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n") - sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys) - return nil + // Back the reservation. + sysMap(v, size, &memstats.heap_sys) + +mapped: + // Create arena metadata. + for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ { + l2 := h.arenas[ri.l1()] + if l2 == nil { + // Allocate an L2 arena map. 
+ l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil)) + if l2 == nil { + throw("out of memory allocating heap arena map") + } + atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2)) + } + + if l2[ri.l2()] != nil { + throw("arena already initialized") + } + var r *heapArena + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + if r == nil { + throw("out of memory allocating heap arena metadata") + } + } + + // Store atomically just in case an object from the + // new heap arena becomes visible before the heap lock + // is released (which shouldn't happen, but there's + // little downside to this). + atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r)) } - p += -p & (_PageSize - 1) - if p+n > h.arena_used { - h.setArenaUsed(p+n, true) + // Tell the race detector about the new heap memory. + if raceenabled { + racemapshadow(v, size) } - if p&(_PageSize-1) != 0 { - throw("misrounded allocation in MHeap_SysAlloc") + return +} + +// sysReserveAligned is like sysReserve, but the returned pointer is +// aligned to align bytes. It may reserve either n or n+align bytes, +// so it returns the size that was reserved. +func sysReserveAligned(v unsafe.Pointer, size, align uintptr) (unsafe.Pointer, uintptr) { + // Since the alignment is rather large in uses of this + // function, we're not likely to get it by chance, so we ask + // for a larger region and remove the parts we don't need. + retries := 0 +retry: + p := uintptr(sysReserve(v, size+align)) + switch { + case p == 0: + return nil, 0 + case p&(align-1) == 0: + // We got lucky and got an aligned region, so we can + // use the whole thing. + return unsafe.Pointer(p), size + align + case GOOS == "windows": + // On Windows we can't release pieces of a + // reservation, so we release the whole thing and + // re-reserve the aligned sub-region. This may race, + // so we may have to try again. + sysFree(unsafe.Pointer(p), size+align, nil) + p = round(p, align) + p2 := sysReserve(unsafe.Pointer(p), size) + if p != uintptr(p2) { + // Must have raced. Try again. + sysFree(p2, size, nil) + if retries++; retries == 100 { + throw("failed to allocate aligned heap memory; too many retries") + } + goto retry + } + // Success. + return p2, size + default: + // Trim off the unaligned parts. + pAligned := round(p, align) + sysFree(unsafe.Pointer(p), pAligned-p, nil) + end := pAligned + size + endLen := (p + size + align) - end + if endLen > 0 { + sysFree(unsafe.Pointer(end), endLen, nil) + } + return unsafe.Pointer(pAligned), size } - return unsafe.Pointer(p) } // base address for all 0-byte allocations @@ -862,7 +1055,7 @@ func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { throw("out of memory") } s.limit = s.base() + size - heapBitsForSpan(s.base()).initSpan(s) + heapBitsForAddr(s.base()).initSpan(s) return s } @@ -875,7 +1068,7 @@ func newobject(typ *_type) unsafe.Pointer { //go:linkname reflect_unsafe_New reflect.unsafe_New func reflect_unsafe_New(typ *_type) unsafe.Pointer { - return newobject(typ) + return mallocgc(typ.size, typ, true) } // newarray allocates an array of n elements of type typ. @@ -1046,6 +1239,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { return p } +// linearAlloc is a simple linear allocator that pre-reserves a region +// of memory and then maps that region as needed. 
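The trimming arithmetic in sysReserveAligned's default case is easier to follow with concrete numbers. The sketch below performs the same head/tail computation on a made-up unaligned reservation address (64-bit build assumed, no real reservations are made):

package main

import "fmt"

func main() {
	const (
		size  = uintptr(64 << 20) // one assumed 64 MB arena
		align = uintptr(64 << 20)
	)
	// Pretend sysReserve(nil, size+align) handed back this unaligned base.
	p := uintptr(0x7f3b12c00000)

	// Default case of sysReserveAligned: keep [pAligned, pAligned+size)
	// and release the unaligned head and tail.
	pAligned := (p + align - 1) &^ (align - 1)
	head := pAligned - p             // freed before the aligned region
	end := pAligned + size           // first byte past the kept region
	tail := (p + size + align) - end // freed after the kept region

	fmt.Printf("kept  [%#x, %#x)\n", pAligned, end)
	fmt.Printf("freed %#x bytes at the front, %#x bytes at the back\n", head, tail)
	// head+tail always equals align, so exactly size bytes stay reserved.
	fmt.Println(head+tail == align)
}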
The caller is +// responsible for locking. +type linearAlloc struct { + next uintptr // next free byte + mapped uintptr // one byte past end of mapped space + end uintptr // end of reserved space +} + +func (l *linearAlloc) init(base, size uintptr) { + l.next, l.mapped = base, base + l.end = base + size +} + +func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { + p := round(l.next, align) + if p+size > l.end { + return nil + } + l.next = p + size + if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped { + // We need to map more of the reserved space. + sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, sysStat) + l.mapped = pEnd + } + return unsafe.Pointer(p) +} + // notInHeap is off-heap memory allocated by a lower-level allocator // like sysAlloc or persistentAlloc. //
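As a usage-level illustration of linearAlloc's bump-and-lazily-map behavior above, here is a self-contained analogue over an ordinary byte slice; sysMap is replaced by a high-water mark and all names are illustrative, not runtime API:

package main

import "fmt"

// bumpAlloc mirrors linearAlloc's bookkeeping over a byte slice: next
// is the next free offset, mapped is the lazily advanced high-water
// mark standing in for sysMap'd pages, and len(buf) plays the role of
// the end of the reservation.
type bumpAlloc struct {
	buf          []byte
	next, mapped uintptr
	pageSize     uintptr
}

func roundUp(n, align uintptr) uintptr { return (n + align - 1) &^ (align - 1) }

func (b *bumpAlloc) alloc(size, align uintptr) []byte {
	p := roundUp(b.next, align)
	if p+size > uintptr(len(b.buf)) {
		return nil // reservation exhausted
	}
	b.next = p + size
	if pEnd := roundUp(b.next-1, b.pageSize); pEnd > b.mapped {
		// linearAlloc would sysMap [mapped, pEnd) at this point.
		b.mapped = pEnd
	}
	return b.buf[p : p+size]
}

func main() {
	b := &bumpAlloc{buf: make([]byte, 1<<16), pageSize: 4096}
	fmt.Println(len(b.alloc(100, 8)), b.next, b.mapped)   // 100 100 4096
	fmt.Println(len(b.alloc(5000, 16)), b.next, b.mapped) // 5000 5112 8192
}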