Diffstat (limited to 'libgo/go/runtime/mgcscavenge.go')
-rw-r--r-- | libgo/go/runtime/mgcscavenge.go | 395
1 file changed, 205 insertions(+), 190 deletions(-)
diff --git a/libgo/go/runtime/mgcscavenge.go b/libgo/go/runtime/mgcscavenge.go
index 3fa1c46..adf2b05 100644
--- a/libgo/go/runtime/mgcscavenge.go
+++ b/libgo/go/runtime/mgcscavenge.go
@@ -56,6 +56,7 @@
 package runtime
 
 import (
+	"internal/goos"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
@@ -90,7 +91,7 @@ const (
 	//
 	// This ratio is used as part of multiplicative factor to help the scavenger account
 	// for the additional costs of using scavenged memory in its pacing.
-	scavengeCostRatio = 0.7 * (sys.GoosDarwin + sys.GoosIos)
+	scavengeCostRatio = 0.7 * (goos.IsDarwin + goos.IsIos)
 
 	// scavengeReservationShards determines the amount of memory the scavenger
 	// should reserve for scavenging at a time. Specifically, the amount of
@@ -104,7 +105,8 @@ func heapRetained() uint64 {
 }
 
 // gcPaceScavenger updates the scavenger's pacing, particularly
-// its rate and RSS goal.
+// its rate and RSS goal. For this, it requires the current heapGoal,
+// and the heapGoal for the previous GC cycle.
 //
 // The RSS goal is based on the current heap goal with a small overhead
 // to accommodate non-determinism in the allocator.
@@ -112,18 +114,22 @@ func heapRetained() uint64 {
 // The pacing is based on scavengePageRate, which applies to both regular and
 // huge pages. See that constant for more information.
 //
+// Must be called whenever GC pacing is updated.
+//
 // mheap_.lock must be held or the world must be stopped.
-func gcPaceScavenger() {
+func gcPaceScavenger(heapGoal, lastHeapGoal uint64) {
+	assertWorldStoppedOrLockHeld(&mheap_.lock)
+
 	// If we're called before the first GC completed, disable scavenging.
 	// We never scavenge before the 2nd GC cycle anyway (we don't have enough
 	// information about the heap yet) so this is fine, and avoids a fault
 	// or garbage data later.
-	if gcController.lastHeapGoal == 0 {
-		mheap_.scavengeGoal = ^uint64(0)
+	if lastHeapGoal == 0 {
+		atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
 		return
 	}
 	// Compute our scavenging goal.
-	goalRatio := float64(atomic.Load64(&gcController.heapGoal)) / float64(gcController.lastHeapGoal)
+	goalRatio := float64(heapGoal) / float64(lastHeapGoal)
 	retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio)
 	// Add retainExtraPercent overhead to retainedGoal. This calculation
 	// looks strange but the purpose is to arrive at an integer division
@@ -151,10 +157,10 @@ func gcPaceScavenger() {
 	// the background scavenger. We disable the background scavenger if there's
 	// less than one physical page of work to do because it's not worth it.
 	if retainedNow <= retainedGoal || retainedNow-retainedGoal < uint64(physPageSize) {
-		mheap_.scavengeGoal = ^uint64(0)
+		atomic.Store64(&mheap_.scavengeGoal, ^uint64(0))
 		return
 	}
-	mheap_.scavengeGoal = retainedGoal
+	atomic.Store64(&mheap_.scavengeGoal, retainedGoal)
 }
 
 // Sleep/wait state of the background scavenger.
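The pacing hunks above reduce to a small amount of arithmetic: scale last cycle's heap in-use by the ratio of the new and old heap goals, pad it slightly, and publish the result atomically, with ^uint64(0) meaning "scavenger disabled". The standalone sketch below restates that math; retainExtraPercent (10) and the round-up to a physical page come from the surrounding, unchanged parts of the file and are assumptions here, not part of this diff.

	package main

	import "fmt"

	// computeScavengeGoal is a hypothetical restatement of gcPaceScavenger's
	// goal computation. A result of ^uint64(0) means "scavenger disabled".
	func computeScavengeGoal(heapGoal, lastHeapGoal, lastHeapInuse, physPageSize uint64) uint64 {
		const retainExtraPercent = 10 // assumed; defined elsewhere in mgcscavenge.go

		// Before the second GC cycle there is nothing to pace against.
		if lastHeapGoal == 0 {
			return ^uint64(0)
		}
		// Scale last cycle's in-use heap by how much the heap goal moved.
		goalRatio := float64(heapGoal) / float64(lastHeapGoal)
		retainedGoal := uint64(float64(lastHeapInuse) * goalRatio)
		// Add a small overhead to accommodate allocator non-determinism.
		retainedGoal += retainedGoal / (100 / retainExtraPercent)
		// Round up to a physical page; we can't scavenge less than one.
		return (retainedGoal + physPageSize - 1) &^ (physPageSize - 1)
	}

	func main() {
		// Heap goal grew from 512 MiB to 640 MiB with 500 MiB in use last cycle.
		fmt.Println(computeScavengeGoal(640<<20, 512<<20, 500<<20, 4096))
	}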
@@ -249,7 +255,7 @@ func scavengeSleep(ns int64) int64 {
 // The background scavenger maintains the RSS of the application below
 // the line described by the proportional scavenging statistics in
 // the mheap struct.
-func bgscavenge() {
+func bgscavenge(c chan int) {
 	setSystemGoroutine()
 
 	scavenge.g = getg()
@@ -259,56 +265,93 @@ func bgscavenge() {
 	scavenge.parked = true
 
 	scavenge.timer = new(timer)
-	scavenge.timer.f = func(_ interface{}, _ uintptr) {
+	scavenge.timer.f = func(_ any, _ uintptr) {
 		wakeScavenger()
 	}
 
-	gcenable_setup <- 1
+	c <- 1
 	goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1)
 
-	// Exponentially-weighted moving average of the fraction of time this
-	// goroutine spends scavenging (that is, percent of a single CPU).
-	// It represents a measure of scheduling overheads which might extend
-	// the sleep or the critical time beyond what's expected. Assume no
-	// overhead to begin with.
-	//
-	// TODO(mknyszek): Consider making this based on total CPU time of the
-	// application (i.e. scavengePercent * GOMAXPROCS). This isn't really
-	// feasible now because the scavenger acquires the heap lock over the
-	// scavenging operation, which means scavenging effectively blocks
-	// allocators and isn't scalable. However, given a scalable allocator,
-	// it makes sense to also make the scavenger scale with it; if you're
-	// allocating more frequently, then presumably you're also generating
-	// more work for the scavenger.
-	const idealFraction = scavengePercent / 100.0
-	scavengeEWMA := float64(idealFraction)
+	// idealFraction is the ideal % of overall application CPU time that we
+	// spend scavenging.
+	idealFraction := float64(scavengePercent) / 100.0
+	// Input: fraction of CPU time used.
+	// Setpoint: idealFraction.
+	// Output: ratio of critical time to sleep time (determines sleep time).
+	//
+	// The output of this controller is somewhat indirect to what we actually
+	// want to achieve: how much time to sleep for. The reason for this definition
+	// is to ensure that the controller's outputs have a direct relationship with
+	// its inputs (as opposed to an inverse relationship), making it somewhat
+	// easier to reason about for tuning purposes.
+	critSleepController := piController{
+		// Tuned loosely via Ziegler-Nichols process.
+		kp: 0.3375,
+		ti: 3.2e6,
+		tt: 1e9, // 1 second reset time.
+
+		// These ranges seem wide, but we want to give the controller plenty of
+		// room to hunt for the optimal value.
+		min: 0.001,  // 1:1000
+		max: 1000.0, // 1000:1
+	}
+	// It doesn't really matter what value we start at, but we can't be zero, because
+	// that'll cause divide-by-zero issues.
+	critSleepRatio := 0.001
 
 	for {
 		released := uintptr(0)
-
-		// Time in scavenging critical section.
 		crit := float64(0)
 
-		// Run on the system stack since we grab the heap lock,
-		// and a stack growth with the heap lock means a deadlock.
-		systemstack(func() {
-			lock(&mheap_.lock)
-
+		// Spend at least 1 ms scavenging, otherwise the corresponding
+		// sleep time to maintain our desired utilization is too low to
+		// be reliable.
+		const minCritTime = 1e6
+		for crit < minCritTime {
 			// If background scavenging is disabled or if there's no work to do just park.
-			retained, goal := heapRetained(), mheap_.scavengeGoal
+			retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal)
 			if retained <= goal {
-				unlock(&mheap_.lock)
-				return
+				break
 			}
 
-			// Scavenge one page, and measure the amount of time spent scavenging.
+			// scavengeQuantum is the amount of memory we try to scavenge
+			// in one go. A smaller value means the scavenger is more responsive
+			// to the scheduler in case of e.g. preemption. A larger value means
+			// that the overheads of scavenging are better amortized, so better
+			// scavenging throughput.
+			//
+			// The current value is chosen assuming a cost of ~10µs/physical page
+			// (this is somewhat pessimistic), which implies a worst-case latency of
+			// about 160µs for 4 KiB physical pages. The current value is biased
+			// toward latency over throughput.
+			const scavengeQuantum = 64 << 10
+
+			// Accumulate the amount of time spent scavenging.
 			start := nanotime()
-			released = mheap_.pages.scavenge(physPageSize, true)
-			mheap_.pages.scav.released += released
-			crit = float64(nanotime() - start)
+			r := mheap_.pages.scavenge(scavengeQuantum)
+			atomic.Xadduintptr(&mheap_.pages.scav.released, r)
+			end := nanotime()
 
-			unlock(&mheap_.lock)
-		})
+			// On some platforms we may see end >= start if the time it takes to scavenge
+			// memory is less than the minimum granularity of its clock (e.g. Windows) or
+			// due to clock bugs.
+			//
+			// In this case, just assume scavenging takes 10 µs per regular physical page
+			// (determined empirically), and conservatively ignore the impact of huge pages
+			// on timing.
+			const approxCritNSPerPhysicalPage = 10e3
+			if end <= start {
+				crit += approxCritNSPerPhysicalPage * float64(r/physPageSize)
+			} else {
+				crit += float64(end - start)
+			}
+			released += r
+
+			// When using fake time just do one loop.
+			if faketime != 0 {
+				break
+			}
+		}
 
 		if released == 0 {
 			lock(&scavenge.lock)
@@ -325,18 +368,13 @@ func bgscavenge() {
 			throw("released less than one physical page of memory")
 		}
 
-		// On some platforms we may see crit as zero if the time it takes to scavenge
-		// memory is less than the minimum granularity of its clock (e.g. Windows).
-		// In this case, just assume scavenging takes 10 µs per regular physical page
-		// (determined empirically), and conservatively ignore the impact of huge pages
-		// on timing.
-		//
-		// We shouldn't ever see a crit value less than zero unless there's a bug of
-		// some kind, either on our side or in the platform we're running on, but be
-		// defensive in that case as well.
-		const approxCritNSPerPhysicalPage = 10e3
-		if crit <= 0 {
-			crit = approxCritNSPerPhysicalPage * float64(released/physPageSize)
+		if crit < minCritTime {
+			// This means there wasn't enough work to actually fill up minCritTime.
+			// That's fine; we shouldn't try to do anything with this information
+			// because it's going result in a short enough sleep request that things
+			// will get messy. Just assume we did at least this much work.
+			// All this means is that we'll sleep longer than we otherwise would have.
+			crit = minCritTime
 		}
 
 		// Multiply the critical time by 1 + the ratio of the costs of using
@@ -347,41 +385,19 @@ func bgscavenge() {
 		// because of the additional overheads of using scavenged memory.
 		crit *= 1 + scavengeCostRatio
 
-		// If we spent more than 10 ms (for example, if the OS scheduled us away, or someone
-		// put their machine to sleep) in the critical section, bound the time we use to
-		// calculate at 10 ms to avoid letting the sleep time get arbitrarily high.
-		const maxCrit = 10e6
-		if crit > maxCrit {
-			crit = maxCrit
-		}
+		// Go to sleep for our current sleepNS.
+		slept := scavengeSleep(int64(crit / critSleepRatio))
 
-		// Compute the amount of time to sleep, assuming we want to use at most
-		// scavengePercent of CPU time. Take into account scheduling overheads
-		// that may extend the length of our sleep by multiplying by how far
-		// off we are from the ideal ratio. For example, if we're sleeping too
-		// much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time
-		// down.
-		adjust := scavengeEWMA / idealFraction
-		sleepTime := int64(adjust * crit / (scavengePercent / 100.0))
-
-		// Go to sleep.
-		slept := scavengeSleep(sleepTime)
-
-		// Compute the new ratio.
-		fraction := crit / (crit + float64(slept))
-
-		// Set a lower bound on the fraction.
-		// Due to OS-related anomalies we may "sleep" for an inordinate amount
-		// of time. Let's avoid letting the ratio get out of hand by bounding
-		// the sleep time we use in our EWMA.
-		const minFraction = 1.0 / 1000.0
-		if fraction < minFraction {
-			fraction = minFraction
-		}
+		// Calculate the CPU time spent.
+		//
+		// This may be slightly inaccurate with respect to GOMAXPROCS, but we're
+		// recomputing this often enough relative to GOMAXPROCS changes in general
+		// (it only changes when the world is stopped, and not during a GC) that
+		// that small inaccuracy is in the noise.
+		cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs))
 
-		// Update scavengeEWMA by merging in the new crit/slept ratio.
-		const alpha = 0.5
-		scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA
+		// Update the critSleepRatio, adjusting until we reach our ideal fraction.
+		critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit)
 	}
 }
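The new loop measures the fraction of CPU time spent in the scavenging critical section, cpuFraction = crit / ((slept + crit) * gomaxprocs), feeds it to a PI controller with idealFraction (scavengePercent / 100) as the setpoint, and sleeps for crit / critSleepRatio nanoseconds the next time around. The piController type itself lives elsewhere in the runtime, not in this file; the sketch below is a generic proportional-integral controller with the same knobs (kp, ti, tt, min, max) and back-calculation anti-windup, shown only to make the tuning constants above concrete. It is an illustration, not the runtime's implementation.

	package main

	import "fmt"

	// piController is a sketch of a proportional-integral controller with the
	// same knobs the scavenger uses above: kp (proportional gain), ti (integral
	// time), tt (anti-windup reset time) and [min, max] output clamping.
	type piController struct {
		kp, ti, tt  float64
		min, max    float64
		errIntegral float64
	}

	// next takes the measured input, the desired setpoint and the period over
	// which the error was observed, and returns the clamped control output.
	func (c *piController) next(input, setpoint, period float64) float64 {
		err := setpoint - input
		raw := c.kp*err + c.errIntegral

		// Clamp the output into [min, max].
		out := raw
		if out < c.min {
			out = c.min
		} else if out > c.max {
			out = c.max
		}

		// Integrate the error, feeding back how much we clamped (scaled by 1/tt)
		// so the integral term doesn't wind up while the output is saturated.
		c.errIntegral += (c.kp*err/c.ti + (out-raw)/c.tt) * period
		return out
	}

	func main() {
		// Mirror the loop above: drive the measured CPU fraction toward 1%
		// (assuming scavengePercent's 1%) by adjusting the crit:sleep ratio.
		c := piController{kp: 0.3375, ti: 3.2e6, tt: 1e9, min: 0.001, max: 1000}
		critSleepRatio := 0.001
		const crit, gomaxprocs = 2e6, 8 // pretend we scavenged for 2 ms on 8 Ps
		for i := 0; i < 3; i++ {
			slept := crit / critSleepRatio
			cpuFraction := crit / ((slept + crit) * gomaxprocs)
			critSleepRatio = c.next(cpuFraction, 0.01, slept+crit)
			fmt.Printf("cpuFraction=%.6f critSleepRatio=%.4f\n", cpuFraction, critSleepRatio)
		}
	}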
For example, if we're sleeping too - // much, then scavengeEMWA < idealFraction, so we'll adjust the sleep time - // down. - adjust := scavengeEWMA / idealFraction - sleepTime := int64(adjust * crit / (scavengePercent / 100.0)) - - // Go to sleep. - slept := scavengeSleep(sleepTime) - - // Compute the new ratio. - fraction := crit / (crit + float64(slept)) - - // Set a lower bound on the fraction. - // Due to OS-related anomalies we may "sleep" for an inordinate amount - // of time. Let's avoid letting the ratio get out of hand by bounding - // the sleep time we use in our EWMA. - const minFraction = 1.0 / 1000.0 - if fraction < minFraction { - fraction = minFraction - } + // Calculate the CPU time spent. + // + // This may be slightly inaccurate with respect to GOMAXPROCS, but we're + // recomputing this often enough relative to GOMAXPROCS changes in general + // (it only changes when the world is stopped, and not during a GC) that + // that small inaccuracy is in the noise. + cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs)) - // Update scavengeEWMA by merging in the new crit/slept ratio. - const alpha = 0.5 - scavengeEWMA = alpha*fraction + (1-alpha)*scavengeEWMA + // Update the critSleepRatio, adjusting until we reach our ideal fraction. + critSleepRatio = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit) } } @@ -391,16 +407,7 @@ func bgscavenge() { // back to the top of the heap. // // Returns the amount of memory scavenged in bytes. -// -// p.mheapLock must be held, but may be temporarily released if -// mayUnlock == true. -// -// Must run on the system stack because p.mheapLock must be held. -// -//go:systemstack -func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { - assertLockHeld(p.mheapLock) - +func (p *pageAlloc) scavenge(nbytes uintptr) uintptr { var ( addrs addrRange gen uint32 @@ -412,9 +419,11 @@ func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { break } } - r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock) - released += r - addrs = a + systemstack(func() { + r, a := p.scavengeOne(addrs, nbytes-released) + released += r + addrs = a + }) } // Only unreserve the space which hasn't been scavenged or searched // to ensure we always make progress. @@ -452,8 +461,9 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { func (p *pageAlloc) scavengeStartGen() { assertLockHeld(p.mheapLock) + lock(&p.scav.lock) if debug.scavtrace > 0 { - printScavTrace(p.scav.gen, p.scav.released, false) + printScavTrace(p.scav.gen, atomic.Loaduintptr(&p.scav.released), false) } p.inUse.cloneInto(&p.scav.inUse) @@ -483,9 +493,10 @@ func (p *pageAlloc) scavengeStartGen() { // arena in size, so virtually every heap has the scavenger on. p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards p.scav.gen++ - p.scav.released = 0 + atomic.Storeuintptr(&p.scav.released, 0) p.scav.freeHWM = minOffAddr p.scav.scavLWM = maxOffAddr + unlock(&p.scav.lock) } // scavengeReserve reserves a contiguous range of the address space @@ -494,14 +505,9 @@ func (p *pageAlloc) scavengeStartGen() { // first. // // Returns the reserved range and the scavenge generation number for it. -// -// p.mheapLock must be held. -// -// Must run on the system stack because p.mheapLock must be held. -// -//go:systemstack func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { - assertLockHeld(p.mheapLock) + lock(&p.scav.lock) + gen := p.scav.gen // Start by reserving the minimum. 
@@ -494,14 +505,9 @@ func (p *pageAlloc) scavengeStartGen() {
 // first.
 //
 // Returns the reserved range and the scavenge generation number for it.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
 func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
-	assertLockHeld(p.mheapLock)
+	lock(&p.scav.lock)
+	gen := p.scav.gen
 
 	// Start by reserving the minimum.
 	r := p.scav.inUse.removeLast(p.scav.reservationBytes)
@@ -509,7 +515,8 @@ func (p *pageAlloc) scavengeStartGen() {
 	// Return early if the size is zero; we don't want to use
 	// the bogus address below.
 	if r.size() == 0 {
-		return r, p.scav.gen
+		unlock(&p.scav.lock)
+		return r, gen
 	}
 
 	// The scavenger requires that base be aligned to a
@@ -520,28 +527,26 @@ func (p *pageAlloc) scavengeStartGen() {
 	// Remove from inUse however much extra we just pulled out.
 	p.scav.inUse.removeGreaterEqual(newBase)
 
+	unlock(&p.scav.lock)
+
 	r.base = offAddr{newBase}
-	return r, p.scav.gen
+	return r, gen
 }
 
 // scavengeUnreserve returns an unscavenged portion of a range that was
 // previously reserved with scavengeReserve.
-//
-// p.mheapLock must be held.
-//
-// Must run on the system stack because p.mheapLock must be held.
-//
-//go:systemstack
 func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
-	assertLockHeld(p.mheapLock)
-
-	if r.size() == 0 || gen != p.scav.gen {
+	if r.size() == 0 {
 		return
 	}
 	if r.base.addr()%pallocChunkBytes != 0 {
 		throw("unreserving unaligned region")
 	}
-	p.scav.inUse.add(r)
+	lock(&p.scav.lock)
+	if gen == p.scav.gen {
+		p.scav.inUse.add(r)
+	}
+	unlock(&p.scav.lock)
 }
 
 // scavengeOne walks over address range work until it finds
@@ -555,15 +560,10 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
 //
 // work's base address must be aligned to pallocChunkBytes.
 //
-// p.mheapLock must be held, but may be temporarily released if
-// mayUnlock == true.
-//
-// Must run on the system stack because p.mheapLock must be held.
+// Must run on the systemstack because it acquires p.mheapLock.
 //
 //go:systemstack
-func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
-	assertLockHeld(p.mheapLock)
-
+func (p *pageAlloc) scavengeOne(work addrRange, max uintptr) (uintptr, addrRange) {
 	// Defensively check if we've received an empty address range.
 	// If so, just return.
 	if work.size() == 0 {
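scavengeReserve and scavengeUnreserve now synchronize on p.scav.lock instead of the heap lock, and a reservation is only returned to the pool if the scavenge generation has not advanced in the meantime (the `if gen == p.scav.gen` check above). The sketch below illustrates that generation-checked reserve/unreserve pattern in isolation, with hypothetical names and a slice standing in for the address ranges.

	package main

	import (
		"fmt"
		"sync"
	)

	// reservePool is a hypothetical, simplified stand-in for pageAlloc.scav:
	// a pool of ranges guarded by its own lock, with a generation counter
	// that invalidates outstanding reservations.
	type reservePool struct {
		mu    sync.Mutex
		gen   uint32
		inUse []string // stand-in for the unsearched address ranges
	}

	// reserve takes one range out of the pool and remembers the generation
	// it was taken under.
	func (p *reservePool) reserve() (string, uint32) {
		p.mu.Lock()
		defer p.mu.Unlock()
		if len(p.inUse) == 0 {
			return "", p.gen
		}
		r := p.inUse[len(p.inUse)-1]
		p.inUse = p.inUse[:len(p.inUse)-1]
		return r, p.gen
	}

	// unreserve gives back an unused range, but only if the generation still
	// matches; a stale reservation from an older generation is dropped.
	func (p *reservePool) unreserve(r string, gen uint32) {
		if r == "" {
			return
		}
		p.mu.Lock()
		if gen == p.gen {
			p.inUse = append(p.inUse, r)
		}
		p.mu.Unlock()
	}

	func main() {
		p := &reservePool{inUse: []string{"chunk0", "chunk1"}}
		r, gen := p.reserve()
		p.gen++ // a new scavenge generation starts while we hold the reservation
		p.unreserve(r, gen)
		fmt.Println(p.inUse) // [chunk0]: the stale reservation was dropped
	}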
@@ -595,40 +595,12 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 		minPages = 1
 	}
 
-	// Helpers for locking and unlocking only if mayUnlock == true.
-	lockHeap := func() {
-		if mayUnlock {
-			lock(p.mheapLock)
-		}
-	}
-	unlockHeap := func() {
-		if mayUnlock {
-			unlock(p.mheapLock)
-		}
-	}
-
-	// Fast path: check the chunk containing the top-most address in work,
-	// starting at that address's page index in the chunk.
-	//
-	// Note that work.end() is exclusive, so get the chunk we care about
-	// by subtracting 1.
-	maxAddr := work.limit.addr() - 1
-	maxChunk := chunkIndex(maxAddr)
-	if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
-		// We only bother looking for a candidate if there at least
-		// minPages free pages at all.
-		base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
-
-		// If we found something, scavenge it and return!
-		if npages != 0 {
-			work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
-
-			assertLockHeld(p.mheapLock) // Must be locked on return.
-			return uintptr(npages) * pageSize, work
-		}
+	// Fast path: check the chunk containing the top-most address in work.
+	if r, w := p.scavengeOneFast(work, minPages, maxPages); r != 0 {
+		return r, w
+	} else {
+		work = w
 	}
-	// Update the limit to reflect the fact that we checked maxChunk already.
-	work.limit = offAddr{chunkBase(maxChunk)}
 
 	// findCandidate finds the next scavenge candidate in work optimistically.
 	//
@@ -667,37 +639,61 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 	// looking for any free and unscavenged page. If we think we see something,
 	// lock and verify it!
 	for work.size() != 0 {
-		unlockHeap()
-
 		// Search for the candidate.
 		candidateChunkIdx, ok := findCandidate(work)
-
-		// Lock the heap. We need to do this now if we found a candidate or not.
-		// If we did, we'll verify it. If not, we need to lock before returning
-		// anyway.
-		lockHeap()
-
 		if !ok {
 			// We didn't find a candidate, so we're done.
 			work.limit = work.base
 			break
 		}
 
+		// Lock, so we can verify what we found.
+		lock(p.mheapLock)
+
 		// Find, verify, and scavenge if we can.
 		chunk := p.chunkOf(candidateChunkIdx)
 		base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
 		if npages > 0 {
 			work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
-
-			assertLockHeld(p.mheapLock) // Must be locked on return.
+			unlock(p.mheapLock)
 			return uintptr(npages) * pageSize, work
 		}
+		unlock(p.mheapLock)
 
 		// We were fooled, so let's continue from where we left off.
 		work.limit = offAddr{chunkBase(candidateChunkIdx)}
 	}
+	return 0, work
+}
 
-	assertLockHeld(p.mheapLock) // Must be locked on return.
+// scavengeOneFast is the fast path for scavengeOne, which just checks the top
+// chunk of work for some pages to scavenge.
+//
+// Must run on the system stack because it acquires the heap lock.
+//
+//go:systemstack
+func (p *pageAlloc) scavengeOneFast(work addrRange, minPages, maxPages uintptr) (uintptr, addrRange) {
+	maxAddr := work.limit.addr() - 1
+	maxChunk := chunkIndex(maxAddr)
+
+	lock(p.mheapLock)
+	if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
+		// We only bother looking for a candidate if there at least
+		// minPages free pages at all.
+		base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+
+		// If we found something, scavenge it and return!
+		if npages != 0 {
+			work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
+			unlock(p.mheapLock)
+			return uintptr(npages) * pageSize, work
+		}
+	}
+	unlock(p.mheapLock)
+
+	// Update the limit to reflect the fact that we checked maxChunk already.
+	work.limit = offAddr{chunkBase(maxChunk)}
 	return 0, work
 }
 
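scavengeOne's slow path now searches for a candidate chunk with no lock held and only takes p.mheapLock to verify and scavenge what it found, dropping the lock and continuing the search if the candidate evaporated in the meantime. A compact, hypothetical distillation of that optimistic check / lock / re-verify shape (none of these names are runtime APIs):

	package main

	import (
		"fmt"
		"sync"
	)

	// findAndClaim searches optimistically with no lock held, then locks and
	// re-verifies before acting, retrying when the candidate was taken away.
	func findAndClaim(mu *sync.Mutex, find func() (int, bool), verify func(int) bool, claim func(int)) bool {
		for {
			idx, ok := find() // optimistic, lock-free search
			if !ok {
				return false // nothing left to look at
			}
			mu.Lock()
			if verify(idx) {
				claim(idx) // the candidate is still good: act under the lock
				mu.Unlock()
				return true
			}
			mu.Unlock() // we were fooled; continue from where we left off
		}
	}

	func main() {
		var mu sync.Mutex
		free := map[int]bool{3: true}
		next := 5
		ok := findAndClaim(&mu,
			func() (int, bool) { next--; return next, next >= 0 }, // candidates 4, 3, 2, ...
			func(i int) bool { return free[i] },                   // re-check under the lock
			func(i int) { delete(free, i) },
		)
		fmt.Println(ok, free)
	}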
@@ -708,38 +704,57 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 //
 // Returns the base address of the scavenged region.
 //
-// p.mheapLock must be held.
+// p.mheapLock must be held. Unlocks p.mheapLock but reacquires
+// it before returning. Must be run on the systemstack as a result.
+//
+//go:systemstack
 func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
 	assertLockHeld(p.mheapLock)
 
-	p.chunkOf(ci).scavenged.setRange(base, npages)
-
 	// Compute the full address for the start of the range.
 	addr := chunkBase(ci) + uintptr(base)*pageSize
 
+	// Mark the range we're about to scavenge as allocated, because
+	// we don't want any allocating goroutines to grab it while
+	// the scavenging is in progress.
+	if scav := p.allocRange(addr, uintptr(npages)); scav != 0 {
+		throw("double scavenge")
+	}
+
+	// With that done, it's safe to unlock.
+	unlock(p.mheapLock)
+
 	// Update the scavenge low watermark.
+	lock(&p.scav.lock)
 	if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
 		p.scav.scavLWM = oAddr
 	}
+	unlock(&p.scav.lock)
 
-	// Only perform the actual scavenging if we're not in a test.
-	// It's dangerous to do so otherwise.
-	if p.test {
-		return addr
-	}
-	sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
+	if !p.test {
+		// Only perform the actual scavenging if we're not in a test.
+		// It's dangerous to do so otherwise.
+		sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
 
-	// Update global accounting only when not in test, otherwise
-	// the runtime's accounting will be wrong.
-	nbytes := int64(npages) * pageSize
-	atomic.Xadd64(&memstats.heap_released, nbytes)
+		// Update global accounting only when not in test, otherwise
+		// the runtime's accounting will be wrong.
+		nbytes := int64(npages) * pageSize
+		atomic.Xadd64(&memstats.heap_released, nbytes)
 
-	// Update consistent accounting too.
-	stats := memstats.heapStats.acquire()
-	atomic.Xaddint64(&stats.committed, -nbytes)
-	atomic.Xaddint64(&stats.released, nbytes)
-	memstats.heapStats.release()
+		// Update consistent accounting too.
+		stats := memstats.heapStats.acquire()
+		atomic.Xaddint64(&stats.committed, -nbytes)
+		atomic.Xaddint64(&stats.released, nbytes)
+		memstats.heapStats.release()
+	}
+
+	// Relock the heap, because now we need to make these pages
+	// available allocation. Free them back to the page allocator.
+	lock(p.mheapLock)
+	p.free(addr, uintptr(npages), true)
 
+	// Mark the range as scavenged.
+	p.chunkOf(ci).scavenged.setRange(base, npages)
 	return addr
 }
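The rewritten scavengeRangeLocked juggles the heap lock around the slow OS call: it marks the range allocated while still holding p.mheapLock so nothing can hand those pages out, drops the lock for sysUnused and the stats updates, then relocks to free the pages back and set the scavenged bits. Stripped of the page-allocator details, the shape of that protocol is roughly the following hypothetical outline (the function names passed in are stand-ins, not runtime APIs).

	package main

	import "sync"

	// scavengeRangeOutline sketches the lock protocol described above: protect
	// the range, drop the heap lock for the slow work, then relock to finish.
	func scavengeRangeOutline(heapMu *sync.Mutex, markAllocated, releaseToOS, freeAndMarkScavenged func()) {
		// Entered with heapMu held, as the doc comment above requires.
		markAllocated() // protect the range from concurrent allocation
		heapMu.Unlock() // now safe to drop the heap lock

		releaseToOS() // sysUnused and the accounting, done unlocked

		heapMu.Lock()          // reacquire before touching the page allocator
		freeAndMarkScavenged() // give the pages back and set the scavenged bits
		// Returns with heapMu held again.
	}

	func main() {
		var mu sync.Mutex
		mu.Lock()
		scavengeRangeOutline(&mu, func() {}, func() {}, func() {})
		mu.Unlock()
	}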