Diffstat (limited to 'libgo/go/runtime/mgc.go')
-rw-r--r-- | libgo/go/runtime/mgc.go | 492
1 file changed, 256 insertions(+), 236 deletions(-)
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index 72479c2..a9f2c1a 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -292,10 +292,14 @@ func setGCPhase(x uint32) {
 type gcMarkWorkerMode int

 const (
+    // gcMarkWorkerNotWorker indicates that the next scheduled G is not
+    // starting work and the mode should be ignored.
+    gcMarkWorkerNotWorker gcMarkWorkerMode = iota
+
     // gcMarkWorkerDedicatedMode indicates that the P of a mark
     // worker is dedicated to running that mark worker. The mark
     // worker should run without preemption.
-    gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota
+    gcMarkWorkerDedicatedMode

     // gcMarkWorkerFractionalMode indicates that a P is currently
     // running the "fractional" mark worker. The fractional worker
@@ -315,6 +319,7 @@ const (
 // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes
 // to use in execution traces.
 var gcMarkWorkerModeStrings = [...]string{
+    "Not worker",
     "GC (dedicated)",
     "GC (fractional)",
     "GC (idle)",
@@ -390,10 +395,24 @@ type gcControllerState struct {
     // bytes that should be performed by mutator assists. This is
     // computed at the beginning of each cycle and updated every
     // time heap_scan is updated.
-    assistWorkPerByte float64
+    //
+    // Stored as a uint64, but it's actually a float64. Use
+    // float64frombits to get the value.
+    //
+    // Read and written atomically.
+    assistWorkPerByte uint64

     // assistBytesPerWork is 1/assistWorkPerByte.
-    assistBytesPerWork float64
+    //
+    // Stored as a uint64, but it's actually a float64. Use
+    // float64frombits to get the value.
+    //
+    // Read and written atomically.
+    //
+    // Note that because this is read and written independently
+    // from assistWorkPerByte users may notice a skew between
+    // the two values, and such a state should be safe.
+    assistBytesPerWork uint64

     // fractionalUtilizationGoal is the fraction of wall clock
     // time that should be spent in the fractional mark worker on
@@ -411,7 +430,8 @@ type gcControllerState struct {
 }

 // startCycle resets the GC controller's state and computes estimates
-// for a new GC cycle. The caller must hold worldsema.
+// for a new GC cycle. The caller must hold worldsema and the world
+// must be stopped.
 func (c *gcControllerState) startCycle() {
     c.scanWork = 0
     c.bgScanCredit = 0
@@ -471,7 +491,8 @@ func (c *gcControllerState) startCycle() {
     c.revise()

     if debug.gcpacertrace > 0 {
-        print("pacer: assist ratio=", c.assistWorkPerByte,
+        assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte))
+        print("pacer: assist ratio=", assistRatio,
             " (scan ", memstats.heap_scan>>20, " MB in ",
             work.initialHeapLive>>20, "->",
             memstats.next_gc>>20, " MB)",
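The assist ratio fields above become uint64 because the runtime's internal atomic package only operates on integer types; float64bits and float64frombits merely reinterpret the IEEE-754 bit pattern, as the pacer-trace hunk shows. The same pattern can be written outside the runtime with the standard library. A minimal sketch, assuming nothing beyond math and sync/atomic (the atomicFloat64 type is illustrative, not part of this patch):

package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

// atomicFloat64 stores a float64 as its bit pattern in a uint64 so that
// plain atomic loads and stores can be used on it, mirroring how
// assistWorkPerByte and assistBytesPerWork are handled above.
type atomicFloat64 struct {
	bits uint64
}

func (f *atomicFloat64) Store(v float64) {
	atomic.StoreUint64(&f.bits, math.Float64bits(v))
}

func (f *atomicFloat64) Load() float64 {
	return math.Float64frombits(atomic.LoadUint64(&f.bits))
}

func main() {
	var ratio atomicFloat64
	ratio.Store(0.25)
	fmt.Println(ratio.Load()) // 0.25
}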
@@ -481,9 +502,22 @@ func (c *gcControllerState) startCycle() {
 }

 // revise updates the assist ratio during the GC cycle to account for
-// improved estimates. This should be called either under STW or
-// whenever memstats.heap_scan, memstats.heap_live, or
-// memstats.next_gc is updated (with mheap_.lock held).
+// improved estimates. This should be called whenever memstats.heap_scan,
+// memstats.heap_live, or memstats.next_gc is updated. It is safe to
+// call concurrently, but it may race with other calls to revise.
+//
+// The result of this race is that the two assist ratio values may not line
+// up or may be stale. In practice this is OK because the assist ratio
+// moves slowly throughout a GC cycle, and the assist ratio is a best-effort
+// heuristic anyway. Furthermore, no part of the heuristic depends on
+// the two assist ratio values being exact reciprocals of one another, since
+// the two values are used to convert values from different sources.
+//
+// The worst case result of this raciness is that we may miss a larger shift
+// in the ratio (say, if we decide to pace more aggressively against the
+// hard heap goal) but even this "hard goal" is best-effort (see #40460).
+// The dedicated GC should ensure we don't exceed the hard goal by too much
+// in the rare case we do exceed it.
 //
 // It should only be called when gcBlackenEnabled != 0 (because this
 // is when assists are enabled and the necessary statistics are
@@ -496,10 +530,12 @@ func (c *gcControllerState) revise() {
         gcpercent = 100000
     }
     live := atomic.Load64(&memstats.heap_live)
+    scan := atomic.Load64(&memstats.heap_scan)
+    work := atomic.Loadint64(&c.scanWork)

     // Assume we're under the soft goal. Pace GC to complete at
     // next_gc assuming the heap is in steady-state.
-    heapGoal := int64(memstats.next_gc)
+    heapGoal := int64(atomic.Load64(&memstats.next_gc))

     // Compute the expected scan work remaining.
     //
@@ -510,17 +546,17 @@ func (c *gcControllerState) revise() {
     //
     // (This is a float calculation to avoid overflowing on
     // 100*heap_scan.)
-    scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+    scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent))

-    if live > memstats.next_gc || c.scanWork > scanWorkExpected {
+    if int64(live) > heapGoal || work > scanWorkExpected {
         // We're past the soft goal, or we've already done more scan
         // work than we expected. Pace GC so that in the worst case it
         // will complete by the hard goal.
         const maxOvershoot = 1.1
-        heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
+        heapGoal = int64(float64(heapGoal) * maxOvershoot)

         // Compute the upper bound on the scan work remaining.
-        scanWorkExpected = int64(memstats.heap_scan)
+        scanWorkExpected = int64(scan)
     }

     // Compute the remaining scan work estimate.
     //
@@ -530,7 +566,7 @@ func (c *gcControllerState) revise() {
     // (scanWork), so allocation will change this difference
     // slowly in the soft regime and not at all in the hard
     // regime.
-    scanWorkRemaining := scanWorkExpected - c.scanWork
+    scanWorkRemaining := scanWorkExpected - work
     if scanWorkRemaining < 1000 {
         // We set a somewhat arbitrary lower bound on
         // remaining scan work since if we aim a little high,
@@ -554,8 +590,15 @@ func (c *gcControllerState) revise() {
     // Compute the mutator assist ratio so by the time the mutator
     // allocates the remaining heap bytes up to next_gc, it will
     // have done (or stolen) the remaining amount of scan work.
-    c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
-    c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
+    // Note that the assist ratio values are updated atomically
+    // but not together. This means there may be some degree of
+    // skew between the two values. This is generally OK as the
+    // values shift relatively slowly over the course of a GC
+    // cycle.
+    assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining)
+    assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining)
+    atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte))
+    atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork))
 }

 // endCycle computes the trigger ratio for the next cycle.
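For readers following the pacer arithmetic, the revised revise() reduces to the computation below. This is a standalone sketch, not the runtime's implementation: plain parameters stand in for the memstats fields and gcControllerState, and the final guard on heapRemaining is an assumption added so the sketch is safe on its own (the real code handles that case in surrounding, unchanged lines).

// pacerSketch mirrors the shape of the new revise(): under the soft goal it
// paces completion at nextGC; once the live heap passes the goal, or more
// scan work than expected has already been done, it paces against a 1.1x
// hard goal instead. All inputs are local snapshots, matching the atomic
// loads added above.
func pacerSketch(live, scan uint64, scanWork, nextGC int64, gcpercent int32) (assistWorkPerByte, assistBytesPerWork float64) {
	heapGoal := nextGC
	// Expected total scan work for a steady-state heap (float math to
	// avoid overflowing 100*scan).
	scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent))

	if int64(live) > heapGoal || scanWork > scanWorkExpected {
		// Hard-goal regime: allow at most 10% overshoot and assume all
		// scannable memory may still need scanning.
		const maxOvershoot = 1.1
		heapGoal = int64(float64(heapGoal) * maxOvershoot)
		scanWorkExpected = int64(scan)
	}

	scanWorkRemaining := scanWorkExpected - scanWork
	if scanWorkRemaining < 1000 {
		scanWorkRemaining = 1000 // somewhat arbitrary lower bound, as above
	}
	heapRemaining := heapGoal - int64(live)
	if heapRemaining <= 0 {
		heapRemaining = 1 // guard added for this sketch only
	}

	assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
	assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
	return
}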
@@ -672,18 +715,12 @@ func (c *gcControllerState) enlistWorker() {
     }
 }

-// findRunnableGCWorker returns the background mark worker for _p_ if it
+// findRunnableGCWorker returns a background mark worker for _p_ if it
 // should be run. This must only be called when gcBlackenEnabled != 0.
 func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
     if gcBlackenEnabled == 0 {
         throw("gcControllerState.findRunnable: blackening not enabled")
     }
-    if _p_.gcBgMarkWorker == 0 {
-        // The mark worker associated with this P is blocked
-        // performing a mark transition. We can't run it
-        // because it may be on some other run or wait queue.
-        return nil
-    }

     if !gcMarkWorkAvailable(_p_) {
         // No work to be done right now. This can happen at
@@ -693,15 +730,35 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
         return nil
     }

+    // Grab a worker before we commit to running below.
+    node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
+    if node == nil {
+        // There is at least one worker per P, so normally there are
+        // enough workers to run on all Ps, if necessary. However, once
+        // a worker enters gcMarkDone it may park without rejoining the
+        // pool, thus freeing a P with no corresponding worker.
+        // gcMarkDone never depends on another worker doing work, so it
+        // is safe to simply do nothing here.
+        //
+        // If gcMarkDone bails out without completing the mark phase,
+        // it will always do so with queued global work. Thus, that P
+        // will be immediately eligible to re-run the worker G it was
+        // just using, ensuring work can complete.
+        return nil
+    }
+
     decIfPositive := func(ptr *int64) bool {
-        if *ptr > 0 {
-            if atomic.Xaddint64(ptr, -1) >= 0 {
+        for {
+            v := atomic.Loadint64(ptr)
+            if v <= 0 {
+                return false
+            }
+
+            // TODO: having atomic.Casint64 would be more pleasant.
+            if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) {
                 return true
             }
-            // We lost a race
-            atomic.Xaddint64(ptr, +1)
         }
-        return false
     }

     if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
@@ -710,6 +767,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
         _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
     } else if c.fractionalUtilizationGoal == 0 {
         // No need for fractional workers.
+        gcBgMarkWorkerPool.push(&node.node)
         return nil
     } else {
         // Is this P behind on the fractional utilization
@@ -719,14 +777,15 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
         delta := nanotime() - gcController.markStartTime
         if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
             // Nope. No need to run a fractional worker.
+            gcBgMarkWorkerPool.push(&node.node)
             return nil
         }
         // Run a fractional worker.
         _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
     }

-    // Run the background mark worker
-    gp := _p_.gcBgMarkWorker.ptr()
+    // Run the background mark worker.
+    gp := node.gp.ptr()
     casgstatus(gp, _Gwaiting, _Grunnable)
     if trace.enabled {
         traceGoUnpark(gp, 0)
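The new decIfPositive closure is a plain compare-and-swap loop; the Cas64-on-unsafe.Pointer cast exists only because the runtime's internal atomic package has no Casint64 (hence the TODO). With the standard library the same idiom, shown here as a hedged sketch rather than the runtime's code, is simply:

package sketch

import "sync/atomic"

// decIfPositive atomically decrements *ptr if it is currently positive and
// reports whether it did so. Unlike the old Xadd-then-undo version, it never
// drives the counter below zero, even transiently.
func decIfPositive(ptr *int64) bool {
	for {
		v := atomic.LoadInt64(ptr)
		if v <= 0 {
			return false
		}
		if atomic.CompareAndSwapInt64(ptr, v, v-1) {
			return true
		}
		// CAS failed: another goroutine changed the counter; retry.
	}
}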
@@ -764,6 +823,8 @@ func pollFractionalWorkerExit() bool {
 //
 // mheap_.lock must be held or the world must be stopped.
 func gcSetTriggerRatio(triggerRatio float64) {
+    assertWorldStoppedOrLockHeld(&mheap_.lock)
+
     // Compute the next GC goal, which is when the allocated heap
     // has grown by GOGC/100 over the heap marked by the last
     // cycle.
@@ -846,7 +907,7 @@ func gcSetTriggerRatio(triggerRatio float64) {

     // Commit to the trigger and goal.
     memstats.gc_trigger = trigger
-    memstats.next_gc = goal
+    atomic.Store64(&memstats.next_gc, goal)
     if trace.enabled {
         traceNextGC()
     }
@@ -903,7 +964,9 @@
 //
 // mheap_.lock must be held or the world must be stopped.
 func gcEffectiveGrowthRatio() float64 {
-    egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked)
+    assertWorldStoppedOrLockHeld(&mheap_.lock)
+
+    egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked)
     if egogc < 0 {
         // Shouldn't happen, but just in case.
         egogc = 0
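gcEffectiveGrowthRatio above is a one-line formula. Written out with ordinary parameters (signed inputs are used here so the clamp is meaningful in isolation; this is an illustration, not the runtime function):

// effectiveGrowthRatio reports the heap goal as growth over the previously
// marked heap: (goal - marked) / marked. With GOGC=100 and no other
// pressure this is roughly 1.0 (100%). It is clamped at zero, which, as the
// runtime comment puts it, "shouldn't happen, but just in case".
func effectiveGrowthRatio(nextGC, heapMarked int64) float64 {
	egogc := float64(nextGC-heapMarked) / float64(heapMarked)
	if egogc < 0 {
		egogc = 0
	}
	return egogc
}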
@@ -985,7 +1048,6 @@ var work struct {
     nproc  uint32
     tstart int64
     nwait  uint32
-    ndone  uint32

     // Number of roots of various root types. Set by gcMarkRootPrepare.
     nFlushCacheRoots int
@@ -1383,6 +1445,7 @@ func gcStart(trigger gcTrigger) {
         now = startTheWorldWithSema(trace.enabled)
         work.pauseNS += now - work.pauseStart
         work.tMark = now
+        memstats.gcPauseDist.record(now - work.pauseStart)
     })

     // Release the world sema before Gosched() in STW mode
@@ -1409,19 +1472,6 @@ func gcStart(trigger gcTrigger) {
 // This is protected by markDoneSema.
 var gcMarkDoneFlushed uint32

-// debugCachedWork enables extra checks for debugging premature mark
-// termination.
-//
-// For debugging issue #27993.
-const debugCachedWork = false
-
-// gcWorkPauseGen is for debugging the mark completion algorithm.
-// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen.
-// Only used if debugCachedWork is true.
-//
-// For debugging issue #27993.
-var gcWorkPauseGen uint32 = 1
-
 // gcMarkDone transitions the GC from mark to mark termination if all
 // reachable objects have been marked (that is, there are no grey
 // objects and can be no more in the future). Otherwise, it flushes
@@ -1477,15 +1527,7 @@ top:
             // Flush the write barrier buffer, since this may add
             // work to the gcWork.
             wbBufFlush1(_p_)
-            // For debugging, shrink the write barrier
-            // buffer so it flushes immediately.
-            // wbBuf.reset will keep it at this size as
-            // long as throwOnGCWork is set.
-            if debugCachedWork {
-                b := &_p_.wbBuf
-                b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
-                b.debugGen = gcWorkPauseGen
-            }
+
             // Flush the gcWork, since this may create global work
             // and set the flushedWork flag.
             //
@@ -1496,29 +1538,12 @@ top:
             if _p_.gcw.flushedWork {
                 atomic.Xadd(&gcMarkDoneFlushed, 1)
                 _p_.gcw.flushedWork = false
-            } else if debugCachedWork {
-                // For debugging, freeze the gcWork
-                // until we know whether we've reached
-                // completion or not. If we think
-                // we've reached completion, but
-                // there's a paused gcWork, then
-                // that's a bug.
-                _p_.gcw.pauseGen = gcWorkPauseGen
-                // Capture the G's stack.
-                for i := range _p_.gcw.pauseStack {
-                    _p_.gcw.pauseStack[i].pc = 0
-                }
-                callers(1, _p_.gcw.pauseStack[:])
             }
         })
         casgstatus(gp, _Gwaiting, _Grunning)
     })

     if gcMarkDoneFlushed != 0 {
-        if debugCachedWork {
-            // Release paused gcWorks.
-            atomic.Xadd(&gcWorkPauseGen, 1)
-        }
         // More grey objects were discovered since the
         // previous termination check, so there may be more
         // work to do. Keep going. It's possible the
@@ -1528,13 +1553,6 @@ top:
         goto top
     }

-    if debugCachedWork {
-        throwOnGCWork = true
-        // Release paused gcWorks. If there are any, they
-        // should now observe throwOnGCWork and panic.
-        atomic.Xadd(&gcWorkPauseGen, 1)
-    }
-
     // There was no global work, no local work, and no Ps
     // communicated work since we took markDoneSema. Therefore
     // there are no grey objects and no more objects can be
@@ -1551,59 +1569,34 @@ top:
     // below. The important thing is that the wb remains active until
     // all marking is complete. This includes writes made by the GC.

-    if debugCachedWork {
-        // For debugging, double check that no work was added after we
-        // went around above and disable write barrier buffering.
+    // There is sometimes work left over when we enter mark termination due
+    // to write barriers performed after the completion barrier above.
+    // Detect this and resume concurrent mark. This is obviously
+    // unfortunate.
+    //
+    // See issue #27993 for details.
+    //
+    // Switch to the system stack to call wbBufFlush1, though in this case
+    // it doesn't matter because we're non-preemptible anyway.
+    restart := false
+    systemstack(func() {
         for _, p := range allp {
-            gcw := &p.gcw
-            if !gcw.empty() {
-                printlock()
-                print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork)
-                if gcw.wbuf1 == nil {
-                    print(" wbuf1=<nil>")
-                } else {
-                    print(" wbuf1.n=", gcw.wbuf1.nobj)
-                }
-                if gcw.wbuf2 == nil {
-                    print(" wbuf2=<nil>")
-                } else {
-                    print(" wbuf2.n=", gcw.wbuf2.nobj)
-                }
-                print("\n")
-                if gcw.pauseGen == gcw.putGen {
-                    println("runtime: checkPut already failed at this generation")
-                }
-                throw("throwOnGCWork")
+            wbBufFlush1(p)
+            if !p.gcw.empty() {
+                restart = true
+                break
             }
         }
-    } else {
-        // For unknown reasons (see issue #27993), there is
-        // sometimes work left over when we enter mark
-        // termination. Detect this and resume concurrent
-        // mark. This is obviously unfortunate.
-        //
-        // Switch to the system stack to call wbBufFlush1,
-        // though in this case it doesn't matter because we're
-        // non-preemptible anyway.
-        restart := false
+    })
+    if restart {
+        getg().m.preemptoff = ""
         systemstack(func() {
-            for _, p := range allp {
-                wbBufFlush1(p)
-                if !p.gcw.empty() {
-                    restart = true
-                    break
-                }
-            }
+            now := startTheWorldWithSema(true)
+            work.pauseNS += now - work.pauseStart
+            memstats.gcPauseDist.record(now - work.pauseStart)
         })
-        if restart {
-            getg().m.preemptoff = ""
-            systemstack(func() {
-                now := startTheWorldWithSema(true)
-                work.pauseNS += now - work.pauseStart
-            })
-            semrelease(&worldsema)
-            goto top
-        }
+        semrelease(&worldsema)
+        goto top
     }

     // Disable assists and background workers. We must do
@@ -1632,10 +1625,10 @@ top:
     gcMarkTermination(nextTriggerRatio)
 }

+// World must be stopped and mark assists and background workers must be
+// disabled.
 func gcMarkTermination(nextTriggerRatio float64) {
-    // World is stopped.
-    // Start marktermination which includes enabling the write barrier.
-    atomic.Store(&gcBlackenEnabled, 0)
+    // Start marktermination (write barrier remains enabled for now).
     setGCPhase(_GCmarktermination)

     work.heap1 = memstats.heap_live
@@ -1672,13 +1665,13 @@ func gcMarkTermination(nextTriggerRatio float64) {
         // mark using checkmark bits, to check that we
         // didn't forget to mark anything during the
         // concurrent mark process.
+        startCheckmarks()
         gcResetMarkState()
-        initCheckmarks()
         gcw := &getg().m.p.ptr().gcw
         gcDrain(gcw, 0)
         wbBufFlush1(getg().m.p.ptr())
         gcw.dispose()
-        clearCheckmarks()
+        endCheckmarks()
     }

     // marking is complete so we can turn the write barrier off
@@ -1713,6 +1706,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
     unixNow := sec*1e9 + int64(nsec)
     work.pauseNS += now - work.pauseStart
     work.tEnd = now
+    memstats.gcPauseDist.record(now - work.pauseStart)
     atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user
     atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us
     memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS)
@@ -1826,20 +1820,26 @@ func gcMarkTermination(nextTriggerRatio float64) {
     }
 }

-// gcBgMarkStartWorkers prepares background mark worker goroutines.
-// These goroutines will not run until the mark phase, but they must
-// be started while the work is not stopped and from a regular G
-// stack. The caller must hold worldsema.
+// gcBgMarkStartWorkers prepares background mark worker goroutines. These
+// goroutines will not run until the mark phase, but they must be started while
+// the work is not stopped and from a regular G stack. The caller must hold
+// worldsema.
 func gcBgMarkStartWorkers() {
-    // Background marking is performed by per-P G's. Ensure that
-    // each P has a background GC G.
-    for _, p := range allp {
-        if p.gcBgMarkWorker == 0 {
-            expectSystemGoroutine()
-            go gcBgMarkWorker(p)
-            notetsleepg(&work.bgMarkReady, -1)
-            noteclear(&work.bgMarkReady)
-        }
+    // Background marking is performed by per-P G's. Ensure that each P has
+    // a background GC G.
+    //
+    // Worker Gs don't exit if gomaxprocs is reduced. If it is raised
+    // again, we can reuse the old workers; no need to create new workers.
+    for gcBgMarkWorkerCount < gomaxprocs {
+        expectSystemGoroutine()
+        go gcBgMarkWorker()
+
+        notetsleepg(&work.bgMarkReady, -1)
+        noteclear(&work.bgMarkReady)
+        // The worker is now guaranteed to be added to the pool before
+        // its P's next findRunnableGCWorker.
+
+        gcBgMarkWorkerCount++
     }
 }
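The rewritten gcBgMarkStartWorkers keeps a running count of workers ever created and only tops the pool up to the current gomaxprocs, so workers survive a GOMAXPROCS decrease and are reused after an increase. The general shape of that loop, sketched outside the runtime (the names, the ready channel, and the work callback are illustrative stand-ins for the note-based handshake used above):

package sketch

// topUpWorkers starts workers until count reaches target, waiting for each
// new worker to signal readiness before starting the next, and never tears
// workers down when target later shrinks.
func topUpWorkers(count *int, target int, work func(ready chan<- struct{})) {
	ready := make(chan struct{})
	for *count < target {
		go work(ready) // worker signals ready, then parks/runs in its own loop
		<-ready        // analogous to notetsleepg(&work.bgMarkReady, -1)
		*count++
	}
}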
@@ -1859,84 +1859,106 @@ func gcBgMarkPrepare() {
     work.nwait = ^uint32(0)
 }

-func gcBgMarkWorker(_p_ *p) {
+// gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single
+// gcBgMarkWorker goroutine.
+type gcBgMarkWorkerNode struct {
+    // Unused workers are managed in a lock-free stack. This field must be first.
+    node lfnode
+
+    // The g of this worker.
+    gp guintptr
+
+    // Release this m on park. This is used to communicate with the unlock
+    // function, which cannot access the G's stack. It is unused outside of
+    // gcBgMarkWorker().
+    m muintptr
+}
+
+func gcBgMarkWorker() {
     setSystemGoroutine()

     gp := getg()

-    type parkInfo struct {
-        m      muintptr // Release this m on park.
-        attach puintptr // If non-nil, attach to this p on park.
-    }
-    // We pass park to a gopark unlock function, so it can't be on
+    // We pass node to a gopark unlock function, so it can't be on
     // the stack (see gopark). Prevent deadlock from recursively
     // starting GC by disabling preemption.
     gp.m.preemptoff = "GC worker init"
-    park := new(parkInfo)
+    node := new(gcBgMarkWorkerNode)
     gp.m.preemptoff = ""

-    park.m.set(acquirem())
-    park.attach.set(_p_)
-    // Inform gcBgMarkStartWorkers that this worker is ready.
-    // After this point, the background mark worker is scheduled
-    // cooperatively by gcController.findRunnable. Hence, it must
-    // never be preempted, as this would put it into _Grunnable
-    // and put it on a run queue. Instead, when the preempt flag
-    // is set, this puts itself into _Gwaiting to be woken up by
-    // gcController.findRunnable at the appropriate time.
+    node.gp.set(gp)
+
+    node.m.set(acquirem())
     notewakeup(&work.bgMarkReady)
+    // After this point, the background mark worker is generally scheduled
+    // cooperatively by gcController.findRunnableGCWorker. While performing
+    // work on the P, preemption is disabled because we are working on
+    // P-local work buffers. When the preempt flag is set, this puts itself
+    // into _Gwaiting to be woken up by gcController.findRunnableGCWorker
+    // at the appropriate time.
+    //
+    // When preemption is enabled (e.g., while in gcMarkDone), this worker
+    // may be preempted and schedule as a _Grunnable G from a runq. That is
+    // fine; it will eventually gopark again for further scheduling via
+    // findRunnableGCWorker.
+    //
+    // Since we disable preemption before notifying bgMarkReady, we
+    // guarantee that this G will be in the worker pool for the next
+    // findRunnableGCWorker. This isn't strictly necessary, but it reduces
+    // latency between _GCmark starting and the workers starting.

     for {
-        // Go to sleep until woken by gcController.findRunnable.
-        // We can't releasem yet since even the call to gopark
-        // may be preempted.
-        gopark(func(g *g, parkp unsafe.Pointer) bool {
-            park := (*parkInfo)(parkp)
-
-            // The worker G is no longer running, so it's
-            // now safe to allow preemption.
-            releasem(park.m.ptr())
-
-            // If the worker isn't attached to its P,
-            // attach now. During initialization and after
-            // a phase change, the worker may have been
-            // running on a different P. As soon as we
-            // attach, the owner P may schedule the
-            // worker, so this must be done after the G is
-            // stopped.
-            if park.attach != 0 {
-                p := park.attach.ptr()
-                park.attach.set(nil)
-                // cas the worker because we may be
-                // racing with a new worker starting
-                // on this P.
-                if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) {
-                    // The P got a new worker.
-                    // Exit this worker.
-                    return false
-                }
+        // Go to sleep until woken by
+        // gcController.findRunnableGCWorker.
+        gopark(func(g *g, nodep unsafe.Pointer) bool {
+            node := (*gcBgMarkWorkerNode)(nodep)
+
+            if mp := node.m.ptr(); mp != nil {
+                // The worker G is no longer running; release
+                // the M.
+                //
+                // N.B. it is _safe_ to release the M as soon
+                // as we are no longer performing P-local mark
+                // work.
+                //
+                // However, since we cooperatively stop work
+                // when gp.preempt is set, if we releasem in
+                // the loop then the following call to gopark
+                // would immediately preempt the G. This is
+                // also safe, but inefficient: the G must
+                // schedule again only to enter gopark and park
+                // again. Thus, we defer the release until
+                // after parking the G.
+                releasem(mp)
             }
+
+            // Release this G to the pool.
+            gcBgMarkWorkerPool.push(&node.node)
+            // Note that at this point, the G may immediately be
+            // rescheduled and may be running.
             return true
-        }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
+        }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0)

-        // Loop until the P dies and disassociates this
-        // worker (the P may later be reused, in which case
-        // it will get a new worker) or we failed to associate.
-        if _p_.gcBgMarkWorker.ptr() != gp {
-            break
-        }
+        // Preemption must not occur here, or another G might see
+        // p.gcMarkWorkerMode.

         // Disable preemption so we can use the gcw. If the
         // scheduler wants to preempt us, we'll stop draining,
         // dispose the gcw, and then preempt.
-        park.m.set(acquirem())
+        node.m.set(acquirem())
+        pp := gp.m.p.ptr() // P can't change with preemption disabled.

         if gcBlackenEnabled == 0 {
+            println("worker mode", pp.gcMarkWorkerMode)
             throw("gcBgMarkWorker: blackening not enabled")
         }
+        if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker {
+            throw("gcBgMarkWorker: mode not set")
+        }
+
         startTime := nanotime()
-        _p_.gcMarkWorkerStartTime = startTime
+        pp.gcMarkWorkerStartTime = startTime

         decnwait := atomic.Xadd(&work.nwait, -1)
         if decnwait == work.nproc {
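Idle workers now wait in gcBgMarkWorkerPool, a lock-free stack of gcBgMarkWorkerNodes linked through the embedded lfnode, which is why that field must come first. The runtime's lfstack packs the node pointer and a counter into a single uint64 to defend against ABA reuse; the simplified Treiber-stack sketch below ignores that detail and is only meant to illustrate the push/pop idea with standard sync/atomic:

package sketch

import (
	"sync/atomic"
	"unsafe"
)

// node mirrors gcBgMarkWorkerNode: the intrusive link must be the first
// field; payload fields (the worker's g and m in the runtime) would follow.
type node struct {
	next unsafe.Pointer // *node
}

// stack is a Treiber stack: push and pop retry a CAS on the head pointer.
type stack struct {
	head unsafe.Pointer // *node
}

func (s *stack) push(n *node) {
	for {
		old := atomic.LoadPointer(&s.head)
		n.next = old
		if atomic.CompareAndSwapPointer(&s.head, old, unsafe.Pointer(n)) {
			return
		}
	}
}

func (s *stack) pop() *node {
	for {
		old := atomic.LoadPointer(&s.head)
		if old == nil {
			return nil
		}
		n := (*node)(old)
		if atomic.CompareAndSwapPointer(&s.head, old, n.next) {
			return n
		}
	}
}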
@@ -1953,11 +1975,11 @@ func gcBgMarkWorker(_p_ *p) {
             // disabled for mark workers, so it is safe to
             // read from the G stack.
             casgstatus(gp, _Grunning, _Gwaiting)
-            switch _p_.gcMarkWorkerMode {
+            switch pp.gcMarkWorkerMode {
             default:
                 throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
             case gcMarkWorkerDedicatedMode:
-                gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+                gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
                 if gp.preempt {
                     // We were preempted. This is
                     // a useful signal to kick
@@ -1966,7 +1988,7 @@ func gcBgMarkWorker(_p_ *p) {
                     // somewhere else.
                     lock(&sched.lock)
                     for {
-                        gp, _ := runqget(_p_)
+                        gp, _ := runqget(pp)
                         if gp == nil {
                             break
                         }
@@ -1976,24 +1998,24 @@ func gcBgMarkWorker(_p_ *p) {
                 }
                 // Go back to draining, this time
                 // without preemption.
-                gcDrain(&_p_.gcw, gcDrainFlushBgCredit)
+                gcDrain(&pp.gcw, gcDrainFlushBgCredit)
             case gcMarkWorkerFractionalMode:
-                gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+                gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
             case gcMarkWorkerIdleMode:
-                gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+                gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
             }
             casgstatus(gp, _Gwaiting, _Grunning)
         })

         // Account for time.
         duration := nanotime() - startTime
-        switch _p_.gcMarkWorkerMode {
+        switch pp.gcMarkWorkerMode {
         case gcMarkWorkerDedicatedMode:
             atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
             atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
         case gcMarkWorkerFractionalMode:
             atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
-            atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration)
+            atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
         case gcMarkWorkerIdleMode:
             atomic.Xaddint64(&gcController.idleMarkTime, duration)
         }
@@ -2002,31 +2024,27 @@ func gcBgMarkWorker(_p_ *p) {
         // of work?
         incnwait := atomic.Xadd(&work.nwait, +1)
         if incnwait > work.nproc {
-            println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode,
+            println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode,
                 "work.nwait=", incnwait, "work.nproc=", work.nproc)
             throw("work.nwait > work.nproc")
         }

+        // We'll releasem after this point and thus this P may run
+        // something else. We must clear the worker mode to avoid
+        // attributing the mode to a different (non-worker) G in
+        // traceGoStart.
+        pp.gcMarkWorkerMode = gcMarkWorkerNotWorker
+
         // If this worker reached a background mark completion
         // point, signal the main GC goroutine.
         if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
-            // Make this G preemptible and disassociate it
-            // as the worker for this P so
-            // findRunnableGCWorker doesn't try to
-            // schedule it.
-            _p_.gcBgMarkWorker.set(nil)
-            releasem(park.m.ptr())
+            // We don't need the P-local buffers here, allow
+            // preemption because we may schedule like a regular
+            // goroutine in gcMarkDone (block on locks, etc).
+            releasem(node.m.ptr())
+            node.m.set(nil)

             gcMarkDone()
-
-            // Disable preemption and prepare to reattach
-            // to the P.
-            //
-            // We may be running on a different P at this
-            // point, so we can't reattach until this G is
-            // parked.
-            park.m.set(acquirem())
-            park.attach.set(_p_)
         }
     }
 }
@@ -2087,7 +2105,7 @@ func gcMark(start_time int64) {
         // ensured all reachable objects were marked, all of
         // these must be pointers to black objects. Hence we
         // can just discard the write barrier buffer.
-        if debug.gccheckmark > 0 || throwOnGCWork {
+        if debug.gccheckmark > 0 {
             // For debugging, flush the buffer and make
             // sure it really was all marked.
             wbBufFlush1(p)
@@ -2119,13 +2137,21 @@ func gcMark(start_time int64) {
         gcw.dispose()
     }

-    throwOnGCWork = false
-
-    cachestats()
-
     // Update the marked heap stat.
     memstats.heap_marked = work.bytesMarked

+    // Flush scanAlloc from each mcache since we're about to modify
+    // heap_scan directly. If we were to flush this later, then scanAlloc
+    // might have incorrect information.
+    for _, p := range allp {
+        c := p.mcache
+        if c == nil {
+            continue
+        }
+        memstats.heap_scan += uint64(c.scanAlloc)
+        c.scanAlloc = 0
+    }
+
     // Update other GC heap size stats. This must happen after
     // cachestats (which flushes local statistics to these) and
     // flushallmcaches (which modifies heap_live).
@@ -2144,6 +2170,8 @@ func gcMark(start_time int64) {
 //
 //go:systemstack
 func gcSweep(mode gcMode) {
+    assertWorldStopped()
+
     if gcphase != _GCoff {
         throw("gcSweep being done but phase is not GCoff")
     }
@@ -2151,21 +2179,13 @@ func gcSweep(mode gcMode) {
     lock(&mheap_.lock)
     mheap_.sweepgen += 2
     mheap_.sweepdone = 0
-    if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
-        // We should have drained this list during the last
-        // sweep phase. We certainly need to start this phase
-        // with an empty swept list.
-        throw("non-empty swept list")
-    }
     mheap_.pagesSwept = 0
     mheap_.sweepArenas = mheap_.allArenas
     mheap_.reclaimIndex = 0
     mheap_.reclaimCredit = 0
     unlock(&mheap_.lock)

-    if go115NewMCentralImpl {
-        sweep.centralIndex.clear()
-    }
+    sweep.centralIndex.clear()

     if !_ConcurrentSweep || mode == gcForceBlockMode {
         // Special case synchronous sweep.