Diffstat (limited to 'libgo/go/runtime/mgc.go')
-rw-r--r--  libgo/go/runtime/mgc.go  492
1 file changed, 256 insertions(+), 236 deletions(-)
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index 72479c2..a9f2c1a 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -292,10 +292,14 @@ func setGCPhase(x uint32) {
type gcMarkWorkerMode int
const (
+ // gcMarkWorkerNotWorker indicates that the next scheduled G is not
+ // starting work and the mode should be ignored.
+ gcMarkWorkerNotWorker gcMarkWorkerMode = iota
+
// gcMarkWorkerDedicatedMode indicates that the P of a mark
// worker is dedicated to running that mark worker. The mark
// worker should run without preemption.
- gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota
+ gcMarkWorkerDedicatedMode
// gcMarkWorkerFractionalMode indicates that a P is currently
// running the "fractional" mark worker. The fractional worker
@@ -315,6 +319,7 @@ const (
// gcMarkWorkerModeStrings are the string labels of gcMarkWorkerModes
// to use in execution traces.
var gcMarkWorkerModeStrings = [...]string{
+ "Not worker",
"GC (dedicated)",
"GC (fractional)",
"GC (idle)",
@@ -390,10 +395,24 @@ type gcControllerState struct {
// bytes that should be performed by mutator assists. This is
// computed at the beginning of each cycle and updated every
// time heap_scan is updated.
- assistWorkPerByte float64
+ //
+ // Stored as a uint64, but it's actually a float64. Use
+ // float64frombits to get the value.
+ //
+ // Read and written atomically.
+ assistWorkPerByte uint64
// assistBytesPerWork is 1/assistWorkPerByte.
- assistBytesPerWork float64
+ //
+ // Stored as a uint64, but it's actually a float64. Use
+ // float64frombits to get the value.
+ //
+ // Read and written atomically.
+ //
+ // Note that because this is read and written independently
+ // from assistWorkPerByte, users may notice a skew between
+ // the two values, and such a state should be safe.
+ assistBytesPerWork uint64
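Outside the runtime, the same float64-in-uint64 pattern can be written with math and sync/atomic; a minimal sketch under those assumptions (type and method names are illustrative):

	import (
		"math"
		"sync/atomic"
	)

	// atomicFloat64 mirrors the trick used for assistWorkPerByte: the
	// bits of a float64 live in a uint64 so access can be atomic.
	type atomicFloat64 struct{ bits uint64 }

	func (f *atomicFloat64) Load() float64 {
		return math.Float64frombits(atomic.LoadUint64(&f.bits))
	}

	func (f *atomicFloat64) Store(v float64) {
		atomic.StoreUint64(&f.bits, math.Float64bits(v))
	}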
// fractionalUtilizationGoal is the fraction of wall clock
// time that should be spent in the fractional mark worker on
@@ -411,7 +430,8 @@ type gcControllerState struct {
}
// startCycle resets the GC controller's state and computes estimates
-// for a new GC cycle. The caller must hold worldsema.
+// for a new GC cycle. The caller must hold worldsema and the world
+// must be stopped.
func (c *gcControllerState) startCycle() {
c.scanWork = 0
c.bgScanCredit = 0
@@ -471,7 +491,8 @@ func (c *gcControllerState) startCycle() {
c.revise()
if debug.gcpacertrace > 0 {
- print("pacer: assist ratio=", c.assistWorkPerByte,
+ assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte))
+ print("pacer: assist ratio=", assistRatio,
" (scan ", memstats.heap_scan>>20, " MB in ",
work.initialHeapLive>>20, "->",
memstats.next_gc>>20, " MB)",
@@ -481,9 +502,22 @@ func (c *gcControllerState) startCycle() {
}
// revise updates the assist ratio during the GC cycle to account for
-// improved estimates. This should be called either under STW or
-// whenever memstats.heap_scan, memstats.heap_live, or
-// memstats.next_gc is updated (with mheap_.lock held).
+// improved estimates. This should be called whenever memstats.heap_scan,
+// memstats.heap_live, or memstats.next_gc is updated. It is safe to
+// call concurrently, but it may race with other calls to revise.
+//
+// The result of this race is that the two assist ratio values may not line
+// up or may be stale. In practice this is OK because the assist ratio
+// moves slowly throughout a GC cycle, and the assist ratio is a best-effort
+// heuristic anyway. Furthermore, no part of the heuristic depends on
+// the two assist ratio values being exact reciprocals of one another, since
+// the two values are used to convert values from different sources.
+//
+// The worst case result of this raciness is that we may miss a larger shift
+// in the ratio (say, if we decide to pace more aggressively against the
+// hard heap goal) but even this "hard goal" is best-effort (see #40460).
+// The dedicated GC should ensure we don't exceed the hard goal by too much
+// in the rare case we do exceed it.
//
// It should only be called when gcBlackenEnabled != 0 (because this
// is when assists are enabled and the necessary statistics are
@@ -496,10 +530,12 @@ func (c *gcControllerState) revise() {
gcpercent = 100000
}
live := atomic.Load64(&memstats.heap_live)
+ scan := atomic.Load64(&memstats.heap_scan)
+ work := atomic.Loadint64(&c.scanWork)
// Assume we're under the soft goal. Pace GC to complete at
// next_gc assuming the heap is in steady-state.
- heapGoal := int64(memstats.next_gc)
+ heapGoal := int64(atomic.Load64(&memstats.next_gc))
// Compute the expected scan work remaining.
//
@@ -510,17 +546,17 @@ func (c *gcControllerState) revise() {
//
// (This is a float calculation to avoid overflowing on
// 100*heap_scan.)
- scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent))
+ scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent))
- if live > memstats.next_gc || c.scanWork > scanWorkExpected {
+ if int64(live) > heapGoal || work > scanWorkExpected {
// We're past the soft goal, or we've already done more scan
// work than we expected. Pace GC so that in the worst case it
// will complete by the hard goal.
const maxOvershoot = 1.1
- heapGoal = int64(float64(memstats.next_gc) * maxOvershoot)
+ heapGoal = int64(float64(heapGoal) * maxOvershoot)
// Compute the upper bound on the scan work remaining.
- scanWorkExpected = int64(memstats.heap_scan)
+ scanWorkExpected = int64(scan)
}
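As a worked example of the formula above: with gcpercent = 100, scanWorkExpected is heap_scan * 100/200, so a 64 MiB heap_scan yields 32 MiB of expected scan work; the other half is attributed to allocate-black growth during the cycle, which inflates heap_scan without adding work the GC must still do. Past the soft goal, the bound relaxes to the full heap_scan.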
// Compute the remaining scan work estimate.
@@ -530,7 +566,7 @@ func (c *gcControllerState) revise() {
// (scanWork), so allocation will change this difference
// slowly in the soft regime and not at all in the hard
// regime.
- scanWorkRemaining := scanWorkExpected - c.scanWork
+ scanWorkRemaining := scanWorkExpected - work
if scanWorkRemaining < 1000 {
// We set a somewhat arbitrary lower bound on
// remaining scan work since if we aim a little high,
@@ -554,8 +590,15 @@ func (c *gcControllerState) revise() {
// Compute the mutator assist ratio so by the time the mutator
// allocates the remaining heap bytes up to next_gc, it will
// have done (or stolen) the remaining amount of scan work.
- c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
- c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
+ // Note that the assist ratio values are updated atomically
+ // but not together. This means there may be some degree of
+ // skew between the two values. This is generally OK as the
+ // values shift relatively slowly over the course of a GC
+ // cycle.
+ assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining)
+ assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining)
+ atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte))
+ atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork))
}
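A compact sketch of the ratio computation revise performs, written as a plain function with hypothetical parameters (the real code reads gcControllerState and memstats fields, and guards heapRemaining earlier in the function):

	// assistRatios returns how much scan work a mutator owes per byte
	// allocated, and the reciprocal used to convert credit back to bytes.
	func assistRatios(scanWorkRemaining, heapRemaining int64) (workPerByte, bytesPerWork float64) {
		if scanWorkRemaining < 1000 {
			scanWorkRemaining = 1000 // lower bound, as in revise
		}
		if heapRemaining <= 0 {
			heapRemaining = 1 // avoid a zero or negative divisor
		}
		workPerByte = float64(scanWorkRemaining) / float64(heapRemaining)
		bytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining)
		return
	}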
// endCycle computes the trigger ratio for the next cycle.
@@ -672,18 +715,12 @@ func (c *gcControllerState) enlistWorker() {
}
}
-// findRunnableGCWorker returns the background mark worker for _p_ if it
+// findRunnableGCWorker returns a background mark worker for _p_ if it
// should be run. This must only be called when gcBlackenEnabled != 0.
func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
if gcBlackenEnabled == 0 {
throw("gcControllerState.findRunnable: blackening not enabled")
}
- if _p_.gcBgMarkWorker == 0 {
- // The mark worker associated with this P is blocked
- // performing a mark transition. We can't run it
- // because it may be on some other run or wait queue.
- return nil
- }
if !gcMarkWorkAvailable(_p_) {
// No work to be done right now. This can happen at
@@ -693,15 +730,35 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
return nil
}
+ // Grab a worker before we commit to running below.
+ node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
+ if node == nil {
+ // There is at least one worker per P, so normally there are
+ // enough workers to run on all Ps, if necessary. However, once
+ // a worker enters gcMarkDone it may park without rejoining the
+ // pool, thus freeing a P with no corresponding worker.
+ // gcMarkDone never depends on another worker doing work, so it
+ // is safe to simply do nothing here.
+ //
+ // If gcMarkDone bails out without completing the mark phase,
+ // it will always do so with queued global work. Thus, that P
+ // will be immediately eligible to re-run the worker G it was
+ // just using, ensuring work can complete.
+ return nil
+ }
+
decIfPositive := func(ptr *int64) bool {
- if *ptr > 0 {
- if atomic.Xaddint64(ptr, -1) >= 0 {
+ for {
+ v := atomic.Loadint64(ptr)
+ if v <= 0 {
+ return false
+ }
+
+ // TODO: having atomic.Casint64 would be more pleasant.
+ if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) {
return true
}
- // We lost a race
- atomic.Xaddint64(ptr, +1)
}
- return false
}
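The same decrement-if-positive loop can be written outside the runtime with sync/atomic, which does provide CompareAndSwapInt64; a sketch:

	import "sync/atomic"

	// decIfPositive atomically decrements *ptr only while it is positive
	// and reports whether this caller won the decrement.
	func decIfPositive(ptr *int64) bool {
		for {
			v := atomic.LoadInt64(ptr)
			if v <= 0 {
				return false
			}
			if atomic.CompareAndSwapInt64(ptr, v, v-1) {
				return true
			}
			// Lost a race with a concurrent update; reload and retry.
		}
	}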
if decIfPositive(&c.dedicatedMarkWorkersNeeded) {
@@ -710,6 +767,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
_p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode
} else if c.fractionalUtilizationGoal == 0 {
// No need for fractional workers.
+ gcBgMarkWorkerPool.push(&node.node)
return nil
} else {
// Is this P behind on the fractional utilization
@@ -719,14 +777,15 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g {
delta := nanotime() - gcController.markStartTime
if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal {
// Nope. No need to run a fractional worker.
+ gcBgMarkWorkerPool.push(&node.node)
return nil
}
// Run a fractional worker.
_p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode
}
- // Run the background mark worker
- gp := _p_.gcBgMarkWorker.ptr()
+ // Run the background mark worker.
+ gp := node.gp.ptr()
casgstatus(gp, _Gwaiting, _Grunnable)
if trace.enabled {
traceGoUnpark(gp, 0)
@@ -764,6 +823,8 @@ func pollFractionalWorkerExit() bool {
//
// mheap_.lock must be held or the world must be stopped.
func gcSetTriggerRatio(triggerRatio float64) {
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+
// Compute the next GC goal, which is when the allocated heap
// has grown by GOGC/100 over the heap marked by the last
// cycle.
@@ -846,7 +907,7 @@ func gcSetTriggerRatio(triggerRatio float64) {
// Commit to the trigger and goal.
memstats.gc_trigger = trigger
- memstats.next_gc = goal
+ atomic.Store64(&memstats.next_gc, goal)
if trace.enabled {
traceNextGC()
}
@@ -903,7 +964,9 @@ func gcSetTriggerRatio(triggerRatio float64) {
//
// mheap_.lock must be held or the world must be stopped.
func gcEffectiveGrowthRatio() float64 {
- egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked)
+ assertWorldStoppedOrLockHeld(&mheap_.lock)
+
+ egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked)
if egogc < 0 {
// Shouldn't happen, but just in case.
egogc = 0
@@ -985,7 +1048,6 @@ var work struct {
nproc uint32
tstart int64
nwait uint32
- ndone uint32
// Number of roots of various root types. Set by gcMarkRootPrepare.
nFlushCacheRoots int
@@ -1383,6 +1445,7 @@ func gcStart(trigger gcTrigger) {
now = startTheWorldWithSema(trace.enabled)
work.pauseNS += now - work.pauseStart
work.tMark = now
+ memstats.gcPauseDist.record(now - work.pauseStart)
})
// Release the world sema before Gosched() in STW mode
@@ -1409,19 +1472,6 @@ func gcStart(trigger gcTrigger) {
// This is protected by markDoneSema.
var gcMarkDoneFlushed uint32
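The gcPauseDist.record calls added in this patch feed each STW pause into a histogram of pause durations. A hedged sketch of log-scaled bucketing in that spirit (not the runtime's actual timeHistogram layout):

	import "math/bits"

	// pauseHist is an illustrative histogram with one bucket per bit
	// length, i.e. roughly power-of-two-wide duration buckets.
	type pauseHist struct {
		counts [65]uint64
	}

	func (h *pauseHist) record(ns int64) {
		if ns < 0 {
			ns = 0
		}
		h.counts[bits.Len64(uint64(ns))]++
	}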
-// debugCachedWork enables extra checks for debugging premature mark
-// termination.
-//
-// For debugging issue #27993.
-const debugCachedWork = false
-
-// gcWorkPauseGen is for debugging the mark completion algorithm.
-// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen.
-// Only used if debugCachedWork is true.
-//
-// For debugging issue #27993.
-var gcWorkPauseGen uint32 = 1
-
// gcMarkDone transitions the GC from mark to mark termination if all
// reachable objects have been marked (that is, there are no grey
// objects and can be no more in the future). Otherwise, it flushes
@@ -1477,15 +1527,7 @@ top:
// Flush the write barrier buffer, since this may add
// work to the gcWork.
wbBufFlush1(_p_)
- // For debugging, shrink the write barrier
- // buffer so it flushes immediately.
- // wbBuf.reset will keep it at this size as
- // long as throwOnGCWork is set.
- if debugCachedWork {
- b := &_p_.wbBuf
- b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers]))
- b.debugGen = gcWorkPauseGen
- }
+
// Flush the gcWork, since this may create global work
// and set the flushedWork flag.
//
@@ -1496,29 +1538,12 @@ top:
if _p_.gcw.flushedWork {
atomic.Xadd(&gcMarkDoneFlushed, 1)
_p_.gcw.flushedWork = false
- } else if debugCachedWork {
- // For debugging, freeze the gcWork
- // until we know whether we've reached
- // completion or not. If we think
- // we've reached completion, but
- // there's a paused gcWork, then
- // that's a bug.
- _p_.gcw.pauseGen = gcWorkPauseGen
- // Capture the G's stack.
- for i := range _p_.gcw.pauseStack {
- _p_.gcw.pauseStack[i].pc = 0
- }
- callers(1, _p_.gcw.pauseStack[:])
}
})
casgstatus(gp, _Gwaiting, _Grunning)
})
if gcMarkDoneFlushed != 0 {
- if debugCachedWork {
- // Release paused gcWorks.
- atomic.Xadd(&gcWorkPauseGen, 1)
- }
// More grey objects were discovered since the
// previous termination check, so there may be more
// work to do. Keep going. It's possible the
@@ -1528,13 +1553,6 @@ top:
goto top
}
- if debugCachedWork {
- throwOnGCWork = true
- // Release paused gcWorks. If there are any, they
- // should now observe throwOnGCWork and panic.
- atomic.Xadd(&gcWorkPauseGen, 1)
- }
-
// There was no global work, no local work, and no Ps
// communicated work since we took markDoneSema. Therefore
// there are no grey objects and no more objects can be
@@ -1551,59 +1569,34 @@ top:
// below. The important thing is that the wb remains active until
// all marking is complete. This includes writes made by the GC.
- if debugCachedWork {
- // For debugging, double check that no work was added after we
- // went around above and disable write barrier buffering.
+ // There is sometimes work left over when we enter mark termination due
+ // to write barriers performed after the completion barrier above.
+ // Detect this and resume concurrent mark. This is obviously
+ // unfortunate.
+ //
+ // See issue #27993 for details.
+ //
+ // Switch to the system stack to call wbBufFlush1, though in this case
+ // it doesn't matter because we're non-preemptible anyway.
+ restart := false
+ systemstack(func() {
for _, p := range allp {
- gcw := &p.gcw
- if !gcw.empty() {
- printlock()
- print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork)
- if gcw.wbuf1 == nil {
- print(" wbuf1=<nil>")
- } else {
- print(" wbuf1.n=", gcw.wbuf1.nobj)
- }
- if gcw.wbuf2 == nil {
- print(" wbuf2=<nil>")
- } else {
- print(" wbuf2.n=", gcw.wbuf2.nobj)
- }
- print("\n")
- if gcw.pauseGen == gcw.putGen {
- println("runtime: checkPut already failed at this generation")
- }
- throw("throwOnGCWork")
+ wbBufFlush1(p)
+ if !p.gcw.empty() {
+ restart = true
+ break
}
}
- } else {
- // For unknown reasons (see issue #27993), there is
- // sometimes work left over when we enter mark
- // termination. Detect this and resume concurrent
- // mark. This is obviously unfortunate.
- //
- // Switch to the system stack to call wbBufFlush1,
- // though in this case it doesn't matter because we're
- // non-preemptible anyway.
- restart := false
+ })
+ if restart {
+ getg().m.preemptoff = ""
systemstack(func() {
- for _, p := range allp {
- wbBufFlush1(p)
- if !p.gcw.empty() {
- restart = true
- break
- }
- }
+ now := startTheWorldWithSema(true)
+ work.pauseNS += now - work.pauseStart
+ memstats.gcPauseDist.record(now - work.pauseStart)
})
- if restart {
- getg().m.preemptoff = ""
- systemstack(func() {
- now := startTheWorldWithSema(true)
- work.pauseNS += now - work.pauseStart
- })
- semrelease(&worldsema)
- goto top
- }
+ semrelease(&worldsema)
+ goto top
}
// Disable assists and background workers. We must do
@@ -1632,10 +1625,10 @@ top:
gcMarkTermination(nextTriggerRatio)
}
+// World must be stopped and mark assists and background workers must be
+// disabled.
func gcMarkTermination(nextTriggerRatio float64) {
- // World is stopped.
- // Start marktermination which includes enabling the write barrier.
- atomic.Store(&gcBlackenEnabled, 0)
+ // Start marktermination (write barrier remains enabled for now).
setGCPhase(_GCmarktermination)
work.heap1 = memstats.heap_live
@@ -1672,13 +1665,13 @@ func gcMarkTermination(nextTriggerRatio float64) {
// mark using checkmark bits, to check that we
// didn't forget to mark anything during the
// concurrent mark process.
+ startCheckmarks()
gcResetMarkState()
- initCheckmarks()
gcw := &getg().m.p.ptr().gcw
gcDrain(gcw, 0)
wbBufFlush1(getg().m.p.ptr())
gcw.dispose()
- clearCheckmarks()
+ endCheckmarks()
}
// marking is complete so we can turn the write barrier off
@@ -1713,6 +1706,7 @@ func gcMarkTermination(nextTriggerRatio float64) {
unixNow := sec*1e9 + int64(nsec)
work.pauseNS += now - work.pauseStart
work.tEnd = now
+ memstats.gcPauseDist.record(now - work.pauseStart)
atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user
atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us
memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS)
@@ -1826,20 +1820,26 @@ func gcMarkTermination(nextTriggerRatio float64) {
}
}
-// gcBgMarkStartWorkers prepares background mark worker goroutines.
-// These goroutines will not run until the mark phase, but they must
-// be started while the work is not stopped and from a regular G
-// stack. The caller must hold worldsema.
+// gcBgMarkStartWorkers prepares background mark worker goroutines. These
+// goroutines will not run until the mark phase, but they must be started while
+// the work is not stopped and from a regular G stack. The caller must hold
+// worldsema.
func gcBgMarkStartWorkers() {
- // Background marking is performed by per-P G's. Ensure that
- // each P has a background GC G.
- for _, p := range allp {
- if p.gcBgMarkWorker == 0 {
- expectSystemGoroutine()
- go gcBgMarkWorker(p)
- notetsleepg(&work.bgMarkReady, -1)
- noteclear(&work.bgMarkReady)
- }
+ // Background marking is performed by per-P G's. Ensure that each P has
+ // a background GC G.
+ //
+ // Worker Gs don't exit if gomaxprocs is reduced. If it is raised
+ // again, we can reuse the old workers; no need to create new workers.
+ for gcBgMarkWorkerCount < gomaxprocs {
+ expectSystemGoroutine()
+ go gcBgMarkWorker()
+
+ notetsleepg(&work.bgMarkReady, -1)
+ noteclear(&work.bgMarkReady)
+ // The worker is now guaranteed to be added to the pool before
+ // its P's next findRunnableGCWorker.
+
+ gcBgMarkWorkerCount++
}
}
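The spawn-then-wait handshake above can be sketched with ordinary channels standing in for the runtime's note primitives (all names here are illustrative):

	// startWorkers mirrors the loop above: spawn one worker per missing
	// P and block until it signals readiness, so every worker is pooled
	// before its P's next findRunnableGCWorker.
	func startWorkers(started *int, maxprocs int) {
		ready := make(chan struct{})
		for *started < maxprocs {
			go func() {
				// ... register this goroutine in the worker pool ...
				ready <- struct{}{} // cf. notewakeup(&work.bgMarkReady)
				// ... park until scheduled by findRunnableGCWorker ...
			}()
			<-ready // cf. notetsleepg(&work.bgMarkReady, -1)
			*started++
		}
	}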
@@ -1859,84 +1859,106 @@ func gcBgMarkPrepare() {
work.nwait = ^uint32(0)
}
-func gcBgMarkWorker(_p_ *p) {
+// gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single
+// gcBgMarkWorker goroutine.
+type gcBgMarkWorkerNode struct {
+ // Unused workers are managed in a lock-free stack. This field must be first.
+ node lfnode
+
+ // The g of this worker.
+ gp guintptr
+
+ // Release this m on park. This is used to communicate with the unlock
+ // function, which cannot access the G's stack. It is unused outside of
+ // gcBgMarkWorker().
+ m muintptr
+}
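gcBgMarkWorkerPool is a runtime lfstack, an intrusive lock-free stack, which is why the lfnode must be the first field: the node's own address serves as the stack entry. A hedged sketch of the same intrusive idea as a Treiber stack on sync/atomic (Go 1.19+); the runtime's lfstack additionally packs a generation count into the head to defeat ABA, omitted here:

	import "sync/atomic"

	// node is an illustrative intrusive stack node; next plays the role
	// of lfnode and is owned by the stack while the node is pushed.
	type node struct {
		next *node
		gp   uintptr // payload, analogous to gcBgMarkWorkerNode.gp
	}

	type lfStack struct{ head atomic.Pointer[node] }

	func (s *lfStack) push(n *node) {
		for {
			old := s.head.Load()
			n.next = old
			if s.head.CompareAndSwap(old, n) {
				return
			}
		}
	}

	func (s *lfStack) pop() *node {
		for {
			old := s.head.Load()
			if old == nil {
				return nil
			}
			if s.head.CompareAndSwap(old, old.next) {
				return old
			}
		}
	}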
+
+func gcBgMarkWorker() {
setSystemGoroutine()
gp := getg()
- type parkInfo struct {
- m muintptr // Release this m on park.
- attach puintptr // If non-nil, attach to this p on park.
- }
- // We pass park to a gopark unlock function, so it can't be on
+ // We pass node to a gopark unlock function, so it can't be on
// the stack (see gopark). Prevent deadlock from recursively
// starting GC by disabling preemption.
gp.m.preemptoff = "GC worker init"
- park := new(parkInfo)
+ node := new(gcBgMarkWorkerNode)
gp.m.preemptoff = ""
- park.m.set(acquirem())
- park.attach.set(_p_)
- // Inform gcBgMarkStartWorkers that this worker is ready.
- // After this point, the background mark worker is scheduled
- // cooperatively by gcController.findRunnable. Hence, it must
- // never be preempted, as this would put it into _Grunnable
- // and put it on a run queue. Instead, when the preempt flag
- // is set, this puts itself into _Gwaiting to be woken up by
- // gcController.findRunnable at the appropriate time.
+ node.gp.set(gp)
+
+ node.m.set(acquirem())
notewakeup(&work.bgMarkReady)
+ // After this point, the background mark worker is generally scheduled
+ // cooperatively by gcController.findRunnableGCWorker. While performing
+ // work on the P, preemption is disabled because we are working on
+ // P-local work buffers. When the preempt flag is set, this puts itself
+ // into _Gwaiting to be woken up by gcController.findRunnableGCWorker
+ // at the appropriate time.
+ //
+ // When preemption is enabled (e.g., while in gcMarkDone), this worker
+ // may be preempted and schedule as a _Grunnable G from a runq. That is
+ // fine; it will eventually gopark again for further scheduling via
+ // findRunnableGCWorker.
+ //
+ // Since we disable preemption before notifying bgMarkReady, we
+ // guarantee that this G will be in the worker pool for the next
+ // findRunnableGCWorker. This isn't strictly necessary, but it reduces
+ // latency between _GCmark starting and the workers starting.
for {
- // Go to sleep until woken by gcController.findRunnable.
- // We can't releasem yet since even the call to gopark
- // may be preempted.
- gopark(func(g *g, parkp unsafe.Pointer) bool {
- park := (*parkInfo)(parkp)
-
- // The worker G is no longer running, so it's
- // now safe to allow preemption.
- releasem(park.m.ptr())
-
- // If the worker isn't attached to its P,
- // attach now. During initialization and after
- // a phase change, the worker may have been
- // running on a different P. As soon as we
- // attach, the owner P may schedule the
- // worker, so this must be done after the G is
- // stopped.
- if park.attach != 0 {
- p := park.attach.ptr()
- park.attach.set(nil)
- // cas the worker because we may be
- // racing with a new worker starting
- // on this P.
- if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) {
- // The P got a new worker.
- // Exit this worker.
- return false
- }
+ // Go to sleep until woken by
+ // gcController.findRunnableGCWorker.
+ gopark(func(g *g, nodep unsafe.Pointer) bool {
+ node := (*gcBgMarkWorkerNode)(nodep)
+
+ if mp := node.m.ptr(); mp != nil {
+ // The worker G is no longer running; release
+ // the M.
+ //
+ // N.B. it is _safe_ to release the M as soon
+ // as we are no longer performing P-local mark
+ // work.
+ //
+ // However, since we cooperatively stop work
+ // when gp.preempt is set, if we releasem in
+ // the loop then the following call to gopark
+ // would immediately preempt the G. This is
+ // also safe, but inefficient: the G must
+ // schedule again only to enter gopark and park
+ // again. Thus, we defer the release until
+ // after parking the G.
+ releasem(mp)
}
+
+ // Release this G to the pool.
+ gcBgMarkWorkerPool.push(&node.node)
+ // Note that at this point, the G may immediately be
+ // rescheduled and may be running.
return true
- }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
+ }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0)
- // Loop until the P dies and disassociates this
- // worker (the P may later be reused, in which case
- // it will get a new worker) or we failed to associate.
- if _p_.gcBgMarkWorker.ptr() != gp {
- break
- }
+ // Preemption must not occur here, or another G might see
+ // p.gcMarkWorkerMode.
// Disable preemption so we can use the gcw. If the
// scheduler wants to preempt us, we'll stop draining,
// dispose the gcw, and then preempt.
- park.m.set(acquirem())
+ node.m.set(acquirem())
+ pp := gp.m.p.ptr() // P can't change with preemption disabled.
if gcBlackenEnabled == 0 {
+ println("worker mode", pp.gcMarkWorkerMode)
throw("gcBgMarkWorker: blackening not enabled")
}
+ if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker {
+ throw("gcBgMarkWorker: mode not set")
+ }
+
startTime := nanotime()
- _p_.gcMarkWorkerStartTime = startTime
+ pp.gcMarkWorkerStartTime = startTime
decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc {
@@ -1953,11 +1975,11 @@ func gcBgMarkWorker(_p_ *p) {
// disabled for mark workers, so it is safe to
// read from the G stack.
casgstatus(gp, _Grunning, _Gwaiting)
- switch _p_.gcMarkWorkerMode {
+ switch pp.gcMarkWorkerMode {
default:
throw("gcBgMarkWorker: unexpected gcMarkWorkerMode")
case gcMarkWorkerDedicatedMode:
- gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit)
if gp.preempt {
// We were preempted. This is
// a useful signal to kick
@@ -1966,7 +1988,7 @@ func gcBgMarkWorker(_p_ *p) {
// somewhere else.
lock(&sched.lock)
for {
- gp, _ := runqget(_p_)
+ gp, _ := runqget(pp)
if gp == nil {
break
}
@@ -1976,24 +1998,24 @@ func gcBgMarkWorker(_p_ *p) {
}
// Go back to draining, this time
// without preemption.
- gcDrain(&_p_.gcw, gcDrainFlushBgCredit)
+ gcDrain(&pp.gcw, gcDrainFlushBgCredit)
case gcMarkWorkerFractionalMode:
- gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
case gcMarkWorkerIdleMode:
- gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
+ gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
}
casgstatus(gp, _Gwaiting, _Grunning)
})
// Account for time.
duration := nanotime() - startTime
- switch _p_.gcMarkWorkerMode {
+ switch pp.gcMarkWorkerMode {
case gcMarkWorkerDedicatedMode:
atomic.Xaddint64(&gcController.dedicatedMarkTime, duration)
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1)
case gcMarkWorkerFractionalMode:
atomic.Xaddint64(&gcController.fractionalMarkTime, duration)
- atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration)
+ atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
case gcMarkWorkerIdleMode:
atomic.Xaddint64(&gcController.idleMarkTime, duration)
}
@@ -2002,31 +2024,27 @@ func gcBgMarkWorker(_p_ *p) {
// of work?
incnwait := atomic.Xadd(&work.nwait, +1)
if incnwait > work.nproc {
- println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode,
+ println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode,
"work.nwait=", incnwait, "work.nproc=", work.nproc)
throw("work.nwait > work.nproc")
}
+ // We'll releasem after this point and thus this P may run
+ // something else. We must clear the worker mode to avoid
+ // attributing the mode to a different (non-worker) G in
+ // traceGoStart.
+ pp.gcMarkWorkerMode = gcMarkWorkerNotWorker
+
// If this worker reached a background mark completion
// point, signal the main GC goroutine.
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
- // Make this G preemptible and disassociate it
- // as the worker for this P so
- // findRunnableGCWorker doesn't try to
- // schedule it.
- _p_.gcBgMarkWorker.set(nil)
- releasem(park.m.ptr())
+ // We don't need the P-local buffers here, allow
+ // preemption because we may schedule like a regular
+ // goroutine in gcMarkDone (block on locks, etc).
+ releasem(node.m.ptr())
+ node.m.set(nil)
gcMarkDone()
-
- // Disable preemption and prepare to reattach
- // to the P.
- //
- // We may be running on a different P at this
- // point, so we can't reattach until this G is
- // parked.
- park.m.set(acquirem())
- park.attach.set(_p_)
}
}
}
@@ -2087,7 +2105,7 @@ func gcMark(start_time int64) {
// ensured all reachable objects were marked, all of
// these must be pointers to black objects. Hence we
// can just discard the write barrier buffer.
- if debug.gccheckmark > 0 || throwOnGCWork {
+ if debug.gccheckmark > 0 {
// For debugging, flush the buffer and make
// sure it really was all marked.
wbBufFlush1(p)
@@ -2119,13 +2137,21 @@ func gcMark(start_time int64) {
gcw.dispose()
}
- throwOnGCWork = false
-
- cachestats()
-
// Update the marked heap stat.
memstats.heap_marked = work.bytesMarked
+ // Flush scanAlloc from each mcache since we're about to modify
+ // heap_scan directly. If we were to flush this later, then scanAlloc
+ // might have incorrect information.
+ for _, p := range allp {
+ c := p.mcache
+ if c == nil {
+ continue
+ }
+ memstats.heap_scan += uint64(c.scanAlloc)
+ c.scanAlloc = 0
+ }
+
// Update other GC heap size stats. This must happen after
// cachestats (which flushes local statistics to these) and
// flushallmcaches (which modifies heap_live).
@@ -2144,6 +2170,8 @@ func gcMark(start_time int64) {
//
//go:systemstack
func gcSweep(mode gcMode) {
+ assertWorldStopped()
+
if gcphase != _GCoff {
throw("gcSweep being done but phase is not GCoff")
}
@@ -2151,21 +2179,13 @@ func gcSweep(mode gcMode) {
lock(&mheap_.lock)
mheap_.sweepgen += 2
mheap_.sweepdone = 0
- if !go115NewMCentralImpl && mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
- // We should have drained this list during the last
- // sweep phase. We certainly need to start this phase
- // with an empty swept list.
- throw("non-empty swept list")
- }
mheap_.pagesSwept = 0
mheap_.sweepArenas = mheap_.allArenas
mheap_.reclaimIndex = 0
mheap_.reclaimCredit = 0
unlock(&mheap_.lock)
- if go115NewMCentralImpl {
- sweep.centralIndex.clear()
- }
+ sweep.centralIndex.clear()
if !_ConcurrentSweep || mode == gcForceBlockMode {
// Special case synchronous sweep.