Commit 57afa764 authored by Austin Clements

runtime: add ragged global barrier function

This adds forEachP, which performs a general-purpose ragged global
barrier. forEachP takes a callback and invokes it for every P at a GC
safe point.
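
For illustration, a caller might look like this (flushP is a
hypothetical callback, not part of this change; forEachP's doc comment
below requires that the caller hold worldsema):

	// Run flushP on every P, each at its own GC safe point. flushP
	// is illustrative only; worldsema must already be held.
	forEachP(func(_p_ *p) {
		flushP(_p_)
	})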

Ps that are idle or in a syscall are considered to be at a continuous
safe point. forEachP ensures that these Ps do not change state by
forcing all syscall Ps into idle and holding the sched.lock.
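
The "do not change state" guarantee comes down to one atomic
handshake: both forEachP (acting on the P's behalf) and the P itself
try to claim p.runSafePointFn, and a CAS picks the winner. The idle-P
loop in the diff below is the forEachP side of that handshake:

	// Whoever wins the CAS runs fn for this P exactly once.
	if cas(&p.runSafePointFn, 1, 0) {
		fn(p)
		sched.stopwait--
	}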

To ensure that Ps do not enter syscall or idle without running the
safe-point function, this adds checks for a pending callback every
place there is currently a gcwaiting check.
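
Concretely, each of those points now carries a guard of this shape
(this exact form appears in schedule and findrunnable in the diff
below):

	if _g_.m.p.ptr().runSafePointFn != 0 {
		runSafePointFn()
	}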

We'll use forEachP to replace the STW around enabling the write
barrier and to replace the current asynchronous per-M wbuf cache with
a cooperatively managed per-P gcWork cache.
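
As a sketch of that direction (hypothetical; not part of this CL),
disposing each P's gcWork cache could ride the same barrier instead of
a full stop-the-world:

	// Hypothetical future use: flush per-P GC work caches at a
	// ragged barrier rather than during a STW pause.
	forEachP(func(_p_ *p) {
		_p_.gcw.dispose()
	})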

Change-Id: Ie944f8ce1fead7c79bf271d2f42fcd61a41bb3cc
Reviewed-on: https://go-review.googlesource.com/8206
Reviewed-by: Russ Cox <rsc@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 81c2233b
@@ -743,6 +743,124 @@ func mstart1() {
	schedule()
}

// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
// (it is idle or in a syscall), this will call fn(p) directly while
// preventing the P from exiting its state. This does not ensure that
// fn will run on every CPU executing Go code, but it acts as a global
// memory barrier. GC uses this as a "ragged barrier."
//
// The caller must hold worldsema.
func forEachP(fn func(*p)) {
	mp := acquirem()
	_p_ := getg().m.p.ptr()

	lock(&sched.lock)
	if sched.stopwait != 0 {
		throw("forEachP: sched.stopwait != 0")
	}
	sched.stopwait = gomaxprocs - 1
	sched.safePointFn = fn

	// Ask all Ps to run the safe point function.
	for _, p := range allp[:gomaxprocs] {
		if p != _p_ {
			atomicstore(&p.runSafePointFn, 1)
		}
	}
	preemptall()

	// Any P entering _Pidle or _Psyscall from now on will observe
	// p.runSafePointFn == 1 and will call runSafePointFn when
	// changing its status to _Pidle/_Psyscall.

	// Run safe point function for all idle Ps. sched.pidle will
	// not change because we hold sched.lock.
	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
		if cas(&p.runSafePointFn, 1, 0) {
			fn(p)
			sched.stopwait--
		}
	}

	wait := sched.stopwait > 0
	unlock(&sched.lock)

	// Run fn for the current P.
	fn(_p_)

	// Force Ps currently in _Psyscall into _Pidle and hand them
	// off to induce safe point function execution.
	for i := 0; i < int(gomaxprocs); i++ {
		p := allp[i]
		s := p.status
		if s == _Psyscall && p.runSafePointFn == 1 && cas(&p.status, s, _Pidle) {
			if trace.enabled {
				traceGoSysBlock(p)
				traceProcStop(p)
			}
			p.syscalltick++
			handoffp(p)
		}
	}

	// Wait for remaining Ps to run fn.
	if wait {
		for {
			// Wait for 100us, then try to re-preempt in
			// case of any races.
			if notetsleep(&sched.stopnote, 100*1000) {
				noteclear(&sched.stopnote)
				break
			}
			preemptall()
		}
	}
	if sched.stopwait != 0 {
		throw("forEachP: not stopped")
	}
	for i := 0; i < int(gomaxprocs); i++ {
		p := allp[i]
		if p.runSafePointFn != 0 {
			throw("forEachP: P did not run fn")
		}
	}

	lock(&sched.lock)
	sched.safePointFn = nil
	unlock(&sched.lock)
	releasem(mp)
}

// runSafePointFn runs the safe point function, if any, for this P.
// This should be called like
//
//     if getg().m.p.runSafePointFn != 0 {
//         runSafePointFn()
//     }
//
// runSafePointFn must be checked on any transition in to _Pidle or
// _Psyscall to avoid a race where forEachP sees that the P is running
// just before the P goes into _Pidle/_Psyscall and neither forEachP
// nor the P run the safe-point function.
func runSafePointFn() {
	p := getg().m.p.ptr()
	// Resolve the race between forEachP running the safe-point
	// function on this P's behalf and this P running the
	// safe-point function directly.
	if !cas(&p.runSafePointFn, 1, 0) {
		return
	}
	sched.safePointFn(p)
	lock(&sched.lock)
	sched.stopwait--
	if sched.stopwait == 0 {
		notewakeup(&sched.stopnote)
	}
	unlock(&sched.lock)
}

// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
@@ -1109,6 +1227,13 @@ func handoffp(_p_ *p) {
		unlock(&sched.lock)
		return
	}
	if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) {
		sched.safePointFn(_p_)
		sched.stopwait--
		if sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
	}
	if sched.runqsize != 0 {
		unlock(&sched.lock)
		startm(_p_, false)
@@ -1246,6 +1371,9 @@ top:
		gcstopm()
		goto top
	}
	if _g_.m.p.ptr().runSafePointFn != 0 {
		runSafePointFn()
	}
	if fingwait && fingwake {
		if gp := wakefing(); gp != nil {
			ready(gp, 0)
@@ -1327,7 +1455,7 @@ stop:
	// return P and block
	lock(&sched.lock)
	if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 {
		unlock(&sched.lock)
		goto top
	}
@@ -1454,6 +1582,9 @@ top:
		gcstopm()
		goto top
	}
	if _g_.m.p.ptr().runSafePointFn != 0 {
		runSafePointFn()
	}

	var gp *g
	var inheritTime bool
@@ -1709,6 +1840,10 @@ func reentersyscall(pc, sp uintptr) {
	_g_.m.mcache = nil
	_g_.m.p.ptr().m = 0
	atomicstore(&_g_.m.p.ptr().status, _Psyscall)
	if _g_.m.p.ptr().runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
	}
	if sched.gcwaiting != 0 {
		systemstack(entersyscall_gcwait)
		save(pc, sp)
...
@@ -38,8 +38,8 @@ const (
const (
	// P status
	_Pidle = iota
	_Prunning // Only this P is allowed to change from _Prunning.
	_Psyscall
	_Pgcstop
	_Pdead
@@ -388,6 +388,8 @@ type p struct {
	// disposed on certain GC state transitions.
	gcw gcWork

	runSafePointFn uint32 // if 1, run sched.safePointFn at next safe point

	pad [64]byte
}
@@ -437,6 +439,10 @@ type schedt struct {
	sysmonnote note
	lastpoll uint64

	// safePointFn should be called on each P at the next GC
	// safepoint if p.runSafePointFn is set.
	safePointFn func(*p)

	profilehz int32 // cpu profiling rate

	procresizetime int64 // nanotime() of last change to gomaxprocs
...