Commit ce9a4afa authored by Dmitry Vyukov

runtime: simplify procresize

Currently we do a very complex rebalancing of runnable goroutines
between queues, which tries to preserve scheduling fairness.
Besides being complex and error-prone, it also destroys all locality
of scheduling.

This change uses a simpler scheme: leave runnable goroutines where
they are; during starttheworld, start all Ps that have local work,
plus start one additional P in case we have excess runnable
goroutines in local queues or in the global queue.
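The scheme is easy to see in miniature. The following is a self-contained
toy sketch, not the runtime's actual implementation (the real code tracks
run queues with runqhead/runqtail, parks idle Ps via pidleput, and attaches
Ms via mget): procresize leaves each surviving P's queue in place, drains
the queues of freed Ps into the global queue, and hands the caller a linked
list of Ps that still have local work.

package main

import "fmt"

type g struct{ id int }

type p struct {
	id   int
	runq []*g // local run queue
	link *p   // chains Ps that still have work, as in the real procresize
}

var globrunq []*g // global run queue

// procresize models the new scheme: local queues are left in place,
// Ps beyond the new count are drained into the global queue, and Ps
// that still have local work are returned as a linked list.
func procresize(allp []*p, new int) *p {
	// free unused P's: move their runnable goroutines to the global queue
	for _, pp := range allp[new:] {
		globrunq = append(globrunq, pp.runq...)
		pp.runq = nil
	}
	var runnablePs *p
	for i := new - 1; i >= 0; i-- {
		pp := allp[i]
		if len(pp.runq) == 0 {
			continue // the runtime would put this P on the idle list
		}
		pp.link = runnablePs
		runnablePs = pp
	}
	return runnablePs
}

func main() {
	allp := []*p{
		{id: 0, runq: []*g{{1}, {2}}},
		{id: 1}, // no local work
		{id: 2, runq: []*g{{3}}},
	}
	for pp := procresize(allp, 2); pp != nil; pp = pp.link {
		fmt.Printf("P%d keeps %d local goroutine(s); the caller starts an M for it\n", pp.id, len(pp.runq))
	}
	fmt.Printf("global queue received %d goroutine(s) from freed Ps\n", len(globrunq))
}

In the actual patch below, starttheworld consumes exactly such a list (p1),
starting or waking an M for each P on it.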

The scheduler must be able to operate efficiently without the
rebalancing anyway, because the rebalancing only ever ran at
stop-the-world points and garbage collections do not have to happen
frequently.

The immediate need is execution tracing support: handling of
garbage collection, which does stoptheworld/starttheworld several
times, becomes exceedingly complex if the current execution can
jump between Ps during starttheworld.

Change-Id: I4fdb7a6d80ca4bd08900d0c6a0a252a95b1a2c90
Reviewed-on: https://go-review.googlesource.com/1951
Reviewed-by: Rick Hudson <rlh@golang.org>
parent c7eb9663
@@ -132,7 +132,9 @@ func schedinit() {
 		}
 		procs = n
 	}
-	procresize(int32(procs))
+	if procresize(int32(procs)) != nil {
+		gothrow("unknown runnable goroutine during bootstrap")
+	}
 	if buildVersion == "" {
 		// Condition should never trigger. This code just serves
@@ -651,30 +653,14 @@ func starttheworld() {
 	injectglist(gp)
 	add := needaddgcproc()
 	lock(&sched.lock)
+
+	procs := gomaxprocs
 	if newprocs != 0 {
-		procresize(newprocs)
+		procs = newprocs
 		newprocs = 0
-	} else {
-		procresize(gomaxprocs)
 	}
+	p1 := procresize(procs)
 	sched.gcwaiting = 0
-
-	var p1 *p
-	for {
-		p := pidleget()
-		if p == nil {
-			break
-		}
-		// procresize() puts p's with work at the beginning of the list.
-		// Once we reach a p without a run queue, the rest don't have one either.
-		if p.runqhead == p.runqtail {
-			pidleput(p)
-			break
-		}
-		p.m = mget()
-		p.link = p1
-		p1 = p
-	}
 	if sched.sysmonwait != 0 {
 		sched.sysmonwait = 0
 		notewakeup(&sched.sysmonnote)
@@ -699,6 +685,13 @@ func starttheworld() {
 		}
 	}
+
+	// Wakeup an additional proc in case we have excessive runnable goroutines
+	// in local queues or in the global queue. If we don't, the proc will park itself.
+	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 {
+		wakep()
+	}
 	if add {
 		// If GC could have used another helper proc, start one now,
 		// in the hope that it will be available next time.
@@ -2383,7 +2376,8 @@ func setcpuprofilerate_m(hz int32) {
 // Change number of processors. The world is stopped, sched is locked.
 // gcworkbufs are not being modified by either the GC or
 // the write barrier code.
-func procresize(new int32) {
+// Returns list of Ps with local work, they need to be scheduled by the caller.
+func procresize(new int32) *p {
 	old := gomaxprocs
 	if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs {
 		gothrow("procresize: invalid arg")
@@ -2410,19 +2404,11 @@ func procresize(new int32) {
 		}
 	}
 
-	// redistribute runnable G's evenly
-	// collect all runnable goroutines in global queue preserving FIFO order
-	// FIFO order is required to ensure fairness even during frequent GCs
-	// see http://golang.org/issue/7126
-	empty := false
-	for !empty {
-		empty = true
-		for i := int32(0); i < old; i++ {
-			p := allp[i]
-			if p.runqhead == p.runqtail {
-				continue
-			}
-			empty = false
+	// free unused P's
+	for i := new; i < old; i++ {
+		p := allp[i]
+		// move all runnable goroutines to the global queue
+		for p.runqhead != p.runqtail {
 			// pop from tail of local queue
 			p.runqtail--
 			gp := p.runq[p.runqtail%uint32(len(p.runq))]
@@ -2434,25 +2420,6 @@ func procresize(new int32) {
 			}
 			sched.runqsize++
 		}
-	}
-
-	// fill local queues with at most len(p.runq)/2 goroutines
-	// start at 1 because current M already executes some G and will acquire allp[0] below,
-	// so if we have a spare G we want to put it into allp[1].
-	var _p_ p
-	for i := int32(1); i < new*int32(len(_p_.runq))/2 && sched.runqsize > 0; i++ {
-		gp := sched.runqhead
-		sched.runqhead = gp.schedlink
-		if sched.runqhead == nil {
-			sched.runqtail = nil
-		}
-		sched.runqsize--
-		runqput(allp[i%new], gp)
-	}
-
-	// free unused P's
-	for i := new; i < old; i++ {
-		p := allp[i]
 		freemcache(p.mcache)
 		p.mcache = nil
 		gfpurge(p)
@@ -2461,22 +2428,39 @@ func procresize(new int32) {
 	}
 
 	_g_ := getg()
-	if _g_.m.p != nil {
-		_g_.m.p.m = nil
+	if _g_.m.p != nil && _g_.m.p.id < new {
+		// continue to use the current P
+		_g_.m.p.status = _Prunning
+	} else {
+		// release the current P and acquire allp[0]
+		if _g_.m.p != nil {
+			_g_.m.p.m = nil
+		}
+		_g_.m.p = nil
+		_g_.m.mcache = nil
+		p := allp[0]
+		p.m = nil
+		p.status = _Pidle
+		acquirep(p)
 	}
-	_g_.m.p = nil
-	_g_.m.mcache = nil
-	p := allp[0]
-	p.m = nil
-	p.status = _Pidle
-	acquirep(p)
-	for i := new - 1; i > 0; i-- {
+	var runnablePs *p
+	for i := new - 1; i >= 0; i-- {
 		p := allp[i]
+		if _g_.m.p == p {
+			continue
+		}
 		p.status = _Pidle
-		pidleput(p)
+		if p.runqhead == p.runqtail {
+			pidleput(p)
+		} else {
+			p.m = mget()
+			p.link = runnablePs
+			runnablePs = p
+		}
 	}
 	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
 	atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(new))
+	return runnablePs
 }

 // Associate p and the current m.
...