Commit 32fef990 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov Committed by Russ Cox

runtime: fix CPU underutilization

runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.
This is reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
        runtime·lock(&runtime·sched);
        gp = globrunqget(m->p, 1);
        runtime·unlock(&runtime·sched);
}
If M gets gp from global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044
parent 735cf529
...@@ -1133,6 +1133,25 @@ stop: ...@@ -1133,6 +1133,25 @@ stop:
goto top; goto top;
} }
static void
resetspinning(void)
{
int32 nmspinning;
if(m->spinning) {
m->spinning = false;
nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
if(nmspinning < 0)
runtime·throw("findrunnable: negative nmspinning");
} else
nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
// so see if we need to wakeup another P here.
if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
wakep();
}
// Injects the list of runnable G's into the scheduler. // Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC. // Can run concurrently with GC.
static void static void
...@@ -1184,28 +1203,22 @@ top: ...@@ -1184,28 +1203,22 @@ top:
runtime·lock(&runtime·sched); runtime·lock(&runtime·sched);
gp = globrunqget(m->p, 1); gp = globrunqget(m->p, 1);
runtime·unlock(&runtime·sched); runtime·unlock(&runtime·sched);
if(gp)
resetspinning();
} }
if(gp == nil) { if(gp == nil) {
gp = runqget(m->p); gp = runqget(m->p);
if(gp && m->spinning) if(gp && m->spinning)
runtime·throw("schedule: spinning with local work"); runtime·throw("schedule: spinning with local work");
} }
if(gp == nil) if(gp == nil) {
gp = findrunnable(); gp = findrunnable(); // blocks until work is available
resetspinning();
if(m->spinning) {
m->spinning = false;
runtime·xadd(&runtime·sched.nmspinning, -1);
} }
// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
// so see if we need to wakeup another M here.
if (m->p->runqhead != m->p->runqtail &&
runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
runtime·atomicload(&runtime·sched.npidle) > 0) // TODO: fast atomic
wakep();
if(gp->lockedm) { if(gp->lockedm) {
// Hands off own p to the locked m,
// then blocks waiting for a new p.
startlockedm(gp); startlockedm(gp);
goto top; goto top;
} }
......
...@@ -93,6 +93,30 @@ func TestYieldLocked(t *testing.T) { ...@@ -93,6 +93,30 @@ func TestYieldLocked(t *testing.T) {
<-c <-c
} }
func TestGoroutineParallelism(t *testing.T) {
const P = 4
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
for try := 0; try < 10; try++ {
done := make(chan bool)
x := uint32(0)
for p := 0; p < P; p++ {
// Test that all P goroutines are scheduled at the same time
go func(p int) {
for i := 0; i < 3; i++ {
expected := uint32(P*i + p)
for atomic.LoadUint32(&x) != expected {
}
atomic.StoreUint32(&x, expected+1)
}
done <- true
}(p)
}
for p := 0; p < P; p++ {
<-done
}
}
}
func TestBlockLocked(t *testing.T) { func TestBlockLocked(t *testing.T) {
const N = 10 const N = 10
c := make(chan bool) c := make(chan bool)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment