Commit 1f07e51a authored by Kirill Smelkov's avatar Kirill Smelkov

X zodb/cache: Switch RCE.ready from chan to sync.WaitGroup

Currently RCE.ready is chan(struct{}) and is closed to indicate 1->N
that loading of an RCE has finished. There is a drawback however with
using channel for this function:

	making RCE.ready allocates and thus adds pressure on GC.

We can use sync.WaitGroup functionality for this purpose of notifying
that something is ready. Usually sync.WaitGroup is used in N->1 way, but
as it also correctly works in N->M mode we can use in in our scenario
where 1 loading goroutine notifies N RCE waiters.

Checking already closed channel was also found to checking already done
sync.WaitGroup. Reason is that sync.WaitGroup is only a light wrapper
around runtime.sema, but channel is much more heavyweight.

Speedup with this patch:

name                        old time/op    new time/op    delta
NoopStorage-4                 56.2ns ± 0%    57.1ns ± 2%     ~     (p=0.079 n=5+5)
CacheStartup-4                1.34µs ± 7%    1.22µs ± 3%   -8.68%  (p=0.008 n=5+5)
CacheNoHit/size=0-4           1.67µs ± 4%    1.42µs ± 4%  -15.08%  (p=0.008 n=5+5)
CacheNoHit/size=16-4          1.73µs ± 7%    1.42µs ± 4%  -17.83%  (p=0.008 n=5+5)
CacheNoHit/size=128-4         1.68µs ± 5%    1.42µs ± 5%  -15.65%  (p=0.008 n=5+5)
CacheNoHit/size=512-4         1.62µs ± 4%    1.43µs ± 5%  -12.18%  (p=0.008 n=5+5)
CacheNoHit/size=4096-4        1.69µs ± 2%    1.44µs ± 0%  -15.03%  (p=0.008 n=5+5)
CacheHit/size=0-4              158ns ± 1%     130ns ± 2%  -17.47%  (p=0.008 n=5+5)
CacheHit/size=16-4             147ns ± 2%     124ns ± 7%  -16.01%  (p=0.008 n=5+5)
CacheHit/size=128-4            149ns ± 0%     126ns ± 5%  -15.30%  (p=0.016 n=4+5)
CacheHit/size=512-4            151ns ± 0%     126ns ± 1%  -16.56%  (p=0.000 n=4+5)
CacheHit/size=4096-4           154ns ± 0%     129ns ± 2%  -16.49%  (p=0.008 n=5+5)
NoopStoragePar-4              32.2ns ± 9%    30.4ns ± 5%     ~     (p=0.175 n=5+5)
CacheStartupPar-4             1.64µs ± 2%    1.42µs ± 4%  -13.31%  (p=0.008 n=5+5)
CacheNoHitPar/size=0-4        1.88µs ± 2%    1.63µs ± 3%  -13.33%  (p=0.008 n=5+5)
CacheNoHitPar/size=16-4       1.87µs ± 1%    1.62µs ± 2%  -13.33%  (p=0.008 n=5+5)
CacheNoHitPar/size=128-4      1.90µs ± 3%    1.64µs ± 2%  -13.68%  (p=0.008 n=5+5)
CacheNoHitPar/size=512-4      1.86µs ± 3%    1.62µs ± 1%  -12.91%  (p=0.008 n=5+5)
CacheNoHitPar/size=4096-4     1.87µs ± 3%    1.70µs ± 3%   -9.21%  (p=0.008 n=5+5)
CacheHitPar/size=0-4           233ns ± 0%     217ns ± 3%   -6.87%  (p=0.016 n=4+5)
CacheHitPar/size=16-4          228ns ± 2%     225ns ± 2%     ~     (p=0.119 n=5+5)
CacheHitPar/size=128-4         232ns ± 4%     214ns ± 1%   -7.92%  (p=0.008 n=5+5)
CacheHitPar/size=512-4         228ns ± 1%     210ns ± 1%   -7.82%  (p=0.008 n=5+5)
CacheHitPar/size=4096-4        226ns ± 2%     209ns ± 2%   -7.54%  (p=0.008 n=5+5)
NoopStorageProc-4             34.1ns ± 6%    34.9ns ±18%     ~     (p=0.690 n=5+5)
CacheStartupProc-4            1.14µs ± 8%    1.12µs ± 4%     ~     (p=0.802 n=5+5)
CacheNoHitProc/size=0-4       1.32µs ± 4%    1.10µs ± 6%  -16.92%  (p=0.008 n=5+5)
CacheNoHitProc/size=16-4      1.32µs ± 2%    1.14µs ± 7%  -13.54%  (p=0.008 n=5+5)
CacheNoHitProc/size=128-4     1.30µs ± 6%    1.07µs ±10%  -17.99%  (p=0.008 n=5+5)
CacheNoHitProc/size=512-4     1.26µs ± 5%    1.09µs ± 5%  -13.47%  (p=0.008 n=5+5)
CacheNoHitProc/size=4096-4    1.27µs ± 3%    1.09µs ± 7%  -14.55%  (p=0.008 n=5+5)
CacheHitProc/size=0-4         69.5ns ± 6%    56.4ns ±10%  -18.88%  (p=0.008 n=5+5)
CacheHitProc/size=16-4        75.1ns ± 6%    55.8ns ± 1%  -25.65%  (p=0.008 n=5+5)
CacheHitProc/size=128-4       74.5ns ± 4%    57.1ns ± 1%  -23.31%  (p=0.008 n=5+5)
CacheHitProc/size=512-4       69.4ns ± 1%    58.1ns ± 2%  -16.27%  (p=0.008 n=5+5)
CacheHitProc/size=4096-4      93.3ns ± 5%    63.2ns ± 4%  -32.25%  (p=0.008 n=5+5)

name                        old allocs/op  new allocs/op  delta
NoopStorage-4                   0.00           0.00          ~     (all equal)
CacheStartup-4                  6.00 ± 0%      5.00 ± 0%  -16.67%  (p=0.008 n=5+5)
CacheNoHit/size=0-4             4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHit/size=16-4            4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHit/size=128-4           4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHit/size=512-4           4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHit/size=4096-4          4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheHit/size=0-4               0.00           0.00          ~     (all equal)
CacheHit/size=16-4              0.00           0.00          ~     (all equal)
CacheHit/size=128-4             0.00           0.00          ~     (all equal)
CacheHit/size=512-4             0.00           0.00          ~     (all equal)
CacheHit/size=4096-4            0.00           0.00          ~     (all equal)
NoopStoragePar-4                0.00           0.00          ~     (all equal)
CacheStartupPar-4               5.00 ± 0%      4.00 ± 0%  -20.00%  (p=0.008 n=5+5)
CacheNoHitPar/size=0-4          4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitPar/size=16-4         4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitPar/size=128-4        4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitPar/size=512-4        4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitPar/size=4096-4       4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheHitPar/size=0-4            0.00           0.00          ~     (all equal)
CacheHitPar/size=16-4           0.00           0.00          ~     (all equal)
CacheHitPar/size=128-4          0.00           0.00          ~     (all equal)
CacheHitPar/size=512-4          0.00           0.00          ~     (all equal)
CacheHitPar/size=4096-4         0.00           0.00          ~     (all equal)
NoopStorageProc-4               0.00           0.00          ~     (all equal)
CacheStartupProc-4              6.00 ± 0%      5.00 ± 0%  -16.67%  (p=0.008 n=5+5)
CacheNoHitProc/size=0-4         4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitProc/size=16-4        4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitProc/size=128-4       4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitProc/size=512-4       4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheNoHitProc/size=4096-4      4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.008 n=5+5)
CacheHitProc/size=0-4           0.00           0.00          ~     (all equal)
CacheHitProc/size=16-4          0.00           0.00          ~     (all equal)
CacheHitProc/size=128-4         0.00           0.00          ~     (all equal)
CacheHitProc/size=512-4         0.00           0.00          ~     (all equal)
CacheHitProc/size=4096-4        0.00           0.00          ~     (all equal)
parent 0d377587
......@@ -95,7 +95,10 @@ type revCacheEntry struct {
serial Tid
err error
ready chan struct{} // closed when loading finished
// done when loading finished
// (like closed-when-ready `chan struct{}` but does not allocate on
// make and is faster)
ready sync.WaitGroup
// protected by .parent's lock:
......@@ -162,7 +165,7 @@ func (c *Cache) Load(ctx context.Context, xid Xid) (buf *Buf, serial Tid, err er
// rce is already in cache - use it
if !rceNew {
<-rce.ready
rce.ready.Wait()
c.gcMu.Lock()
rce.inLRU.MoveBefore(&c.lru.Head)
c.gcMu.Unlock()
......@@ -302,7 +305,7 @@ func (c *Cache) lookupRCE(xid Xid, wantBufRef int) (rce *revCacheEntry, rceNew b
// loadRCE performs data loading from database into rce.
//
// rce must be new just created by lookupRCE() with returned rceNew=true.
// loading completion is signalled by closing rce.ready.
// loading completion is signalled by marking rce.ready done.
func (c *Cache) loadRCE(ctx context.Context, rce *revCacheEntry, oid Oid) {
oce := rce.parent
buf, serial, err := c.loader.Load(ctx, Xid{At: rce.head, Oid: oid})
......@@ -342,7 +345,7 @@ func (c *Cache) loadRCE(ctx context.Context, rce *revCacheEntry, oid Oid) {
// rce was already dropped by merge / evicted
// (XXX recheck about evicted)
oce.Unlock()
close(rce.ready)
rce.ready.Done()
return
}
......@@ -394,7 +397,7 @@ func (c *Cache) loadRCE(ctx context.Context, rce *revCacheEntry, oid Oid) {
// now after .waitBufRef was synced to .buf notify to waiters that
// original rce in question was loaded. Do so outside .parent lock.
close(rceOrig.ready)
rceOrig.ready.Done()
// update lru & cache size
......@@ -581,8 +584,8 @@ func (oce *oidCacheEntry) newRevEntry(i int, head Tid) *revCacheEntry {
rce := &revCacheEntry{
parent: oce,
head: head,
ready: make(chan struct{}),
}
rce.ready.Add(1)
rce.inLRU.Init() // initially not on Cache.lru list
oce.rcev = append(oce.rcev, nil)
......
......@@ -336,7 +336,7 @@ func TestCache(t *testing.T) {
// (now 15] becomes ready but does not yet takes oce lock)
rce1_h15.waitBufRef = -1
close(rce1_h15.ready)
rce1_h15.ready.Done()
ok1(rce1_h15.loaded())
checkOCE(1, rce1_h3, rce1_h6, rce1_h7, rce1_h9, rce1_h11, rce1_h13, rce1_h15)
checkMRU(12, rce1_h11, rce1_h9, rce1_h7, rce1_h6, rce1_h3) // no 13] and 15] yet
......@@ -352,7 +352,7 @@ func TestCache(t *testing.T) {
checkMRU(5 /*was 12*/, rce1_h9, rce1_h7, rce1_h6, rce1_h3)
// (15] takes oce lock and updates c.size and LRU list)
rce1_h15.ready = make(chan struct{}) // so loadRCE could run
rce1_h15.ready.Add(1) // so loadRCE could run
c.loadRCE(ctx, rce1_h15, 1)
checkOCE(1, rce1_h3, rce1_h6, rce1_h7, rce1_h9, rce1_h15)
checkMRU(12, rce1_h15, rce1_h9, rce1_h7, rce1_h6, rce1_h3)
......@@ -368,7 +368,7 @@ func TestCache(t *testing.T) {
rce1_h16.serial = 16
rce1_h16.buf = mkbuf(zz)
rce1_h16.waitBufRef = -1
close(rce1_h16.ready)
rce1_h16.ready.Done()
ok1(rce1_h16.loaded())
checkOCE(1, rce1_h3, rce1_h6, rce1_h7, rce1_h9, rce1_h15, rce1_h16, rce1_h17)
checkMRU(12, rce1_h15, rce1_h9, rce1_h7, rce1_h6, rce1_h3) // no 16] and 17] yet
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment