Commit 1068aca3 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 09433847
...@@ -35,7 +35,7 @@ digraph { ...@@ -35,7 +35,7 @@ digraph {
wcfsRead -> blktabGet; wcfsRead -> blktabGet;
wcfsRead -> δFtail; wcfsRead -> δFtail;
wcfsRead -> mappingRegister; wcfsRead -> setupWatch;
wcfsRead -> headWatch; wcfsRead -> headWatch;
...@@ -49,7 +49,7 @@ digraph { ...@@ -49,7 +49,7 @@ digraph {
// wcfs_simple -> autoexit; // wcfs_simple -> autoexit;
client -> wcfsRead; client -> wcfsRead;
client -> mappingRegister; client -> setupWatch;
client -> clientInvHandle; client -> clientInvHandle;
// client -> δR; // client -> δR;
...@@ -83,14 +83,14 @@ digraph { ...@@ -83,14 +83,14 @@ digraph {
_wcfs_zhead [label=".wcfs/\nzhead", style=filled fillcolor=lightyellow] _wcfs_zhead [label=".wcfs/\nzhead", style=filled fillcolor=lightyellow]
wcfsRead [label="read(#blk)"] wcfsRead [label="read(#blk)", style=filled fillcolor=grey95]
blktabGet [label="blktab.Get(#blk):\nmanually + → ⌈rev(#blk)⌉", style=filled fillcolor=grey95] blktabGet [label="blktab.Get(#blk):\nmanually + → ⌈rev(#blk)⌉", style=filled fillcolor=grey95]
δFtail [style=filled fillcolor=lightyellow] δFtail [style=filled fillcolor=lightyellow]
mappingRegister [label="mmappings:\nregister/maint"] setupWatch [label="watches:\nregister/maint", style=filled fillcolor=grey95]
clientInvHandle [label="process\n#blk invalidations"] clientInvHandle [label="process\n#blk invalidations", style=filled fillcolor=grey95]
headWatch [label="#blk ← head/watch"] headWatch [label="#blk ← head/watch", style=filled fillcolor=grey95]
fileSock [label="FileSock", style=filled fillcolor=lightyellow] fileSock [label="FileSock", style=filled fillcolor=lightyellow]
ZODB_go_inv [label="ZODB/go\ninvalidations", style=filled fillcolor=grey95] ZODB_go_inv [label="ZODB/go\ninvalidations", style=filled fillcolor=grey95]
......
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
<!-- wcfsRead --> <!-- wcfsRead -->
<g id="node7" class="node"> <g id="node7" class="node">
<title>wcfsRead</title> <title>wcfsRead</title>
<ellipse fill="none" stroke="#000000" cx="1105.2046" cy="-299.5635" rx="47.3916" ry="18"/> <ellipse fill="#f2f2f2" stroke="#000000" cx="1105.2046" cy="-299.5635" rx="47.3916" ry="18"/>
<text text-anchor="middle" x="1105.2046" y="-295.8635" font-family="Times,serif" font-size="14.00" fill="#000000">read(#blk)</text> <text text-anchor="middle" x="1105.2046" y="-295.8635" font-family="Times,serif" font-size="14.00" fill="#000000">read(#blk)</text>
</g> </g>
<!-- wcfs&#45;&gt;wcfsRead --> <!-- wcfs&#45;&gt;wcfsRead -->
...@@ -110,23 +110,23 @@ ...@@ -110,23 +110,23 @@
<path fill="none" stroke="#000000" d="M1229.9749,-376.2546C1206.068,-361.56 1166.8988,-337.4844 1138.8224,-320.227"/> <path fill="none" stroke="#000000" d="M1229.9749,-376.2546C1206.068,-361.56 1166.8988,-337.4844 1138.8224,-320.227"/>
<polygon fill="#000000" stroke="#000000" points="1140.482,-317.1388 1130.1299,-314.884 1136.8164,-323.1023 1140.482,-317.1388"/> <polygon fill="#000000" stroke="#000000" points="1140.482,-317.1388 1130.1299,-314.884 1136.8164,-323.1023 1140.482,-317.1388"/>
</g> </g>
<!-- mappingRegister --> <!-- setupWatch -->
<g id="node20" class="node"> <g id="node20" class="node">
<title>mappingRegister</title> <title>setupWatch</title>
<ellipse fill="none" stroke="#000000" cx="1483.2046" cy="-209.8234" rx="65.1077" ry="26.7407"/> <ellipse fill="#f2f2f2" stroke="#000000" cx="1483.2046" cy="-209.8234" rx="65.1077" ry="26.7407"/>
<text text-anchor="middle" x="1483.2046" y="-213.6234" font-family="Times,serif" font-size="14.00" fill="#000000">mmappings:</text> <text text-anchor="middle" x="1483.2046" y="-213.6234" font-family="Times,serif" font-size="14.00" fill="#000000">watches:</text>
<text text-anchor="middle" x="1483.2046" y="-198.6234" font-family="Times,serif" font-size="14.00" fill="#000000">register/maint</text> <text text-anchor="middle" x="1483.2046" y="-198.6234" font-family="Times,serif" font-size="14.00" fill="#000000">register/maint</text>
</g> </g>
<!-- client&#45;&gt;mappingRegister --> <!-- client&#45;&gt;setupWatch -->
<g id="edge26" class="edge"> <g id="edge26" class="edge">
<title>client&#45;&gt;mappingRegister</title> <title>client&#45;&gt;setupWatch</title>
<path fill="none" stroke="#000000" d="M1280.7168,-384.5439C1333.2703,-375.4553 1439.8519,-354.0516 1464.2046,-326.4335 1483.1625,-304.9336 1487.0846,-272.1581 1486.7372,-246.824"/> <path fill="none" stroke="#000000" d="M1280.7168,-384.5439C1333.2703,-375.4553 1439.8519,-354.0516 1464.2046,-326.4335 1483.1625,-304.9336 1487.0846,-272.1581 1486.7372,-246.824"/>
<polygon fill="#000000" stroke="#000000" points="1490.2349,-246.6935 1486.3786,-236.8254 1483.2394,-246.9444 1490.2349,-246.6935"/> <polygon fill="#000000" stroke="#000000" points="1490.2349,-246.6935 1486.3786,-236.8254 1483.2394,-246.9444 1490.2349,-246.6935"/>
</g> </g>
<!-- clientInvHandle --> <!-- clientInvHandle -->
<g id="node25" class="node"> <g id="node25" class="node">
<title>clientInvHandle</title> <title>clientInvHandle</title>
<ellipse fill="none" stroke="#000000" cx="1251.2046" cy="-299.5635" rx="80.7205" ry="26.7407"/> <ellipse fill="#f2f2f2" stroke="#000000" cx="1251.2046" cy="-299.5635" rx="80.7205" ry="26.7407"/>
<text text-anchor="middle" x="1251.2046" y="-303.3635" font-family="Times,serif" font-size="14.00" fill="#000000">process</text> <text text-anchor="middle" x="1251.2046" y="-303.3635" font-family="Times,serif" font-size="14.00" fill="#000000">process</text>
<text text-anchor="middle" x="1251.2046" y="-288.3635" font-family="Times,serif" font-size="14.00" fill="#000000">#blk invalidations</text> <text text-anchor="middle" x="1251.2046" y="-288.3635" font-family="Times,serif" font-size="14.00" fill="#000000">#blk invalidations</text>
</g> </g>
...@@ -243,16 +243,16 @@ ...@@ -243,16 +243,16 @@
<path fill="none" stroke="#000000" d="M1105.4069,-281.4032C1105.5173,-271.5002 1105.6581,-258.8681 1105.7905,-246.9806"/> <path fill="none" stroke="#000000" d="M1105.4069,-281.4032C1105.5173,-271.5002 1105.6581,-258.8681 1105.7905,-246.9806"/>
<polygon fill="#000000" stroke="#000000" points="1109.2921,-246.8514 1105.9038,-236.813 1102.2925,-246.7734 1109.2921,-246.8514"/> <polygon fill="#000000" stroke="#000000" points="1109.2921,-246.8514 1105.9038,-236.813 1102.2925,-246.7734 1109.2921,-246.8514"/>
</g> </g>
<!-- wcfsRead&#45;&gt;mappingRegister --> <!-- wcfsRead&#45;&gt;setupWatch -->
<g id="edge20" class="edge"> <g id="edge20" class="edge">
<title>wcfsRead&#45;&gt;mappingRegister</title> <title>wcfsRead&#45;&gt;setupWatch</title>
<path fill="none" stroke="#000000" d="M1132.165,-284.591C1141.1669,-280.1495 1151.4051,-275.6794 1161.2046,-272.6934 1267.746,-240.2301 1301.1865,-263.8416 1409.2046,-236.6934 1414.8942,-235.2635 1420.7561,-233.5577 1426.5727,-231.7087"/> <path fill="none" stroke="#000000" d="M1132.165,-284.591C1141.1669,-280.1495 1151.4051,-275.6794 1161.2046,-272.6934 1267.746,-240.2301 1301.1865,-263.8416 1409.2046,-236.6934 1414.8942,-235.2635 1420.7561,-233.5577 1426.5727,-231.7087"/>
<polygon fill="#000000" stroke="#000000" points="1427.8927,-234.9584 1436.2802,-228.4854 1425.6867,-228.315 1427.8927,-234.9584"/> <polygon fill="#000000" stroke="#000000" points="1427.8927,-234.9584 1436.2802,-228.4854 1425.6867,-228.315 1427.8927,-234.9584"/>
</g> </g>
<!-- headWatch --> <!-- headWatch -->
<g id="node21" class="node"> <g id="node21" class="node">
<title>headWatch</title> <title>headWatch</title>
<ellipse fill="none" stroke="#000000" cx="1319.2046" cy="-209.8234" rx="80.6858" ry="18"/> <ellipse fill="#f2f2f2" stroke="#000000" cx="1319.2046" cy="-209.8234" rx="80.6858" ry="18"/>
<text text-anchor="middle" x="1319.2046" y="-206.1234" font-family="Times,serif" font-size="14.00" fill="#000000">#blk ← head/watch</text> <text text-anchor="middle" x="1319.2046" y="-206.1234" font-family="Times,serif" font-size="14.00" fill="#000000">#blk ← head/watch</text>
</g> </g>
<!-- wcfsRead&#45;&gt;headWatch --> <!-- wcfsRead&#45;&gt;headWatch -->
......
...@@ -99,14 +99,14 @@ ...@@ -99,14 +99,14 @@
// //
// C: 1 watch <bigfileX> @<at> // C: 1 watch <bigfileX> @<at>
// //
// The server then, after potentially sending initial pin messages (see below), // The server then, after potentially sending initial pin and unpin messages
// reports either success or failure: // (see below), reports either success or failure:
// //
// S: 1 ok // S: 1 ok
// S: 1 error ... ; if <at> is too far away back from head/at // S: 1 error ... ; if <at> is too far away back from head/at
// //
// The server sends "ok" reply only after head/at is ≥ requested <at>, and // The server sends "ok" reply only after head/at is ≥ requested <at>, and only
// only after all initial pin messages are fully acknowledged by the client. // after all initial pin/unpin messages are fully acknowledged by the client.
// The client can start to use mmapped data after it gets "ok". // The client can start to use mmapped data after it gets "ok".
// The server sends "error" reply if requested <at> is too far away back from // The server sends "error" reply if requested <at> is too far away back from
// head/at. // head/at.
...@@ -127,7 +127,7 @@ ...@@ -127,7 +127,7 @@
// //
// S: <2·k> pin <bigfileX> #<blk> @<rev_max> // S: <2·k> pin <bigfileX> #<blk> @<rev_max>
// XXX @head means unpin. // XXX @head means unpin.
// XXX or use `unpin <bigfileX> #<blk>` ? // XXX -> use `unpin <bigfileX> #<blk>`
// //
// and waits until all clients confirm that changed file block can be updated // and waits until all clients confirm that changed file block can be updated
// in global OS cache. // in global OS cache.
...@@ -137,6 +137,8 @@ ...@@ -137,6 +137,8 @@
// # mmapped at address corresponding to #blk // # mmapped at address corresponding to #blk
// mmap(@<rev_max>/bigfile/<bigfileX>, #blk, MAP_FIXED) // mmap(@<rev_max>/bigfile/<bigfileX>, #blk, MAP_FIXED)
// //
// XXX unpin -> mmap(head/bigfile/<bigfileX>, #blk, MAP_FIXED)
//
// and must send ack back to the server when it is done: // and must send ack back to the server when it is done:
// //
// C: <2·k> ack // C: <2·k> ack
...@@ -395,8 +397,15 @@ package main ...@@ -395,8 +397,15 @@ package main
// and a client that wants @rev data will get @rev data, even if it was this // and a client that wants @rev data will get @rev data, even if it was this
// "old" client that triggered the pagefault(~). // "old" client that triggered the pagefault(~).
// //
// XXX 8) serving read from @<rev>/data + zconn(s) for historical state // 8) serving FUSE reads from @<rev>/bigfile/file is organized similarly to
// XXX 9) gc @rev/ and @rev/bigfile/<bigfileX> automatically on atime timeout // serving reads from head/bigfile/file, but with using dedicated per-<rev>
// ZODB connection and without notifying any watches.
//
// 9) for every ZODB connection (zhead + one per @<rev>) a dedicated read-only
// transaction is maintained. For zhead, every time it is resynced (see "5")
// the transaction associated with zhead is renewed.
//
// XXX 10) gc @rev/ and @rev/bigfile/<bigfileX> automatically on atime timeout
// //
// //
// (*) see notes.txt -> "Notes on OS pagecache control" // (*) see notes.txt -> "Notes on OS pagecache control"
...@@ -404,10 +413,46 @@ package main ...@@ -404,10 +413,46 @@ package main
// (~) see notes.txt -> "Changing mmapping while under pagefault is possible" // (~) see notes.txt -> "Changing mmapping while under pagefault is possible"
// (^) see notes.txt -> "Client cannot be ptraced while under pagefault" // (^) see notes.txt -> "Client cannot be ptraced while under pagefault"
// (%) no need to keep track of ZData - ZBlk1 is always marked as changed on blk data change. // (%) no need to keep track of ZData - ZBlk1 is always marked as changed on blk data change.
// Wcfs locking organization
//
// As already said, processing ZODB invalidations (see "4") and serving file
// reads (see "7") are organized to be mutually exclusive. To do so a major RW
// lock - zheadMu - is used. Whenever ZODB invalidations are processed and
// zhead.at is updated - zheadMu.W is taken. Conversely, whenever a file read is
// served, and in other situations which need zhead to remain viewing the
// database at the same state - zheadMu.R is taken.
//
// Several locks that protect internal data structures are minor to zheadMu -
// they need to be taken only under zheadMu.R (to protect e.g. multiple readers
// running simultaneously with each other), but do not need to be taken at all
// if zheadMu.W is taken. In data structures such locks are noted as follows:
// //
// XXX For every ZODB connection a dedicated read-only transaction is maintained. // xMu sync.Mutex // zheadMu.W | zheadMu.R + xMu
//
// If a lock is not minor to zheadMu, it is still ok to lock it under zheadMu.R
// as zheadMu, being the most major lock in wcfs, always comes locked first, if
// it needs to be locked.
//
// For watches, similarly to zhead, watch.at is protected by the major-for-watch
// per-watch RW lock watch.atMu. When watch.at is updated during watch
// setup/upgrade time - watch.atMu.W is taken. Conversely, whenever a watch is
// notified with a pin message - watch.atMu.R is taken to make sure watch.at
// stays unchanged while pins are prepared and processed.
//
// For watches, similarly to zheadMu, there are several minor-to-atMu locks
// that protect internal data structures. Such locks are noted in the same way
// as the locks that are minor to zheadMu.
//
// In addition to what is written above there are other ordering rules that are
// followed consistently to avoid hitting deadlock:
//
// BigFile.watchMu > Watch.atMu
// WatchLink.byfileMu > BigFile.watchMu
// WatchLink.byfileMu > BigFileDir.fileMu
// WatchLink.byfileMu > Watch.atMu
// XXX notation // Notation used
// //
// δZ - change in ZODB space // δZ - change in ZODB space
// δB - change in BTree*s* space // δB - change in BTree*s* space
...@@ -415,25 +460,10 @@ package main ...@@ -415,25 +460,10 @@ package main
// δF - change in File*s* space // δF - change in File*s* space
// δfile - change in File(1) space // δfile - change in File(1) space
// //
// ??? needed vvv ?
// f - BigFile // f - BigFile
// wlink - WatchLink XXX // bfdir - BigFileDir
// wlink - WatchLink
// w - Watch // w - Watch
// ...
// XXX locking
//
// head.zheadMu WLock by handleδZ
// RLock by read
// ...
//
// Head: zheadMu.W | zheadMu.R + BigFileDir.fileMu
// Watch: atMu.W | atMu.R + pinnedMu
// zheadMu > Watch.atMu
//
// WatchLink.byfileMu > Watch.atMu
// WatchLink.byfileMu > BigFileDir.fileMu
// BigFile.watchMu > Watch.atMu
import ( import (
"bufio" "bufio"
...@@ -615,7 +645,7 @@ type WatchLink struct { ...@@ -615,7 +645,7 @@ type WatchLink struct {
// //
// both already established, and watches being initialized in-progress are registered here. // both already established, and watches being initialized in-progress are registered here.
// (see setupWatch) // (see setupWatch)
byfileMu sync.Mutex // zheadMu.W | zheadMu.R + byfileMu (XXX recheck) byfileMu sync.Mutex
byfile map[zodb.Oid]*Watch // {} foid -> Watch byfile map[zodb.Oid]*Watch // {} foid -> Watch
// IO // IO
...@@ -795,7 +825,7 @@ retry: ...@@ -795,7 +825,7 @@ retry:
close(continueOSCacheUpload) close(continueOSCacheUpload)
}() }()
// head.zheadMu wlocked and all cache uploaders are paused // zheadMu.W taken and all cache uploaders are paused
zhead := head.zconn zhead := head.zconn
bfdir := head.bfdir bfdir := head.bfdir
...@@ -1564,7 +1594,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T ...@@ -1564,7 +1594,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
// block which could have revision > w.at: XXX test // block which could have revision > w.at: XXX test
// //
// 1 3 2 4 // 1 3 2 4
// -----.----x---o----x---x------]---------- // ─────.────x───o────x───x──────]──────────
// ↑ ↑ // ↑ ↑
// w.at head // w.at head
// //
...@@ -2111,7 +2141,7 @@ func (head *Head) bigopen(ctx context.Context, oid zodb.Oid) (_ *BigFile, err er ...@@ -2111,7 +2141,7 @@ func (head *Head) bigopen(ctx context.Context, oid zodb.Oid) (_ *BigFile, err er
return f, nil return f, nil
} }
// Close release all resources of BigFile. // Close release all resources of BigFile. XXX needed?
func (f *BigFile) Close() error { func (f *BigFile) Close() error {
// XXX locking? // XXX locking?
f.zfile = nil f.zfile = nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment