Commit 1068aca3 authored by Kirill Smelkov

.

parent 09433847
......@@ -35,7 +35,7 @@ digraph {
wcfsRead -> blktabGet;
wcfsRead -> δFtail;
wcfsRead -> mappingRegister;
wcfsRead -> setupWatch;
wcfsRead -> headWatch;
......@@ -49,7 +49,7 @@ digraph {
// wcfs_simple -> autoexit;
client -> wcfsRead;
client -> mappingRegister;
client -> setupWatch;
client -> clientInvHandle;
// client -> δR;
......@@ -83,14 +83,14 @@ digraph {
_wcfs_zhead [label=".wcfs/\nzhead", style=filled fillcolor=lightyellow]
wcfsRead [label="read(#blk)"]
wcfsRead [label="read(#blk)", style=filled fillcolor=grey95]
blktabGet [label="blktab.Get(#blk):\nmanually + → ⌈rev(#blk)⌉", style=filled fillcolor=grey95]
δFtail [style=filled fillcolor=lightyellow]
mappingRegister [label="mmappings:\nregister/maint"]
clientInvHandle [label="process\n#blk invalidations"]
headWatch [label="#blk ← head/watch"]
setupWatch [label="watches:\nregister/maint", style=filled fillcolor=grey95]
clientInvHandle [label="process\n#blk invalidations", style=filled fillcolor=grey95]
headWatch [label="#blk ← head/watch", style=filled fillcolor=grey95]
fileSock [label="FileSock", style=filled fillcolor=lightyellow]
ZODB_go_inv [label="ZODB/go\ninvalidations", style=filled fillcolor=grey95]
......
......@@ -70,7 +70,7 @@
<!-- wcfsRead -->
<g id="node7" class="node">
<title>wcfsRead</title>
<ellipse fill="none" stroke="#000000" cx="1105.2046" cy="-299.5635" rx="47.3916" ry="18"/>
<ellipse fill="#f2f2f2" stroke="#000000" cx="1105.2046" cy="-299.5635" rx="47.3916" ry="18"/>
<text text-anchor="middle" x="1105.2046" y="-295.8635" font-family="Times,serif" font-size="14.00" fill="#000000">read(#blk)</text>
</g>
<!-- wcfs&#45;&gt;wcfsRead -->
......@@ -110,23 +110,23 @@
<path fill="none" stroke="#000000" d="M1229.9749,-376.2546C1206.068,-361.56 1166.8988,-337.4844 1138.8224,-320.227"/>
<polygon fill="#000000" stroke="#000000" points="1140.482,-317.1388 1130.1299,-314.884 1136.8164,-323.1023 1140.482,-317.1388"/>
</g>
<!-- mappingRegister -->
<!-- setupWatch -->
<g id="node20" class="node">
<title>mappingRegister</title>
<ellipse fill="none" stroke="#000000" cx="1483.2046" cy="-209.8234" rx="65.1077" ry="26.7407"/>
<text text-anchor="middle" x="1483.2046" y="-213.6234" font-family="Times,serif" font-size="14.00" fill="#000000">mmappings:</text>
<title>setupWatch</title>
<ellipse fill="#f2f2f2" stroke="#000000" cx="1483.2046" cy="-209.8234" rx="65.1077" ry="26.7407"/>
<text text-anchor="middle" x="1483.2046" y="-213.6234" font-family="Times,serif" font-size="14.00" fill="#000000">watches:</text>
<text text-anchor="middle" x="1483.2046" y="-198.6234" font-family="Times,serif" font-size="14.00" fill="#000000">register/maint</text>
</g>
<!-- client&#45;&gt;mappingRegister -->
<!-- client&#45;&gt;setupWatch -->
<g id="edge26" class="edge">
<title>client&#45;&gt;mappingRegister</title>
<title>client&#45;&gt;setupWatch</title>
<path fill="none" stroke="#000000" d="M1280.7168,-384.5439C1333.2703,-375.4553 1439.8519,-354.0516 1464.2046,-326.4335 1483.1625,-304.9336 1487.0846,-272.1581 1486.7372,-246.824"/>
<polygon fill="#000000" stroke="#000000" points="1490.2349,-246.6935 1486.3786,-236.8254 1483.2394,-246.9444 1490.2349,-246.6935"/>
</g>
<!-- clientInvHandle -->
<g id="node25" class="node">
<title>clientInvHandle</title>
<ellipse fill="none" stroke="#000000" cx="1251.2046" cy="-299.5635" rx="80.7205" ry="26.7407"/>
<ellipse fill="#f2f2f2" stroke="#000000" cx="1251.2046" cy="-299.5635" rx="80.7205" ry="26.7407"/>
<text text-anchor="middle" x="1251.2046" y="-303.3635" font-family="Times,serif" font-size="14.00" fill="#000000">process</text>
<text text-anchor="middle" x="1251.2046" y="-288.3635" font-family="Times,serif" font-size="14.00" fill="#000000">#blk invalidations</text>
</g>
......@@ -243,16 +243,16 @@
<path fill="none" stroke="#000000" d="M1105.4069,-281.4032C1105.5173,-271.5002 1105.6581,-258.8681 1105.7905,-246.9806"/>
<polygon fill="#000000" stroke="#000000" points="1109.2921,-246.8514 1105.9038,-236.813 1102.2925,-246.7734 1109.2921,-246.8514"/>
</g>
<!-- wcfsRead&#45;&gt;mappingRegister -->
<!-- wcfsRead&#45;&gt;setupWatch -->
<g id="edge20" class="edge">
<title>wcfsRead&#45;&gt;mappingRegister</title>
<title>wcfsRead&#45;&gt;setupWatch</title>
<path fill="none" stroke="#000000" d="M1132.165,-284.591C1141.1669,-280.1495 1151.4051,-275.6794 1161.2046,-272.6934 1267.746,-240.2301 1301.1865,-263.8416 1409.2046,-236.6934 1414.8942,-235.2635 1420.7561,-233.5577 1426.5727,-231.7087"/>
<polygon fill="#000000" stroke="#000000" points="1427.8927,-234.9584 1436.2802,-228.4854 1425.6867,-228.315 1427.8927,-234.9584"/>
</g>
<!-- headWatch -->
<g id="node21" class="node">
<title>headWatch</title>
<ellipse fill="none" stroke="#000000" cx="1319.2046" cy="-209.8234" rx="80.6858" ry="18"/>
<ellipse fill="#f2f2f2" stroke="#000000" cx="1319.2046" cy="-209.8234" rx="80.6858" ry="18"/>
<text text-anchor="middle" x="1319.2046" y="-206.1234" font-family="Times,serif" font-size="14.00" fill="#000000">#blk ← head/watch</text>
</g>
<!-- wcfsRead&#45;&gt;headWatch -->
......
......@@ -99,14 +99,14 @@
//
// C: 1 watch <bigfileX> @<at>
//
// The server then, after potentially sending initial pin messages (see below),
// reports either success or failure:
// The server then, after potentially sending initial pin and unpin messages
// (see below), reports either success or failure:
//
// S: 1 ok
// S: 1 error ... ; if <at> is too far away back from head/at
//
// The server sends "ok" reply only after head/at is ≥ requested <at>, and
// only after all initial pin messages are fully acknowledged by the client.
// The server sends the "ok" reply only after head/at is ≥ requested <at>, and
// only after all initial pin/unpin messages are fully acknowledged by the client.
// The client can start to use mmapped data after it gets "ok".
// The server sends an "error" reply if the requested <at> is too far back from
// head/at.
......@@ -127,7 +127,7 @@
//
// S: <2·k> pin <bigfileX> #<blk> @<rev_max>
// XXX @head means unpin.
// XXX or use `unpin <bigfileX> #<blk>` ?
// XXX -> use `unpin <bigfileX> #<blk>`
//
// and waits until all clients confirm that changed file block can be updated
// in global OS cache.
......@@ -137,6 +137,8 @@
// # mmapped at address corresponding to #blk
// mmap(@<rev_max>/bigfile/<bigfileX>, #blk, MAP_FIXED)
//
// XXX unpin -> mmap(head/bigfile/<bigfileX>, #blk, MAP_FIXED)
//
// and must send ack back to the server when it is done:
//
// C: <2·k> ack
......@@ -395,8 +397,15 @@ package main
// and a client that wants @rev data will get @rev data, even if it was this
// "old" client that triggered the pagefault(~).
//
// XXX 8) serving read from @<rev>/data + zconn(s) for historical state
// XXX 9) gc @rev/ and @rev/bigfile/<bigfileX> automatically on atime timeout
// 8) serving FUSE reads from @<rev>/bigfile/file is organized similarly to
// serving reads from head/bigfile/file, but uses a dedicated per-<rev>
// ZODB connection and does not notify any watches.
//
// 9) for every ZODB connection (zhead + one per @<rev>) a dedicated read-only
// transaction is maintained. For zhead, every time it is resynced (see "5")
// the transaction associated with zhead is renewed.
//
// XXX 10) gc @rev/ and @rev/bigfile/<bigfileX> automatically on atime timeout
//
//
// (*) see notes.txt -> "Notes on OS pagecache control"
......@@ -404,10 +413,46 @@ package main
// (~) see notes.txt -> "Changing mmapping while under pagefault is possible"
// (^) see notes.txt -> "Client cannot be ptraced while under pagefault"
// (%) no need to keep track of ZData - ZBlk1 is always marked as changed on blk data change.
// Wcfs locking organization
//
// As said earlier, processing ZODB invalidations (see "4") and serving file
// reads (see "7") are organized to be mutually exclusive. To achieve this, a
// major RW lock - zheadMu - is used. Whenever ZODB invalidations are processed
// and zhead.at is updated, zheadMu.W is taken. Conversely, whenever a file read
// is served, and in other situations that need zhead to keep viewing the
// database at the same state, zheadMu.R is taken.
//
// Several locks that protect internal data structures are minor to zheadMu -
// they need to be taken only under zheadMu.R (e.g. to protect against multiple
// readers running concurrently with each other), but do not need to be taken at
// all if zheadMu.W is taken. In data structures such locks are noted as follows:
//
// xMu sync.Mutex // zheadMu.W | zheadMu.R + xMu
//
// If a lock is not minor to zheadMu, it is still ok to lock it under zheadMu.R
// as zheadMu, being the most major lock in wcfs, always comes locked first, if
// it needs to be locked.
//
// XXX For every ZODB connection a dedicated read-only transaction is maintained.
// For watches, similarly to zhead, watch.at is protected by a major-for-watch
// per-watch RW lock, watch.atMu. When watch.at is updated at watch
// setup/upgrade time, watch.atMu.W is taken. Conversely, whenever a watch is
// notified with a pin message, watch.atMu.R is taken to make sure watch.at
// stays unchanged while pins are prepared and processed.
//
// For watches, similarly to zheadMu, there are several minor-to-atMu locks
// that protect internal data structures. Such locks are noted in the same way
// as the locks that are minor to zheadMu.
//
// In addition to what is written above, there are other lock ordering rules
// that are followed consistently to avoid deadlocks:
//
// BigFile.watchMu > Watch.atMu
// WatchLink.byfileMu > BigFile.watchMu
// WatchLink.byfileMu > BigFileDir.fileMu
// WatchLink.byfileMu > Watch.atMu
// XXX notation
// Notation used
//
// δZ - change in ZODB space
// δB - change in BTree*s* space
......@@ -415,25 +460,10 @@ package main
// δF - change in File*s* space
// δfile - change in File(1) space
//
// ??? needed vvv ?
// f - BigFile
// wlink - WatchLink XXX
// bfdir - BigFileDir
// wlink - WatchLink
// w - Watch
// ...
// XXX locking
//
// head.zheadMu WLock by handleδZ
// RLock by read
// ...
//
// Head: zheadMu.W | zheadMu.R + BigFileDir.fileMu
// Watch: atMu.W | atMu.R + pinnedMu
// zheadMu > Watch.atMu
//
// WatchLink.byfileMu > Watch.atMu
// WatchLink.byfileMu > BigFileDir.fileMu
// BigFile.watchMu > Watch.atMu
import (
"bufio"
......@@ -615,7 +645,7 @@ type WatchLink struct {
//
// both already established, and watches being initialized in-progress are registered here.
// (see setupWatch)
byfileMu sync.Mutex // zheadMu.W | zheadMu.R + byfileMu (XXX recheck)
byfileMu sync.Mutex
byfile map[zodb.Oid]*Watch // {} foid -> Watch
// IO
......@@ -795,7 +825,7 @@ retry:
close(continueOSCacheUpload)
}()
// head.zheadMu wlocked and all cache uploaders are paused
// zheadMu.W taken and all cache uploaders are paused
zhead := head.zconn
bfdir := head.bfdir
......@@ -1564,7 +1594,7 @@ func (wlink *WatchLink) setupWatch(ctx context.Context, foid zodb.Oid, at zodb.T
// block which could have revision > w.at: XXX test
//
// 1 3 2 4
// -----.----x---o----x---x------]----------
// ─────.────x───o────x───x──────]──────────
// ↑ ↑
// w.at head
//
......@@ -2111,7 +2141,7 @@ func (head *Head) bigopen(ctx context.Context, oid zodb.Oid) (_ *BigFile, err er
return f, nil
}
// Close release all resources of BigFile.
// Close releases all resources of BigFile. XXX needed?
func (f *BigFile) Close() error {
// XXX locking?
f.zfile = nil
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment