Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Joshua
wendelin.core
Commits
d34c4610
Commit
d34c4610
authored
Nov 01, 2019
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
09310101
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
575 additions
and
516 deletions
+575
-516
setup.py
setup.py
+1
-0
wcfs/internal/wcfs_misc.h
wcfs/internal/wcfs_misc.h
+64
-1
wcfs/internal/wcfs_virtmem.cpp
wcfs/internal/wcfs_virtmem.cpp
+16
-515
wcfs/internal/wcfs_watchlink.cpp
wcfs/internal/wcfs_watchlink.cpp
+386
-0
wcfs/internal/wcfs_watchlink.h
wcfs/internal/wcfs_watchlink.h
+108
-0
No files found.
setup.py
View file @
d34c4610
...
...
@@ -235,6 +235,7 @@ setup(
PyGoExt
(
'wcfs.internal._wcfs'
,
[
'wcfs/internal/_wcfs.pyx'
,
'wcfs/internal/wcfs_virtmem.cpp'
,
'wcfs/internal/wcfs_watchlink.cpp'
,
'wcfs/internal/wcfs_misc.cpp'
,
],
include_dirs
=
[
# XXX -> common place
...
...
wcfs/internal/wcfs_misc.h
View file @
d34c4610
...
...
@@ -23,6 +23,7 @@
#define _NXD_WCFS_MISC_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
using
std
::
string
;
...
...
@@ -152,6 +153,59 @@ vector<string> split(const string &s, char sep);
}
// strings::
// XXX ok?
struct
IContext
{
virtual
chan
<
structZ
>
done
()
=
0
;
virtual
error
err
()
=
0
;
};
// context::
namespace
context
{
struct
_Background
:
IContext
{
chan
<
structZ
>
done
()
{
return
nil
;
}
error
err
()
{
return
nil
;
}
};
static
_Background
_bg
;
// XXX doc
IContext
*
background
()
{
return
&
_bg
;
// NOTE nil is not valid in C++ (IContext* also carries vtab ptr)
}
// XXX doc
const
error
canceled
=
fmt
::
errorf
(
"context canceled"
);
// XXX deadline exceeded?
}
// context::
#if 0
interface(Context) {
ifunc(chan<structZ> done());
ifunc(error err());
};
I<io::Reader>(f)
// XXX wrap T* as IContext
template<typename T>
class Context : public IContext {
T *obj;
public:
Context(T *obj) : obj(obj) {}
chan<structZ> done() { return obj->done(); }
error err() { return obj->err(); }
};
#endif
// ---- misc ----
#include <unordered_map>
...
...
@@ -205,6 +259,15 @@ tuple<uint64_t, error> parseHex64(const string& s);
tuple
<
int64_t
,
error
>
parseInt
(
const
string
&
s
);
tuple
<
uint64_t
,
error
>
parseUint
(
const
string
&
s
);
}
// xstrconv
}
// xstrconv::
// zodb::
namespace zodb {

// Tid and Oid are 64-bit unsigned identifiers.
// (presumably ZODB transaction ID and object ID respectively - TODO confirm)
using Tid = uint64_t;
using Oid = uint64_t;

}   // zodb::
#endif
wcfs/internal/wcfs_virtmem.cpp
View file @
d34c4610
...
...
@@ -42,78 +42,23 @@ using namespace golang;
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include "wcfs_watchlink.h"
#include "wcfs_misc.h"
using
std
::
min
;
using
std
::
vector
;
typedef
uint64_t
Tid
;
typedef
uint64_t
Oid
;
// TidHead is invalid Tid which is largest Tid value and means @head.
const
Tid
TidHead
=
-
1ULL
;
const
zodb
::
Tid
TidHead
=
-
1ULL
;
static
string
h
(
uint64_t
v
);
// v -> 016x hex representation
#define h_(v) (h(v).c_str())
static
error
mmap_zero_into_ro
(
void
*
addr
,
size_t
size
);
static
error
mmap_into_ro
(
void
*
addr
,
size_t
size
,
const
os
::
File
&
f
,
off_t
offset
);
// XXX ok?
struct
IContext
{
virtual
chan
<
structZ
>
done
()
=
0
;
virtual
error
err
()
=
0
;
};
// context::
namespace
context
{
struct
_Background
:
IContext
{
chan
<
structZ
>
done
()
{
return
nil
;
}
error
err
()
{
return
nil
;
}
};
static
_Background
_bg
;
// XXX doc
IContext
*
background
()
{
return
&
_bg
;
// NOTE nil is not valid in C++ (IContext* also carries vtab ptr)
}
// XXX doc
const
error
canceled
=
fmt
::
errorf
(
"context canceled"
);
// XXX deadline exceeded?
}
// context::
#if 0
interface(Context) {
ifunc(chan<structZ> done());
ifunc(error err());
};
I<io::Reader>(f)
// XXX wrap T* as IContext
template<typename T>
class Context : public IContext {
T *obj;
public:
Context(T *obj) : obj(obj) {}
chan<structZ> done() { return obj->done(); }
error err() { return obj->err(); }
};
#endif
struct
Conn
;
struct
_File
;
struct
_Mapping
;
...
...
@@ -125,7 +70,7 @@ struct PinReq;
struct
WCFS
{
string
mountpoint
;
tuple
<
Conn
*
,
error
>
connect
(
Tid
at
);
tuple
<
Conn
*
,
error
>
connect
(
zodb
::
Tid
at
);
string
_path
(
const
string
&
obj
);
tuple
<
os
::
File
,
error
>
_open
(
const
string
&
path
,
int
flags
=
O_RDONLY
);
tuple
<
WatchLink
*
,
error
>
_openwatch
();
...
...
@@ -137,15 +82,15 @@ struct WCFS {
// XXX doc
struct
Conn
{
WCFS
*
_wc
;
Tid
at
;
zodb
::
Tid
at
;
WatchLink
*
_wlink
;
sync
::
Mutex
_filemu
;
dict
<
Oid
,
_File
*>
_filetab
;
// {} foid -> _file
sync
::
Mutex
_filemu
;
dict
<
zodb
::
Oid
,
_File
*>
_filetab
;
// {} foid -> _file
public:
error
close
();
error
resync
(
Tid
at
);
error
resync
(
zodb
::
Tid
at
);
private:
void
_pinner
(
IContext
*
ctx
);
...
...
@@ -157,13 +102,13 @@ private:
// XXX doc XXX naming -> _FileView ?
struct
_File
{
Conn
*
wconn
;
Oid
foid
;
// hex of ZBigFile root object ID
zodb
::
Oid
foid
;
// hex of ZBigFile root object ID
size_t
blksize
;
// block size of this file XXX -> off_t ?
os
::
File
headf
;
// file object of head/file
off_t
headfsize
;
// head/file size is known to be at least headfsize (size ↑=)
dict
<
int64_t
,
Tid
>
pinned
;
// {} blk -> rev that wcfs already sent us for this file
vector
<
_Mapping
*>
mmaps
;
// []_Mapping ↑blk_start mappings of this file
dict
<
int64_t
,
zodb
::
Tid
>
pinned
;
// {} blk -> rev that wcfs already sent us for this file
vector
<
_Mapping
*>
mmaps
;
// []_Mapping ↑blk_start mappings of this file
};
// _Mapping represents one mapping of _File.
...
...
@@ -181,88 +126,12 @@ struct _Mapping {
return
blk_start
+
(
mem_stop
-
mem_start
)
/
file
->
blksize
;
}
error
_remmapblk
(
int64_t
blk
,
Tid
at
);
};
// StreamID stands for ID of a stream multiplexed over WatchLink.
typedef
uint64_t
StreamID
;
// rxPkt internally represents data of one message received over WatchLink.
struct
rxPkt
{
// stream over which the data was received
StreamID
stream
;
// raw data received/to-be-sent.
// XXX not e.g. string as chan<T> currently does not support types with
// non-trivial copy. Note: we anyway need to limit rx line length to
// avoid DoS, but just for DoS the limit would be higher.
uint16_t
datalen
;
char
data
[
128
-
sizeof
(
StreamID
)
-
sizeof
(
uint16_t
)];
error
from_string
(
const
string
&
rx
);
string
to_string
()
const
;
};
static_assert
(
sizeof
(
rxPkt
)
==
128
);
// WatchLink represents /head/watch link opened on wcfs.
//
// It is created by WCFS::_openwatch().
//
// .sendReq()/.recvReq() provides raw IO in terms of wcfs invalidation protocol messages.
// .close() closes the link.
//
// It is safe to use WatchLink from multiple threads simultaneously.
class
WatchLink
{
WCFS
*
_wc
;
os
::
File
_f
;
// head/watch file handle
string
_rxbuf
;
// buffer for read data from _f
chan
<
structZ
>
_rx_eof
;
// becomes ready when wcfs closes its tx side
// inv.protocol message IO
chan
<
rxPkt
>
_acceptq
;
// server originated messages go here
sync
::
Mutex
_rxmu
;
bool
_rxdown
;
dict
<
StreamID
,
chan
<
rxPkt
>>
_rxtab
;
// {} stream -> rxq server replies go via here
set
<
StreamID
>
_accepted
;
// streams we accepted but did not replied yet
StreamID
_req_next
;
// stream ID for next client-originated request XXX -> atomic
sync
::
Mutex
_txmu
;
// serializes writes
sync
::
Once
_txclose1
;
#if 0
func() _serveCancel
sync.WorkGroup *_serveWG
#endif
public:
friend
tuple
<
WatchLink
*
,
error
>
WCFS
::
_openwatch
();
error
close
();
error
recvReq
(
IContext
*
ctx
,
PinReq
*
rx_into
);
tuple
<
string
,
error
>
sendReq
(
IContext
*
ctx
,
const
string
&
req
);
private:
void
_closeTX
();
error
_serveRX
(
IContext
*
ctx
);
tuple
<
string
,
error
>
_readline
();
error
_send
(
StreamID
stream
,
const
string
&
msg
);
error
_write
(
const
string
&
pkt
);
tuple
<
chan
<
rxPkt
>
,
error
>
_sendReq
(
IContext
*
ctx
,
const
string
&
req
);
};
// PinReq represents 1 server-initiated wcfs pin request received over /head/watch link.
struct
PinReq
{
StreamID
stream
;
// request was received with this stream ID
Oid
foid
;
// request is about this file
int64_t
blk
;
// ----//---- about this block
Tid
at
;
// pin to this at; TidHead means unpin to head
error
_remmapblk
(
int64_t
blk
,
zodb
::
Tid
at
);
};
// connect creates new Conn viewing WCFS state as of @at.
tuple
<
Conn
*
,
error
>
WCFS
::
connect
(
Tid
at
)
{
tuple
<
Conn
*
,
error
>
WCFS
::
connect
(
zodb
::
Tid
at
)
{
WCFS
*
wc
=
this
;
// XXX err ctx
...
...
@@ -398,15 +267,15 @@ void Conn::_pin1(PinReq *req) {
// XXX Conn::mmap
// resync resyncs connection and its mappings onto different database view.
error
Conn
::
resync
(
Tid
at
)
{
error
Conn
::
resync
(
zodb
::
Tid
at
)
{
Conn
&
wconn
=
*
this
;
// XXX err ctx
// XXX locking
for
(
auto
fit
:
wconn
.
_filetab
)
{
Oid
foid
=
fit
.
first
;
_File
&
f
=
*
fit
.
second
;
zodb
::
Oid
foid
=
fit
.
first
;
_File
&
f
=
*
fit
.
second
;
// XXX if file has no mappings and was not used during whole prev
// cycle - forget and stop watching it
...
...
@@ -461,7 +330,7 @@ error Conn::resync(Tid at) {
//
// at=TidHead means unpin to head/ .
// NOTE this does not check whether virtmem already mapped blk as RW.
error
_Mapping
::
_remmapblk
(
int64_t
blk
,
Tid
at
)
{
error
_Mapping
::
_remmapblk
(
int64_t
blk
,
zodb
::
Tid
at
)
{
// XXX err context? blk #<blk> @<at>
_Mapping
*
mmap
=
this
;
...
...
@@ -518,374 +387,6 @@ error _Mapping::_remmapblk(int64_t blk, Tid at) {
// XXX _Mapping::unmap
// ---- WatchLink ----
// _openwatch opens new watch link on wcfs.
tuple
<
WatchLink
*
,
error
>
WCFS
::
_openwatch
()
{
WCFS
*
wc
=
this
;
// head/watch handle.
os
::
File
f
;
error
err
;
tie
(
f
,
err
)
=
wc
->
_open
(
"head/watch"
,
O_RDWR
);
if
(
err
!=
nil
)
return
make_tuple
((
WatchLink
*
)
NULL
,
err
);
WatchLink
*
wlink
=
new
(
WatchLink
);
wlink
->
_wc
=
wc
;
wlink
->
_f
=
f
;
wlink
->
_rx_eof
=
makechan
<
structZ
>
();
wlink
->
_acceptq
=
makechan
<
rxPkt
>
();
wlink
->
_rxdown
=
false
;
wlink
->
_req_next
=
1
;
#if 0
serveCtx, wlink._serveCancel = context.with_cancel(context.background())
wlink->_serveWG = sync.WorkGroup(serveCtx)
wlink->_serveWG.go(wlink._serveRX)
#endif
return
make_tuple
(
wlink
,
nil
);
}
void
WatchLink
::
_closeTX
()
{
WatchLink
&
wlink
=
*
this
;
wlink
.
_txclose1
.
do_
([
&
]()
{
// ask wcfs to close its tx & rx sides; close(wcfs.tx) wakes up
// _serveRX on client (= on us). The connection can be already closed
// by wcfs - so ignore errors when sending bye.
(
void
)
wlink
.
_send
(
1
,
"bye"
);
// XXX stream ok?
// XXX vvv should be ~ shutdown(TX, wlink._f), however shutdown does
// not work for non-socket file descriptors. And even if we dup link
// fd, and close only one used for TX, peer's RX will still be blocked
// ad fds are referring to one file object which stays in opened
// state. So just use ^^^ "bye" as "TX closed" message.
// wlink._wtx.close();
});
}
// close closes the link.
error
WatchLink
::
close
()
{
WatchLink
&
wlink
=
*
this
;
wlink
.
_closeTX
();
#if 0
wlink._serveCancel();
// XXX we can get stuck here if wcfs does not behave as we want.
// XXX in particular if there is a silly - e.g. syntax or type error in
// test code - we currently get stuck here.
//
// XXX -> better pthread_kill(SIGINT) instead of relying on wcfs proper behaviour?
// XXX -> we now have `kill -QUIT` to wcfs.go on test timeout - remove ^^^ comments?
error err = wlink._serveWG.wait();
// canceled is expected and ok
if (err == context.canceled)
err = nil;
#else
error
err
=
nil
;
#endif
error
err2
=
wlink
.
_f
.
close
();
if
(
err
==
nil
)
err
=
err2
;
return
err
;
}
// _serveRX receives messages from ._f and dispatches them according to streamID.
error
WatchLink
::
_serveRX
(
IContext
*
ctx
)
{
// XXX error -> where ?
WatchLink
&
wlink
=
*
this
;
// when finishing - wakeup everyone waiting for rx
defer
([
&
]()
{
wlink
.
_acceptq
.
close
();
wlink
.
_rxmu
.
lock
();
wlink
.
_rxdown
=
true
;
// don't allow new rxtab registers
wlink
.
_rxmu
.
unlock
();
for
(
auto
_
:
wlink
.
_rxtab
)
{
auto
rxq
=
_
.
second
;
rxq
.
close
();
}
});
string
l
;
error
err
;
rxPkt
pkt
;
while
(
1
)
{
// NOTE: .close() makes sure .f.read*() will wake up
tie
(
l
,
err
)
=
wlink
.
_readline
();
// XXX +maxlen
if
(
err
==
io
::
EOF_
)
{
// peer closed its tx
// XXX what happens on other errors?
wlink
.
_rx_eof
.
close
();
}
if
(
err
!=
nil
)
return
err
;
printf
(
"C: watch : rx: %s"
,
l
.
c_str
());
err
=
pkt
.
from_string
(
l
);
if
(
err
!=
nil
)
return
err
;
if
(
pkt
.
stream
==
0
)
{
// control/fatal message from wcfs
// XXX print -> receive somewhere? XXX -> recvCtl ?
printf
(
"C: watch : rx fatal: %s
\n
"
,
l
.
c_str
());
//wlink.fatalv.append(msg);
continue
;
}
bool
reply
=
(
pkt
.
stream
%
2
!=
0
);
if
(
reply
)
{
chan
<
rxPkt
>
rxq
;
bool
ok
;
wlink
.
_rxmu
.
lock
();
tie
(
rxq
,
ok
)
=
wlink
.
_rxtab
.
pop
(
pkt
.
stream
);
wlink
.
_rxmu
.
unlock
();
if
(
!
ok
)
{
// wcfs sent reply on unexpected stream
// XXX log + dowmn.
printf
(
"wcfs sent reply on unexpected stream
\n
"
);
continue
;
}
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
rxq
.
sends
(
&
pkt
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
}
else
{
wlink
.
_rxmu
.
lock
();
if
(
wlink
.
_accepted
.
has
(
pkt
.
stream
))
{
wlink
.
_rxmu
.
unlock
();
// XXX log + down
printf
(
"wcfs sent request on already used stream
\n
"
);
continue
;
}
// XXX clear _accepted not to leak memory after reply is sent?
wlink
.
_accepted
.
insert
(
pkt
.
stream
);
wlink
.
_rxmu
.
unlock
();
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
wlink
.
_acceptq
.
sends
(
&
pkt
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
}
}
}
// _send sends raw message via specified stream.
//
// multiple _send can be called in parallel - _send serializes writes.
// XXX +ctx?
error
WatchLink
::
_send
(
StreamID
stream
,
const
string
&
msg
)
{
WatchLink
*
wlink
=
this
;
if
(
msg
.
find
(
'\n'
)
!=
string
::
npos
)
panic
(
"msg has
\\
n"
);
string
pkt
=
fmt
::
sprintf
(
"%lu %s
\n
"
,
stream
,
msg
.
c_str
());
return
wlink
->
_write
(
pkt
);
}
error
WatchLink
::
_write
(
const
string
&
pkt
)
{
WatchLink
*
wlink
=
this
;
wlink
->
_txmu
.
lock
();
defer
([
&
]()
{
wlink
->
_txmu
.
unlock
();
});
//printf('C: watch : tx: %r' % pkt)
int
n
;
error
err
;
tie
(
n
,
err
)
=
wlink
->
_f
.
write
(
pkt
.
c_str
(),
pkt
.
size
());
return
err
;
}
// sendReq sends client -> server request and returns server reply.
// XXX -> reply | None when EOF
tuple
<
string
,
error
>
WatchLink
::
sendReq
(
IContext
*
ctx
,
const
string
&
req
)
{
WatchLink
*
wlink
=
this
;
// XXX err ctx
rxPkt
rx
;
chan
<
rxPkt
>
rxq
;
error
err
;
tie
(
rxq
,
err
)
=
wlink
->
_sendReq
(
ctx
,
req
);
// XXX err
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
rxq
.
recvs
(
&
rx
),
// 1
});
if
(
_
==
0
)
return
make_tuple
(
""
,
ctx
->
err
());
// XXX check for EOF
string
reply
=
rx
.
to_string
();
return
make_tuple
(
reply
,
nil
);
}
tuple
<
/*rxq*/
chan
<
rxPkt
>
,
error
>
WatchLink
::
_sendReq
(
IContext
*
ctx
,
const
string
&
req
)
{
WatchLink
*
wlink
=
this
;
// XXX err ctx?
wlink
->
_txmu
.
lock
();
// XXX -> atomic (currently uses arbitrary lock)
StreamID
stream
=
wlink
->
_req_next
;
wlink
->
_req_next
=
(
wlink
->
_req_next
+
2
);
// wraparound at uint64 max
wlink
->
_txmu
.
unlock
();
auto
rxq
=
makechan
<
rxPkt
>
(
1
);
wlink
->
_rxmu
.
lock
();
if
(
wlink
->
_rxdown
)
{
wlink
->
_rxmu
.
unlock
();
return
make_tuple
(
nil
,
fmt
::
errorf
(
"link is down"
));
}
if
(
wlink
->
_rxtab
.
has
(
stream
))
{
wlink
->
_rxmu
.
unlock
();
panic
(
"BUG: to-be-sent stream is present in rxtab"
);
}
wlink
->
_rxtab
[
stream
]
=
rxq
;
wlink
->
_rxmu
.
unlock
();
error
err
=
wlink
->
_send
(
stream
,
req
);
if
(
err
!=
nil
)
{
// remove rxq from rxtab
wlink
->
_rxmu
.
lock
();
wlink
->
_rxtab
.
erase
(
stream
);
wlink
->
_rxmu
.
unlock
();
// no need to drain rxq - it was created with cap=1
rxq
=
nil
;
}
return
make_tuple
(
rxq
,
err
);
}
// recvReq receives client <- server request.
static
error
_parsePinReq
(
PinReq
*
pin
,
const
rxPkt
*
pkt
);
error
WatchLink
::
recvReq
(
IContext
*
ctx
,
PinReq
*
prx
)
{
WatchLink
&
wlink
=
*
this
;
// XXX err ctx?
rxPkt
pkt
;
bool
ok
;
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
wlink
.
_acceptq
.
recvs
(
&
pkt
,
&
ok
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
if
(
!
ok
)
return
io
::
EOF_
;
pkt
.
to_string
();
return
_parsePinReq
(
prx
,
&
pkt
);
}
// _parsePinReq parses message into PinReq according to wcfs invalidation protocol.
static
error
_parsePinReq
(
PinReq
*
pin
,
const
rxPkt
*
pkt
)
{
// XXX err ctx "bad pin"
pin
->
stream
=
pkt
->
stream
;
auto
msg
=
pkt
->
to_string
();
// pin <foid>) #<blk> @<at>
if
(
!
strings
::
has_prefix
(
msg
,
"pin "
))
return
fmt
::
errorf
(
"not a pin request"
);
// XXX +msg?
auto
argv
=
strings
::
split
(
msg
.
substr
(
4
),
' '
);
if
(
argv
.
size
()
!=
3
)
return
fmt
::
errorf
(
"expected 3 arguments, got %zd"
,
argv
.
size
());
error
err
;
tie
(
pin
->
foid
,
err
)
=
xstrconv
::
parseHex64
(
argv
[
0
]);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid foid"
);
if
(
!
strings
::
has_prefix
(
argv
[
1
],
'#'
))
return
fmt
::
errorf
(
"invalid blk"
);
tie
(
pin
->
blk
,
err
)
=
xstrconv
::
parseInt
(
argv
[
1
].
substr
(
1
));
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid blk"
);
if
(
!
strings
::
has_prefix
(
argv
[
2
],
'@'
))
return
fmt
::
errorf
(
"invalid at"
);
auto
at
=
argv
[
2
].
substr
(
1
);
if
(
at
==
"head"
)
{
pin
->
at
=
TidHead
;
}
else
{
tie
(
pin
->
at
,
err
)
=
xstrconv
::
parseHex64
(
at
);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid at"
);
}
return
nil
;
}
// _readline reads next raw line sent from wcfs.
tuple
<
string
,
error
>
WatchLink
::
_readline
()
{
WatchLink
&
wlink
=
*
this
;
char
buf
[
128
];
size_t
nl_searchfrom
=
0
;
while
(
1
)
{
auto
nl
=
wlink
.
_rxbuf
.
find
(
'\n'
,
nl_searchfrom
);
if
(
nl
!=
string
::
npos
)
{
auto
line
=
wlink
.
_rxbuf
.
substr
(
0
,
nl
+
1
);
wlink
.
_rxbuf
=
wlink
.
_rxbuf
.
substr
(
nl
+
1
);
return
make_tuple
(
line
,
nil
);
}
nl_searchfrom
=
wlink
.
_rxbuf
.
length
();
int
n
;
error
err
;
tie
(
n
,
err
)
=
wlink
.
_f
.
read
(
buf
,
sizeof
(
buf
));
if
(
n
>
0
)
{
// XXX limit line length to avoid DoS
wlink
.
_rxbuf
+=
string
(
buf
,
n
);
continue
;
}
if
(
err
==
nil
)
panic
(
"read returned (0, nil)"
);
if
(
err
==
io
::
EOF_
&&
wlink
.
_rxbuf
.
length
()
!=
0
)
err
=
io
::
ErrUnexpectedEOF
;
return
make_tuple
(
""
,
err
);
}
}
// from_string parses string into rxPkt.
error
rxPkt
::
from_string
(
const
string
&
rx
)
{
rxPkt
&
pkt
=
*
this
;
// <stream> ... \n
auto
sp
=
rx
.
find
(
' '
);
if
(
sp
==
string
::
npos
)
return
fmt
::
errorf
(
"invalid pkt: no SP"
);
string
sid
=
rx
.
substr
(
0
,
sp
);
string
smsg
=
rx
.
substr
(
sp
+
1
);
error
err
;
tie
(
pkt
.
stream
,
err
)
=
xstrconv
::
parseUint
(
sid
);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid pkt: invalid stream ID"
);
auto
msglen
=
smsg
.
length
();
if
(
msglen
>
ARRAY_SIZE
(
pkt
.
data
))
return
fmt
::
errorf
(
"invalid pkt: len(msg) > %zu"
,
ARRAY_SIZE
(
pkt
.
data
));
memcpy
(
pkt
.
data
,
smsg
.
c_str
(),
msglen
);
pkt
.
datalen
=
msglen
;
return
nil
;
}
// to_string converts rxPkt data into string.
string
rxPkt
::
to_string
()
const
{
const
rxPkt
&
pkt
=
*
this
;
return
string
(
pkt
.
data
,
pkt
.
datalen
);
}
// ---- WCFS raw file access ----
// _path returns path for object on wcfs.
...
...
wcfs/internal/wcfs_watchlink.cpp
0 → 100644
View file @
d34c4610
// Copyright (C) 2018-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
#include "wcfs_watchlink.h"
// _openwatch opens new watch link on wcfs.
tuple
<
WatchLink
*
,
error
>
WCFS
::
_openwatch
()
{
WCFS
*
wc
=
this
;
// head/watch handle.
os
::
File
f
;
error
err
;
tie
(
f
,
err
)
=
wc
->
_open
(
"head/watch"
,
O_RDWR
);
if
(
err
!=
nil
)
return
make_tuple
((
WatchLink
*
)
NULL
,
err
);
WatchLink
*
wlink
=
new
(
WatchLink
);
wlink
->
_wc
=
wc
;
wlink
->
_f
=
f
;
wlink
->
_rx_eof
=
makechan
<
structZ
>
();
wlink
->
_acceptq
=
makechan
<
rxPkt
>
();
wlink
->
_rxdown
=
false
;
wlink
->
_req_next
=
1
;
#if 0
serveCtx, wlink._serveCancel = context.with_cancel(context.background())
wlink->_serveWG = sync.WorkGroup(serveCtx)
wlink->_serveWG.go(wlink._serveRX)
#endif
return
make_tuple
(
wlink
,
nil
);
}
void
WatchLink
::
_closeTX
()
{
WatchLink
&
wlink
=
*
this
;
wlink
.
_txclose1
.
do_
([
&
]()
{
// ask wcfs to close its tx & rx sides; close(wcfs.tx) wakes up
// _serveRX on client (= on us). The connection can be already closed
// by wcfs - so ignore errors when sending bye.
(
void
)
wlink
.
_send
(
1
,
"bye"
);
// XXX stream ok?
// XXX vvv should be ~ shutdown(TX, wlink._f), however shutdown does
// not work for non-socket file descriptors. And even if we dup link
// fd, and close only one used for TX, peer's RX will still be blocked
// ad fds are referring to one file object which stays in opened
// state. So just use ^^^ "bye" as "TX closed" message.
// wlink._wtx.close();
});
}
// close closes the link.
error
WatchLink
::
close
()
{
WatchLink
&
wlink
=
*
this
;
wlink
.
_closeTX
();
#if 0
wlink._serveCancel();
// XXX we can get stuck here if wcfs does not behave as we want.
// XXX in particular if there is a silly - e.g. syntax or type error in
// test code - we currently get stuck here.
//
// XXX -> better pthread_kill(SIGINT) instead of relying on wcfs proper behaviour?
// XXX -> we now have `kill -QUIT` to wcfs.go on test timeout - remove ^^^ comments?
error err = wlink._serveWG.wait();
// canceled is expected and ok
if (err == context.canceled)
err = nil;
#else
error
err
=
nil
;
#endif
error
err2
=
wlink
.
_f
.
close
();
if
(
err
==
nil
)
err
=
err2
;
return
err
;
}
// _serveRX receives messages from ._f and dispatches them according to streamID.
error
WatchLink
::
_serveRX
(
IContext
*
ctx
)
{
// XXX error -> where ?
WatchLink
&
wlink
=
*
this
;
// when finishing - wakeup everyone waiting for rx
defer
([
&
]()
{
wlink
.
_acceptq
.
close
();
wlink
.
_rxmu
.
lock
();
wlink
.
_rxdown
=
true
;
// don't allow new rxtab registers
wlink
.
_rxmu
.
unlock
();
for
(
auto
_
:
wlink
.
_rxtab
)
{
auto
rxq
=
_
.
second
;
rxq
.
close
();
}
});
string
l
;
error
err
;
rxPkt
pkt
;
while
(
1
)
{
// NOTE: .close() makes sure .f.read*() will wake up
tie
(
l
,
err
)
=
wlink
.
_readline
();
// XXX +maxlen
if
(
err
==
io
::
EOF_
)
{
// peer closed its tx
// XXX what happens on other errors?
wlink
.
_rx_eof
.
close
();
}
if
(
err
!=
nil
)
return
err
;
printf
(
"C: watch : rx: %s"
,
l
.
c_str
());
err
=
pkt
.
from_string
(
l
);
if
(
err
!=
nil
)
return
err
;
if
(
pkt
.
stream
==
0
)
{
// control/fatal message from wcfs
// XXX print -> receive somewhere? XXX -> recvCtl ?
printf
(
"C: watch : rx fatal: %s
\n
"
,
l
.
c_str
());
//wlink.fatalv.append(msg);
continue
;
}
bool
reply
=
(
pkt
.
stream
%
2
!=
0
);
if
(
reply
)
{
chan
<
rxPkt
>
rxq
;
bool
ok
;
wlink
.
_rxmu
.
lock
();
tie
(
rxq
,
ok
)
=
wlink
.
_rxtab
.
pop
(
pkt
.
stream
);
wlink
.
_rxmu
.
unlock
();
if
(
!
ok
)
{
// wcfs sent reply on unexpected stream
// XXX log + dowmn.
printf
(
"wcfs sent reply on unexpected stream
\n
"
);
continue
;
}
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
rxq
.
sends
(
&
pkt
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
}
else
{
wlink
.
_rxmu
.
lock
();
if
(
wlink
.
_accepted
.
has
(
pkt
.
stream
))
{
wlink
.
_rxmu
.
unlock
();
// XXX log + down
printf
(
"wcfs sent request on already used stream
\n
"
);
continue
;
}
// XXX clear _accepted not to leak memory after reply is sent?
wlink
.
_accepted
.
insert
(
pkt
.
stream
);
wlink
.
_rxmu
.
unlock
();
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
wlink
.
_acceptq
.
sends
(
&
pkt
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
}
}
}
// _send sends raw message via specified stream.
//
// multiple _send can be called in parallel - _send serializes writes.
// XXX +ctx?
error
WatchLink
::
_send
(
StreamID
stream
,
const
string
&
msg
)
{
WatchLink
*
wlink
=
this
;
if
(
msg
.
find
(
'\n'
)
!=
string
::
npos
)
panic
(
"msg has
\\
n"
);
string
pkt
=
fmt
::
sprintf
(
"%lu %s
\n
"
,
stream
,
msg
.
c_str
());
return
wlink
->
_write
(
pkt
);
}
error
WatchLink
::
_write
(
const
string
&
pkt
)
{
WatchLink
*
wlink
=
this
;
wlink
->
_txmu
.
lock
();
defer
([
&
]()
{
wlink
->
_txmu
.
unlock
();
});
//printf('C: watch : tx: %r' % pkt)
int
n
;
error
err
;
tie
(
n
,
err
)
=
wlink
->
_f
.
write
(
pkt
.
c_str
(),
pkt
.
size
());
return
err
;
}
// sendReq sends client -> server request and returns server reply.
// XXX -> reply | None when EOF
tuple
<
string
,
error
>
WatchLink
::
sendReq
(
IContext
*
ctx
,
const
string
&
req
)
{
WatchLink
*
wlink
=
this
;
// XXX err ctx
rxPkt
rx
;
chan
<
rxPkt
>
rxq
;
error
err
;
tie
(
rxq
,
err
)
=
wlink
->
_sendReq
(
ctx
,
req
);
// XXX err
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
rxq
.
recvs
(
&
rx
),
// 1
});
if
(
_
==
0
)
return
make_tuple
(
""
,
ctx
->
err
());
// XXX check for EOF
string
reply
=
rx
.
to_string
();
return
make_tuple
(
reply
,
nil
);
}
tuple
<
/*rxq*/
chan
<
rxPkt
>
,
error
>
WatchLink
::
_sendReq
(
IContext
*
ctx
,
const
string
&
req
)
{
WatchLink
*
wlink
=
this
;
// XXX err ctx?
wlink
->
_txmu
.
lock
();
// XXX -> atomic (currently uses arbitrary lock)
StreamID
stream
=
wlink
->
_req_next
;
wlink
->
_req_next
=
(
wlink
->
_req_next
+
2
);
// wraparound at uint64 max
wlink
->
_txmu
.
unlock
();
auto
rxq
=
makechan
<
rxPkt
>
(
1
);
wlink
->
_rxmu
.
lock
();
if
(
wlink
->
_rxdown
)
{
wlink
->
_rxmu
.
unlock
();
return
make_tuple
(
nil
,
fmt
::
errorf
(
"link is down"
));
}
if
(
wlink
->
_rxtab
.
has
(
stream
))
{
wlink
->
_rxmu
.
unlock
();
panic
(
"BUG: to-be-sent stream is present in rxtab"
);
}
wlink
->
_rxtab
[
stream
]
=
rxq
;
wlink
->
_rxmu
.
unlock
();
error
err
=
wlink
->
_send
(
stream
,
req
);
if
(
err
!=
nil
)
{
// remove rxq from rxtab
wlink
->
_rxmu
.
lock
();
wlink
->
_rxtab
.
erase
(
stream
);
wlink
->
_rxmu
.
unlock
();
// no need to drain rxq - it was created with cap=1
rxq
=
nil
;
}
return
make_tuple
(
rxq
,
err
);
}
// recvReq receives client <- server request.
static
error
_parsePinReq
(
PinReq
*
pin
,
const
rxPkt
*
pkt
);
error
WatchLink
::
recvReq
(
IContext
*
ctx
,
PinReq
*
prx
)
{
WatchLink
&
wlink
=
*
this
;
// XXX err ctx?
rxPkt
pkt
;
bool
ok
;
int
_
=
select
({
ctx
->
done
().
recvs
(),
// 0
wlink
.
_acceptq
.
recvs
(
&
pkt
,
&
ok
),
// 1
});
if
(
_
==
0
)
return
ctx
->
err
();
if
(
!
ok
)
return
io
::
EOF_
;
pkt
.
to_string
();
return
_parsePinReq
(
prx
,
&
pkt
);
}
// _parsePinReq parses message into PinReq according to wcfs invalidation protocol.
static
error
_parsePinReq
(
PinReq
*
pin
,
const
rxPkt
*
pkt
)
{
// XXX err ctx "bad pin"
pin
->
stream
=
pkt
->
stream
;
auto
msg
=
pkt
->
to_string
();
// pin <foid>) #<blk> @<at>
if
(
!
strings
::
has_prefix
(
msg
,
"pin "
))
return
fmt
::
errorf
(
"not a pin request"
);
// XXX +msg?
auto
argv
=
strings
::
split
(
msg
.
substr
(
4
),
' '
);
if
(
argv
.
size
()
!=
3
)
return
fmt
::
errorf
(
"expected 3 arguments, got %zd"
,
argv
.
size
());
error
err
;
tie
(
pin
->
foid
,
err
)
=
xstrconv
::
parseHex64
(
argv
[
0
]);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid foid"
);
if
(
!
strings
::
has_prefix
(
argv
[
1
],
'#'
))
return
fmt
::
errorf
(
"invalid blk"
);
tie
(
pin
->
blk
,
err
)
=
xstrconv
::
parseInt
(
argv
[
1
].
substr
(
1
));
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid blk"
);
if
(
!
strings
::
has_prefix
(
argv
[
2
],
'@'
))
return
fmt
::
errorf
(
"invalid at"
);
auto
at
=
argv
[
2
].
substr
(
1
);
if
(
at
==
"head"
)
{
pin
->
at
=
TidHead
;
}
else
{
tie
(
pin
->
at
,
err
)
=
xstrconv
::
parseHex64
(
at
);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid at"
);
}
return
nil
;
}
// _readline reads next raw line sent from wcfs.
tuple
<
string
,
error
>
WatchLink
::
_readline
()
{
WatchLink
&
wlink
=
*
this
;
char
buf
[
128
];
size_t
nl_searchfrom
=
0
;
while
(
1
)
{
auto
nl
=
wlink
.
_rxbuf
.
find
(
'\n'
,
nl_searchfrom
);
if
(
nl
!=
string
::
npos
)
{
auto
line
=
wlink
.
_rxbuf
.
substr
(
0
,
nl
+
1
);
wlink
.
_rxbuf
=
wlink
.
_rxbuf
.
substr
(
nl
+
1
);
return
make_tuple
(
line
,
nil
);
}
nl_searchfrom
=
wlink
.
_rxbuf
.
length
();
int
n
;
error
err
;
tie
(
n
,
err
)
=
wlink
.
_f
.
read
(
buf
,
sizeof
(
buf
));
if
(
n
>
0
)
{
// XXX limit line length to avoid DoS
wlink
.
_rxbuf
+=
string
(
buf
,
n
);
continue
;
}
if
(
err
==
nil
)
panic
(
"read returned (0, nil)"
);
if
(
err
==
io
::
EOF_
&&
wlink
.
_rxbuf
.
length
()
!=
0
)
err
=
io
::
ErrUnexpectedEOF
;
return
make_tuple
(
""
,
err
);
}
}
// from_string parses string into rxPkt.
error
rxPkt
::
from_string
(
const
string
&
rx
)
{
rxPkt
&
pkt
=
*
this
;
// <stream> ... \n
auto
sp
=
rx
.
find
(
' '
);
if
(
sp
==
string
::
npos
)
return
fmt
::
errorf
(
"invalid pkt: no SP"
);
string
sid
=
rx
.
substr
(
0
,
sp
);
string
smsg
=
rx
.
substr
(
sp
+
1
);
error
err
;
tie
(
pkt
.
stream
,
err
)
=
xstrconv
::
parseUint
(
sid
);
if
(
err
!=
nil
)
return
fmt
::
errorf
(
"invalid pkt: invalid stream ID"
);
auto
msglen
=
smsg
.
length
();
if
(
msglen
>
ARRAY_SIZE
(
pkt
.
data
))
return
fmt
::
errorf
(
"invalid pkt: len(msg) > %zu"
,
ARRAY_SIZE
(
pkt
.
data
));
memcpy
(
pkt
.
data
,
smsg
.
c_str
(),
msglen
);
pkt
.
datalen
=
msglen
;
return
nil
;
}
// to_string converts rxPkt data into string.
//
// The result is the first .datalen bytes of .data.
string rxPkt::to_string() const {
    return string(this->data, this->datalen);
}
wcfs/internal/wcfs_watchlink.h
0 → 100644
View file @
d34c4610
// Copyright (C) 2018-2019 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
// it under the terms of the GNU General Public License version 3, or (at your
// option) any later version, as published by the Free Software Foundation.
//
// You can also Link and Combine this program with other software covered by
// the terms of any of the Free Software licenses or any of the Open Source
// Initiative approved licenses and Convey the resulting work. Corresponding
// source of such a combination shall include the source code for all other
// software used.
//
// This program is distributed WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// See COPYING file for full licensing terms.
// See https://www.nexedi.com/licensing for rationale and options.
// wcfs_watchlink provides WatchLink class that implements message exchange
// over /head/watch.
#ifndef _NXD_WCFS_WATCHLINK_H_
#define _NXD_WCFS_WATCHLINK_H_
#include "wcfs_misc.h"
struct
WCFS
;
struct
PinReq
;
// StreamID stands for ID of a stream multiplexed over WatchLink.
using StreamID = uint64_t;
// rxPkt internally represents data of one message received over WatchLink.
//
// It is a fixed-size, plain-data structure, so that it can be passed by value
// through chan<rxPkt>.
struct rxPkt {
    // stream over which the data was received
    StreamID stream;

    // raw data received/to-be-sent.
    // XXX not e.g. string as chan<T> currently does not support types with
    //     non-trivial copy. Note: we anyway need to limit rx line length to
    //     avoid DoS, but just for DoS the limit would be higher.
    uint16_t datalen;   // number of valid bytes in data
    char     data[128 - sizeof(StreamID) - sizeof(uint16_t)];   // sized so that the whole rxPkt is 128 bytes

    // from_string parses a received line into this packet;
    // to_string returns the valid part of data as string.
    error  from_string(const string &rx);
    string to_string() const;
};

// the size of data[] above is computed on the assumption of no padding; verify it.
static_assert(sizeof(rxPkt) == 128);
// WatchLink represents /head/watch link opened on wcfs.
//
// It is created by WCFS::_openwatch().
//
// .sendReq()/.recvReq() provides raw IO in terms of wcfs invalidation protocol messages.
// .close() closes the link.
//
// Several logical streams are multiplexed over the link: each message carries
// the ID of its stream. Streams of client-originated requests have odd IDs
// (see ._req_next); stream 0 carries control messages from wcfs.
//
// It is safe to use WatchLink from multiple threads simultaneously.
class WatchLink {
    WCFS            *_wc;       // wcfs this link belongs to
    os::File        _f;         // head/watch file handle
    string          _rxbuf;     // buffer for read data from _f
    chan<structZ>   _rx_eof;    // becomes ready when wcfs closes its tx side

    // inv.protocol message IO
    chan<rxPkt>     _acceptq;   // server originated messages go here
    sync::Mutex     _rxmu;      // protects _rxdown, _rxtab and _accepted
    bool            _rxdown;    // set when receiving is over; no new _rxtab entries are allowed after that
    dict<StreamID, chan<rxPkt>>
                    _rxtab;     // {} stream -> rxq     server replies go via here
    set<StreamID>   _accepted;  // streams we accepted but have not replied to yet

    StreamID        _req_next;  // stream ID for next client-originated request XXX -> atomic
    sync::Mutex     _txmu;      // serializes writes (and currently also protects _req_next)

    sync::Once      _txclose1;  // used by _closeTX to send "bye"

    // not yet ported: cancellation and waiting for the _serveRX task.
#if 0
    func() _serveCancel
    sync.WorkGroup *_serveWG
#endif

public:
    // NOTE(review): the friend declaration is disabled, while WCFS::_openwatch
    // assigns the private fields above - confirm how it is meant to get access.
    //friend tuple<WatchLink*, error> WCFS::_openwatch();

    // close closes the link.
    error close();

    // recvReq receives the next server-originated pin request into *rx_into.
    error recvReq(IContext *ctx, PinReq *rx_into);

    // sendReq sends request to wcfs and returns its reply.
    tuple<string, error> sendReq(IContext *ctx, const string &req);

private:
    void _closeTX();                                    // say "bye" to wcfs
    error _serveRX(IContext *ctx);                      // receive loop: dispatch incoming packets by stream
    tuple<string, error> _readline();                   // read next raw line from _f
    error _send(StreamID stream, const string &msg);    // send msg over stream
    error _write(const string &pkt);                    // write raw packet to _f under _txmu
    tuple<chan<rxPkt>, error> _sendReq(IContext *ctx, const string &req);   // send request; return queue for its reply
};
// PinReq represents 1 server-initiated wcfs pin request received over /head/watch link.
//
// On the wire it is the message "pin <foid> #<blk> @<at>".
struct PinReq {
    StreamID    stream; // request was received with this stream ID
    zodb::Oid   foid;   // request is about this file
    int64_t     blk;    // ----//---- about this block
    zodb::Tid   at;     // pin to this at; TidHead means unpin to head
};
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment