Stefane Fermigier / neo / Commits / fc667f6c

Commit fc667f6c authored 7 years ago by Kirill Smelkov
.

parent eeb1f958
Showing 4 changed files with 189 additions and 215 deletions
go/neo/server/master.go          +25  -59
go/neo/server/server.go          +49  -0
go/neo/server/storage.go         +104 -144
go/xcommon/xcontext/xcontext.go  +11  -12
go/neo/server/master.go

@@ -38,8 +38,6 @@ import (
     "lab.nexedi.com/kirr/neo/go/xcommon/xcontext"
     "lab.nexedi.com/kirr/neo/go/xcommon/xio"
     "lab.nexedi.com/kirr/neo/go/xcommon/xnet"
-
-    "lab.nexedi.com/kirr/go123/xerr"
 )

 // Master is a node overseeing and managing how whole NEO cluster works
@@ -62,24 +60,13 @@ type Master struct {
     // channels from workers directly serving peers to main driver
     nodeCome  chan nodeCome  // node connected  XXX -> acceptq?
-    nodeLeave chan nodeLeave // node disconnected XXX -> don't need
+    //nodeLeave chan nodeLeave // node disconnected XXX -> don't need

     // so tests could override
     monotime func() float64
 }

-// event: node connects
-type nodeCome struct {
-    req   *neo.Request
-    idReq *neo.RequestIdentification // we received this identification request
-}
-
-// event: node disconnects
-type nodeLeave struct {
-    node *neo.Node
-}
-
 // NewMaster creates new master node that will listen on serveAddr.
 // Use Run to actually start running the node.
 func NewMaster(clusterName, serveAddr string, net xnet.Networker) *Master {
@@ -91,7 +78,7 @@ func NewMaster(clusterName, serveAddr string, net xnet.Networker) *Master {
         ctlShutdown: make(chan chan error),
         nodeCome:    make(chan nodeCome),
-        nodeLeave:   make(chan nodeLeave),
+        //nodeLeave:   make(chan nodeLeave),
         monotime:    monotime,
     }
@@ -118,14 +105,14 @@ func (m *Master) Stop() {
     <-ech
 }

-// Shutdown requests all known nodes in the cluster to stop
+// Shutdown requests all known nodes in the cluster to stop.
 // XXX + master's run to finish ?
 func (m *Master) Shutdown() error {
     panic("TODO")
 }

-// setClusterState sets .clusterState and notifies subscribers
+// setClusterState sets .clusterState and notifies subscribers.
 func (m *Master) setClusterState(state neo.ClusterState) {
     m.node.ClusterState.Set(state)
@@ -133,7 +120,7 @@ func (m *Master) setClusterState(state neo.ClusterState) {
 }

-// Run starts master node and runs it until ctx is cancelled or fatal error
+// Run starts master node and runs it until ctx is cancelled or fatal error.
 func (m *Master) Run(ctx context.Context) (err error) {
     // start listening
     l, err := m.node.Listen()
@@ -167,14 +154,19 @@ func (m *Master) Run(ctx context.Context) (err error) {
     wg := sync.WaitGroup{}
     serveCtx, serveCancel := context.WithCancel(ctx)

     wg.Add(1)
-    go func() {
+    go func(ctx context.Context) (err error) {
         defer wg.Done()
+        defer task.Running(&ctx, "accept")(&err)

         // XXX dup in storage
-        for serveCtx.Err() == nil {
-            req, idReq, err := l.Accept(serveCtx)
+        for {
+            if ctx.Err() != nil {
+                return ctx.Err()
+            }
+
+            req, idReq, err := l.Accept(ctx)
             if err != nil {
-                // TODO log / throttle
+                log.Error(ctx, err) // XXX throttle?
                 continue
             }
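The goroutine's new signature, go func(ctx context.Context) (err error), exists so that `defer task.Running(&ctx, "accept")(&err)` can tag the context with a task name and annotate the returned error on the way out. The task package's implementation is not part of this diff; what follows is a minimal sketch of the idiom, with a hypothetical running helper standing in for task.Running:

    package main

    import (
        "context"
        "fmt"
    )

    type taskKey struct{}

    // running is a hypothetical stand-in for task.Running: it tags ctx with a
    // task name and returns a function to be deferred, which prefixes a
    // non-nil *err with that name when the surrounding function returns.
    func running(ctx *context.Context, name string) func(*error) {
        *ctx = context.WithValue(*ctx, taskKey{}, name)
        return func(errp *error) {
            if *errp != nil {
                *errp = fmt.Errorf("%s: %w", name, *errp)
            }
        }
    }

    func acceptLoop(ctx context.Context) (err error) {
        defer running(&ctx, "accept")(&err)
        return fmt.Errorf("listener closed") // reported as "accept: listener closed"
    }

    func main() {
        fmt.Println(acceptLoop(context.Background()))
    }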
@@ -196,13 +188,13 @@ func (m *Master) Run(ctx context.Context) (err error) {
             case m.nodeCome <- nodeCome{req, idReq}:
                 // ok

-            case <-serveCtx.Done():
+            case <-ctx.Done():
                 // shutdown
-                lclose(serveCtx, req.Link())
-                return
+                lclose(ctx, req.Link())
+                continue
             }
         }
-    }()
+    }(serveCtx)

     // main driving logic
     err = m.runMain(ctx)
@@ -330,7 +322,7 @@ loop:
         go func() {
             defer wg.Done()

-            err := m.accept(ctx, n.req, resp)
+            err := accept(ctx, n.req, resp)
             if err != nil {
                 recovery <- storRecovery{stor: node, err: err}
                 return
@@ -577,7 +569,7 @@ loop:
         go func() {
             defer wg.Done()

-            err := m.accept(ctx, n.req, resp)
+            err := accept(ctx, n.req, resp)
             if err != nil {
                 verify <- storVerify{stor: node, err: err}
                 return
@@ -586,6 +578,7 @@ loop:
             storCtlVerify(ctx, node, m.node.PartTab, verify)
         }()

+/*
     case n := <-m.nodeLeave:
         n.node.SetState(neo.DOWN)
@@ -597,6 +590,7 @@ loop:
             err = errClusterDegraded
             break loop
         }
+*/

     // a storage node came through verification - adjust our last{Oid,Tid} if ok
     // on error check - whether cluster became non-operational and stop verification if so
@@ -783,7 +777,7 @@ loop:
         go func() {
             defer wg.Done()

-            err = m.accept(ctx, n.req, resp)
+            err = accept(ctx, n.req, resp)
             if err != nil {
                 serviced <- serviceDone{node: node, err: err}
                 return
@@ -806,6 +800,7 @@ loop:
         // TODO if S goes away -> check partTab still operational -> if not - recovery
         _ = d

+/*
     // XXX who sends here?
     case n := <-m.nodeLeave:
         n.node.SetState(neo.DOWN)
@@ -815,6 +810,7 @@ loop:
             err = errClusterDegraded
             break loop
         }
+*/

     // XXX what else ? (-> txn control at least)
@@ -1106,36 +1102,6 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp neo.Msg) {
     return node, accept
 }

-// reject sends rejective identification response and closes associated link
-func reject(ctx context.Context, req *neo.Request, resp neo.Msg) {
-    // XXX cancel on ctx?
-    // XXX log?
-    err1 := req.Reply(resp)
-    err2 := req.Link().Close()
-    err := xerr.Merge(err1, err2)
-    if err != nil {
-        log.Error(ctx, "reject:", err)
-    }
-}
-
-// goreject spawns reject in separate goroutine properly added/done on wg
-func goreject(ctx context.Context, wg *sync.WaitGroup, req *neo.Request, resp neo.Msg) {
-    wg.Add(1)
-    defer wg.Done()
-    go reject(ctx, req, resp)
-}
-
-// accept replies with acceptive identification response
-// XXX if problem -> .nodeLeave
-// XXX spawn ping goroutine from here?
-func (m *Master) accept(ctx context.Context, req *neo.Request, resp neo.Msg) error {
-    // XXX cancel on ctx
-    err1 := req.Reply(resp)
-    return err1
-
-    // XXX while trying to work on single conn
-    //err2 := conn.Close()
-    //return xerr.First(err1, err2)
-}
-
 // allocUUID allocates new node uuid for a node of kind nodeType
 // XXX it is bad idea for master to assign uuid to coming node
 // -> better nodes generate really unique UUID themselves and always show with them
go/neo/server/server.go

@@ -26,8 +26,10 @@ import (
     "context"
     // "fmt"
     // "net"
+    "sync"

     "lab.nexedi.com/kirr/neo/go/neo"
     "lab.nexedi.com/kirr/neo/go/xcommon/log"
+
+    "lab.nexedi.com/kirr/go123/xerr"
 )
@@ -115,3 +117,50 @@ func IdentifyPeer(ctx context.Context, link *neo.NodeLink, myNodeType neo.NodeType)
     return req, nil
 }
+
+// ----------------------------------------
+
+// event: node connects
+type nodeCome struct {
+    req   *neo.Request
+    idReq *neo.RequestIdentification // we received this identification request
+}
+
+/*
+// event: node disconnects
+type nodeLeave struct {
+    node *neo.Node
+}
+*/
+
+// reject sends rejective identification response and closes associated link
+func reject(ctx context.Context, req *neo.Request, resp neo.Msg) {
+    // XXX cancel on ctx?
+    // XXX log?
+    err1 := req.Reply(resp)
+    err2 := req.Link().Close()
+    err := xerr.Merge(err1, err2)
+    if err != nil {
+        log.Error(ctx, "reject:", err)
+    }
+}
+
+// goreject spawns reject in separate goroutine properly added/done on wg
+func goreject(ctx context.Context, wg *sync.WaitGroup, req *neo.Request, resp neo.Msg) {
+    wg.Add(1)
+    defer wg.Done()
+    go reject(ctx, req, resp)
+}
+
+// accept replies with acceptive identification response
+// XXX spawn ping goroutine from here?
+func accept(ctx context.Context, req *neo.Request, resp neo.Msg) error {
+    // XXX cancel on ctx
+    err1 := req.Reply(resp)
+    return err1
+
+    // XXX while trying to work on single conn
+    //err2 := conn.Close()
+    //return xerr.First(err1, err2)
+}
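reject collects both the reply error and the link-close error with xerr.Merge from kirr/go123, so neither failure is silently dropped. On Go 1.20+ the same both-errors-reported shape can be sketched with stdlib errors.Join; this is an analogue for illustration, not what the code above uses:

    package main

    import (
        "errors"
        "fmt"
    )

    // rejectish mirrors reject's error handling with stdlib errors.Join
    // instead of xerr.Merge: send the reply, then close the link, and
    // report both failures rather than only the first.
    func rejectish(reply, closeLink func() error) error {
        err1 := reply()
        err2 := closeLink()
        return errors.Join(err1, err2) // nil when both are nil
    }

    func main() {
        err := rejectish(
            func() error { return errors.New("reply: broken pipe") },
            func() error { return errors.New("close: link already closed") },
        )
        fmt.Println(err) // both messages, one per line
    }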
go/neo/server/storage.go

@@ -54,6 +54,8 @@ type Storage struct {
     // 2 ? (data.fs)
     // 3 packed/ (deltified objects)
     zstor zodb.IStorage // underlying ZODB storage XXX -> directly work with fs1 & friends
+
+    //nodeCome chan nodeCome // node connected
 }

 // NewStorage creates new storage node that will listen on serveAddr and talk to master on masterAddr.
@@ -89,30 +91,46 @@ func (stor *Storage) Run(ctx context.Context) error {
     wg := sync.WaitGroup{}
     serveCtx, serveCancel := context.WithCancel(ctx)

     wg.Add(1)
-    go func() {
+    go func(ctx context.Context) (err error) {
         defer wg.Done()
+        defer task.Running(&ctx, "serve")(&err) // XXX or "accept" ?

         // XXX dup from master
-        for serveCtx.Err() == nil {
-            conn, idReq, err := l.Accept(serveCtx)
+        for {
+            if ctx.Err() != nil {
+                return ctx.Err()
+            }
+
+            req, idReq, err := l.Accept(ctx)
             if err != nil {
-                // TODO log / throttle
+                log.Error(ctx, err) // XXX throttle?
                 continue
             }

-            _ = idReq
+            resp, ok := stor.identify(idReq)
+            if !ok {
+                goreject(ctx, &wg, req, resp)
+                continue
+            }
+
+            wg.Add(1)
+            go func() {
+                defer wg.Done()
+            }()

             // handover to main driver
             select {
-            //case m.nodeCome <- nodeCome{conn, idReq, nil/*XXX kill*/}:
+            //case stor.nodeCome <- nodeCome{req, idReq}:
             //    // ok

-            case <-serveCtx.Done():
+            case <-ctx.Done():
                 // shutdown
-                lclose(serveCtx, conn.Link())
-                return
+                lclose(ctx, req.Link())
+                continue
             }
         }
-    }()
+    }(serveCtx)

     // connect to master and get commands and updates from it
     err = stor.talkMaster(ctx)
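Both accept loops in this commit log every failed Accept and carry a "// XXX throttle?" note. One simple way to resolve that XXX, offered here only as a sketch and not part of the commit: suppress repeated log lines that arrive less than a minimum interval apart.

    package server

    import (
        "sync"
        "time"
    )

    // logThrottle suppresses events that arrive less than min apart.
    type logThrottle struct {
        mu   sync.Mutex
        last time.Time
        min  time.Duration
    }

    // ok reports whether enough time has passed since the last allowed
    // event; if so, it records now as the last allowed time.
    func (t *logThrottle) ok() bool {
        t.mu.Lock()
        defer t.mu.Unlock()
        if time.Since(t.last) < t.min {
            return false
        }
        t.last = time.Now()
        return true
    }

Inside the loop this would read: if err != nil { if throttle.ok() { log.Error(ctx, err) }; continue }.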
@@ -154,6 +172,7 @@ func (stor *Storage) talkMaster(ctx context.Context) (err error) {
 }

 // talkMaster1 does 1 cycle of connect/talk/disconnect to master.
+//
 // it returns error describing why such cycle had to finish
 // XXX distinguish between temporary problems and non-temporary ones?
 func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
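talkMaster's own body is not shown in this diff; by the comment above, talkMaster1 performs one connect/talk/disconnect cycle and returns the reason the cycle ended. A retry driver around such a cycle function could look like the following sketch. The fixed one-second backoff and the cycle1 closure are illustrative assumptions, not the actual talkMaster code:

    package main

    import (
        "context"
        "errors"
        "fmt"
        "time"
    )

    // talkLoop keeps redialing the master until ctx is cancelled.
    // cycle1 stands in for talkMaster1: one connect/talk/disconnect cycle.
    func talkLoop(ctx context.Context, cycle1 func(context.Context) error) error {
        for {
            err := cycle1(ctx)
            fmt.Println("cycle ended:", err)

            // stop for good only on cancellation; otherwise back off and retry
            if ctx.Err() != nil {
                return ctx.Err()
            }
            select {
            case <-time.After(1 * time.Second): // fixed backoff for the sketch
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }

    func main() {
        ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
        defer cancel()
        _ = talkLoop(ctx, func(context.Context) error {
            return errors.New("master closed link")
        })
    }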
@@ -190,100 +209,13 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
     err = stor.m1serve(ctx, reqStart)
     log.Error(ctx, err)
     return err
-
-/*
-    // accept next connection from master. only 1 connection is served at any given time.
-    // every new connection from master means talk over previous connection is cancelled.
-    // XXX recheck compatibility with py
-    type accepted struct {conn *neo.Conn; err error}
-    acceptq := make(chan accepted)
-    go func () {
-        // XXX (temp ?) disabled not to let S accept new connections
-        // reason: not (yet ?) clear how to allow listen on dialed link without
-        // missing immediate sends or deadlocks if peer does not follow
-        // expected protocol exchange (2 receive paths: Recv & Accept)
-        return
-
-        for {
-            conn, err := Mlink.Accept()
-            select {
-            case acceptq <- accepted{conn, err}:
-            case <-retch:
-                return
-            }
-            if err != nil {
-                log.Error(ctx, err)
-                return
-            }
-        }
-    }()
-
-    // handle notifications and commands from master
-    talkq := make(chan error, 1)
-    for {
-        // wait for next connection from master if talk over previous one finished.
-        // XXX rafactor all this into SingleTalker ? (XXX ServeSingle ?)
-        if Mconn == nil {
-            select {
-            case a := <-acceptq:
-                if a.err != nil {
-                    return a.err
-                }
-                Mconn = a.conn
-
-            case <-ctx.Done():
-                return ctx.Err()
-            }
-        }
-
-        // one talk cycle for master to drive us
-        // puts error after talk finishes -> talkq
-        talk := func() error {
-            // let master initialize us. If successful this ends with StartOperation command.
-            err := stor.m1initialize(ctx, Mconn)
-            if err != nil {
-                log.Error(ctx, err)
-                return err
-            }
-
-            // we got StartOperation command. Let master drive us during servicing phase.
-            err = stor.m1serve(ctx, Mconn)
-            log.Error(ctx, err)
-            return err
-        }
-
-        go func() {
-            talkq <- talk()
-        }()
-
-        // next connection / talk finished / cancel
-        select {
-        case a := <-acceptq:
-            lclose(ctx, Mconn) // wakeup/cancel current talk
-            <-talkq            // wait till it finish
-
-            if a.err != nil {
-                return a.err
-            }
-
-            Mconn = a.conn // proceed next cycle on accepted conn
-
-        case err = <-talkq:
-            // XXX check for shutdown command
-            lclose(ctx, Mconn)
-            Mconn = nil // now wait for accept to get next Mconn
-
-        case <-ctx.Done():
-            return ctx.Err()
-        }
-    }
-*/
 }

 // m1initialize drives storage by master messages during initialization phase
 //
 // Initialization includes master retrieving info for cluster recovery and data
 // verification before starting operation. Initialization finishes either
-// successfully with receiving master commanding to start operation, or
+// successfully with receiving master command to start operation, or
 // unsuccessfully with connection closing indicating initialization was
 // cancelled or some other error.
 //
@@ -411,6 +343,29 @@ func (stor *Storage) m1serve(ctx context.Context, reqStart *neo.Request) (err error) {

 // --- serve incoming connections from other nodes ---

+// identify processes identification request from connected peer.
+func (stor *Storage) identify(idReq *neo.RequestIdentification) (neo.Msg, bool) {
+    // XXX stub: we accept clients and don't care about their UUID
+    if idReq.NodeType != neo.CLIENT {
+        return &neo.Error{neo.PROTOCOL_ERROR, "only clients are accepted"}, false
+    }
+    if idReq.ClusterName != stor.node.ClusterName {
+        return &neo.Error{neo.PROTOCOL_ERROR, "cluster name mismatch"}, false
+    }
+
+    // XXX check operational?
+
+    return &neo.AcceptIdentification{
+        NodeType:      stor.node.MyInfo.Type,
+        MyUUID:        stor.node.MyInfo.UUID, // XXX lock wrt update
+        NumPartitions: 1, // XXX
+        NumReplicas:   1, // XXX
+        YourUUID:      idReq.UUID,
+    }, true
+}
+
 // ServeLink serves incoming node-node link connection
 func (stor *Storage) ServeLink(ctx context.Context, link *neo.NodeLink) (err error) {
     defer task.Runningf(&ctx, "serve %s", link)(&err)
@@ -473,6 +428,7 @@ func (stor *Storage) serveClient(ctx context.Context, req neo.Request) {
     //log.Infof(ctx, "%s: serving new client conn", conn) // XXX -> running?

     // rederive ctx to be also cancelled if M tells us StopOperation
+    // XXX level up
     ctx, cancel := stor.withWhileOperational(ctx)
     defer cancel()
@@ -499,52 +455,6 @@ func (stor *Storage) serveClient(ctx context.Context, req neo.Request) {
     }
 }

-// serveClient serves incoming connection on which peer identified itself as client
-// the connection is closed when serveClient returns
-// XXX +error return?
-//
-// XXX version that keeps 1 goroutine per 1 Conn
-// XXX unusable until Conn.Close signals peer
-/*
-func (stor *Storage) serveClient(ctx context.Context, conn *neo.Conn) {
-    log.Infof(ctx, "%s: serving new client conn", conn) // XXX -> running?
-
-    // rederive ctx to be also cancelled if M tells us StopOperation
-    ctx, cancel := stor.withWhileOperational(ctx)
-    defer cancel()
-
-    // main work to serve
-    done := make(chan error, 1)
-    go func() {
-        for {
-            err := stor.serveClient1(conn)
-            if err != nil {
-                done <- err
-                break
-            }
-        }
-    }()
-
-    // close connection when either cancelling or returning (e.g. due to an error)
-    // ( when cancelling - conn.Close will signal to current IO to
-    //   terminate with an error )
-    var err error
-    select {
-    case <-ctx.Done():
-        // XXX tell client we are shutting down?
-        // XXX should we also wait for main work to finish?
-        err = ctx.Err()
-    case err = <-done:
-    }
-    log.Infof(ctx, "%v: %v", conn, err)
-
-    // XXX vvv -> defer ?
-    log.Infof(ctx, "%v: closing client conn", conn)
-    conn.Close() // XXX err
-}
-*/

 // serveClient1 prepares response for 1 request from client
 func (stor *Storage) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Msg) {
     switch req := req.(type) {
@@ -593,3 +503,53 @@ func (stor *Storage) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Msg) {
         //req.Put(...)
 }
+
+// ----------------------------------------
+
+// serveClient serves incoming connection on which peer identified itself as client
+// the connection is closed when serveClient returns
+// XXX +error return?
+//
+// XXX version that keeps 1 goroutine per 1 Conn
+// XXX unusable until Conn.Close signals peer
+/*
+func (stor *Storage) serveClient(ctx context.Context, conn *neo.Conn) {
+    log.Infof(ctx, "%s: serving new client conn", conn) // XXX -> running?
+
+    // rederive ctx to be also cancelled if M tells us StopOperation
+    ctx, cancel := stor.withWhileOperational(ctx)
+    defer cancel()
+
+    // main work to serve
+    done := make(chan error, 1)
+    go func() {
+        for {
+            err := stor.serveClient1(conn)
+            if err != nil {
+                done <- err
+                break
+            }
+        }
+    }()
+
+    // close connection when either cancelling or returning (e.g. due to an error)
+    // ( when cancelling - conn.Close will signal to current IO to
+    //   terminate with an error )
+    var err error
+    select {
+    case <-ctx.Done():
+        // XXX tell client we are shutting down?
+        // XXX should we also wait for main work to finish?
+        err = ctx.Err()
+    case err = <-done:
+    }
+    log.Infof(ctx, "%v: %v", conn, err)
+
+    // XXX vvv -> defer ?
+    log.Infof(ctx, "%v: closing client conn", conn)
+    conn.Close() // XXX err
+}
+*/
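serveClient1 dispatches on the concrete message type with `switch req := req.(type)`. The same one-request-in, one-response-out shape in isolation, using stand-in types rather than the real neo protocol messages:

    package main

    import "fmt"

    // Stand-in message types - not the real neo protocol messages.
    type (
        Msg          interface{}
        GetObject    struct{ Oid uint64 }
        AnswerObject struct {
            Oid  uint64
            Data []byte
        }
        ErrorMsg struct {
            Code    int
            Message string
        }
    )

    // serveReq mirrors the serveClient1 shape: one request in, one response
    // out, selected by a type switch over the message interface.
    func serveReq(req Msg) (resp Msg) {
        switch req := req.(type) {
        case *GetObject:
            // a real handler would load the object data from storage here
            return &AnswerObject{Oid: req.Oid, Data: []byte("...")}
        default:
            return &ErrorMsg{Code: 1, Message: fmt.Sprintf("unexpected message %T", req)}
        }
    }

    func main() {
        fmt.Printf("%+v\n", serveReq(&GetObject{Oid: 1}))
        fmt.Printf("%+v\n", serveReq("bogus"))
    }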
go/xcommon/xcontext/xcontext.go

@@ -55,28 +55,27 @@ func Merge(ctx1, ctx2 context.Context) (context.Context, context.CancelFunc) {
     // ----//---- same for ctx2?
     */

+    /*
+    XXX do we need vvv?
     // if src ctx is already cancelled - make mc cancelled right after creation
     //
     // this saves goroutine spawn and makes
     //
     //    ctx = Merge(ctx1, ctx2); ctx.Err != nil
     //
     // check possible.
     select {
     case <-ctx1.Done():
-        mc.done = ctx1.Done()
+        close(mc.done)
         mc.doneErr = ctx1.Err()

     case <-ctx2.Done():
-        mc.done = ctx2.Done()
+        close(mc.done)
         mc.doneErr = ctx2.Err()

-    // src ctx not canceled - spawn ctx{1,2}.done merger.
     default:
-        done := make(chan struct{})
-        mc.done = done
-        go mc.wait(done)
+        // src ctx not canceled - spawn ctx{1,2}.done merger.
+        go mc.wait()
     }
+    */
+
+    go mc.wait()

     return mc, mc.cancel
 }
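Only a fragment of xcontext.Merge is visible here: the commit comments out the fast path that pre-cancelled the merged context when a source context was already done, and instead always spawns the mc.wait() merger goroutine. The underlying idea, a context that is done when either parent is done, can be sketched with the standard library alone. Unlike the real Merge, this version always spends a goroutine and does not consult ctx2 for values:

    package main

    import (
        "context"
        "fmt"
    )

    // merge derives a context from ctx1 that is additionally cancelled when
    // ctx2 is cancelled. Sketch only - not the xcontext implementation.
    func merge(ctx1, ctx2 context.Context) (context.Context, context.CancelFunc) {
        ctx, cancel := context.WithCancel(ctx1)
        go func() {
            select {
            case <-ctx2.Done():
                cancel() // propagate ctx2's cancellation
            case <-ctx.Done():
                // ctx1 cancelled or caller cancelled - goroutine exits
            }
        }()
        return ctx, cancel
    }

    func main() {
        ctx2, cancel2 := context.WithCancel(context.Background())
        m, cancel := merge(context.Background(), ctx2)
        defer cancel()

        cancel2() // cancelling either parent cancels the merged context
        <-m.Done()
        fmt.Println(m.Err()) // context canceled
    }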