Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Levin Zimmermann
neoppod
Commits
06158f50
Commit
06158f50
authored
May 31, 2017
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
92c5fd44
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
78 deletions
+28
-78
go/neo/master.go
go/neo/master.go
+17
-67
go/neo/nodetab.go
go/neo/nodetab.go
+10
-10
go/neo/parttab.go
go/neo/parttab.go
+1
-1
No files found.
go/neo/master.go
View file @
06158f50
...
@@ -54,8 +54,6 @@ type Master struct {
...
@@ -54,8 +54,6 @@ type Master struct {
ctlStop
chan
chan
error
// request to stop cluster
ctlStop
chan
chan
error
// request to stop cluster
ctlShutdown
chan
chan
error
// request to shutdown cluster XXX with ctx ?
ctlShutdown
chan
chan
error
// request to shutdown cluster XXX with ctx ?
wantToStart
chan
chan
error
// main -> recovery
// channels from various workers to main driver
// channels from various workers to main driver
nodeCome
chan
nodeCome
// node connected
nodeCome
chan
nodeCome
// node connected
nodeLeave
chan
nodeLeave
// node disconnected
nodeLeave
chan
nodeLeave
// node disconnected
...
@@ -71,8 +69,7 @@ type nodeCome struct {
...
@@ -71,8 +69,7 @@ type nodeCome struct {
// node disconnects
// node disconnects
type
nodeLeave
struct
{
type
nodeLeave
struct
{
link
*
NodeLink
link
*
NodeLink
// XXX better use uuid allocated on nodeCome ?
// XXX TODO
}
}
func
NewMaster
(
clusterName
string
)
*
Master
{
func
NewMaster
(
clusterName
string
)
*
Master
{
...
@@ -80,16 +77,14 @@ func NewMaster(clusterName string) *Master {
...
@@ -80,16 +77,14 @@ func NewMaster(clusterName string) *Master {
m
.
nodeUUID
=
m
.
allocUUID
(
MASTER
)
m
.
nodeUUID
=
m
.
allocUUID
(
MASTER
)
// TODO update nodeTab with self
// TODO update nodeTab with self
m
.
clusterState
=
ClusterRecovering
// XXX no elections - we are the only master
m
.
clusterState
=
ClusterRecovering
// XXX no elections - we are the only master
go
m
.
run
(
context
.
TODO
())
// XXX ctx
go
m
.
run
(
context
.
TODO
())
// XXX ctx
return
m
return
m
}
}
// XXX NotifyNodeInformation to all nodes whenever nodetab changes
// Start requests cluster to eventually transition into running state
// Start requests cluster to eventually transition into running state
// it returns an error if such transition is not currently possible (e.g. partition table is not operational)
// it returns an error if such transition is not currently possible
to begin
(e.g. partition table is not operational)
// it returns nil if the transition began.
// it returns nil if the transition began.
// NOTE upon successful return cluster is not yet in running state - the transition will
// NOTE upon successful return cluster is not yet in running state - the transition will
// take time and could be also automatically aborted due to cluster environment change (e.g.
// take time and could be also automatically aborted due to cluster environment change (e.g.
...
@@ -109,12 +104,15 @@ func (m *Master) Stop() error {
...
@@ -109,12 +104,15 @@ func (m *Master) Stop() error {
}
}
// Shutdown requests all known nodes in the cluster to stop
// Shutdown requests all known nodes in the cluster to stop
// XXX + master's run to finish ?
func
(
m
*
Master
)
Shutdown
()
error
{
func
(
m
*
Master
)
Shutdown
()
error
{
panic
(
"TODO"
)
panic
(
"TODO"
)
}
}
// setClusterState sets .clusterState and notifies subscribers
func
(
m
*
Master
)
setClusterState
(
state
ClusterState
)
{
func
(
m
*
Master
)
setClusterState
(
state
ClusterState
)
{
if
state
==
m
.
clusterState
{
if
state
==
m
.
clusterState
{
// <- XXX do we really need this ?
return
return
}
}
...
@@ -147,56 +145,8 @@ func (m *Master) run(ctx context.Context) {
...
@@ -147,56 +145,8 @@ func (m *Master) run(ctx context.Context) {
continue
// -> recovery
continue
// -> recovery
}
}
// XXX shutdown
// XXX shutdown ?
}
/*
go m.recovery(ctx)
for {
select {
case <-ctx.Done():
// XXX -> shutdown
panic("TODO")
// command to start cluster
case c := <-m.ctlStart:
if m.clusterState != ClusterRecovering {
// start possible only from recovery
// XXX err ctx
c.resp <- fmt.Errorf("start: inappropriate current state: %v", m.clusterState)
break
}
ch := make(chan error)
select {
case <-ctx.Done():
// XXX how to avoid checking this ctx.Done everywhere?
c.resp <- ctx.Err()
panic("TODO")
case m.wantToStart <- ch:
}
err := <-ch
c.resp <- err
if err != nil {
break
}
// recovery said it is ok to start and finished - launch verification
m.setClusterState(ClusterVerifying)
go m.verify(ctx)
// command to stop cluster
case <-m.ctlStop:
// TODO
// command to shutdown
case <-m.ctlShutdown:
// TODO
}
}
}
*/
}
}
...
@@ -207,24 +157,24 @@ func (m *Master) run(ctx context.Context) {
...
@@ -207,24 +157,24 @@ func (m *Master) run(ctx context.Context) {
// - accept connections from storage nodes
// - accept connections from storage nodes
// - retrieve and recover the latest previously saved partition table from storages
// - retrieve and recover the latest previously saved partition table from storages
// - monitor whether partition table becomes operational wrt currently up nodeset
// - monitor whether partition table becomes operational wrt currently up nodeset
// - if yes - finish recovering upon receiving "start" command
// - if yes - finish recovering upon receiving "start" command
XXX or autostart
// recovery drives cluster during recovery phase
// recovery drives cluster during recovery phase
//
//
// when recovery finishes error indicates:
// when recovery finishes error indicates:
// - nil: recovery was ok and a command came for cluster to start
// - nil: recovery was ok and a command came for cluster to start
XXX or autostart
// - !nil: recovery was cancelled
// - !nil: recovery was cancelled
func
(
m
*
Master
)
recovery
(
ctx
context
.
Context
)
(
err
error
)
{
func
(
m
*
Master
)
recovery
(
ctx
context
.
Context
)
(
err
error
)
{
m
.
setClusterState
(
ClusterRecovering
)
m
.
setClusterState
(
ClusterRecovering
)
recovery
:=
make
(
chan
storRecovery
)
rctx
,
rcancel
:=
context
.
WithCancel
(
ctx
)
rctx
,
rcancel
:=
context
.
WithCancel
(
ctx
)
defer
rcancel
()
defer
rcancel
()
recovery
:=
make
(
chan
storRecovery
)
inprogress
:=
0
inprogress
:=
0
// start recovery on all storages we are currently in touch with
// start recovery on all storages we are currently in touch with
for
_
,
stor
:=
range
m
.
nodeTab
.
StorageList
()
{
for
_
,
stor
:=
range
m
.
nodeTab
.
StorageList
()
{
if
stor
.
Info
.
NodeState
>
DOWN
{
// XXX state cmp ok
?
if
stor
.
NodeState
>
DOWN
{
// XXX state cmp ok ? XXX or stor.Link != nil
?
inprogress
++
inprogress
++
go
storCtlRecovery
(
rctx
,
stor
.
Link
,
recovery
)
go
storCtlRecovery
(
rctx
,
stor
.
Link
,
recovery
)
}
}
...
@@ -387,13 +337,12 @@ func storCtlRecovery(ctx context.Context, link *NodeLink, res chan storRecovery)
...
@@ -387,13 +337,12 @@ func storCtlRecovery(ctx context.Context, link *NodeLink, res chan storRecovery)
// verify drives cluster via verification phase
// verify drives cluster via verification phase
//
//
// prerequisite for start: .partTab is operational wrt .nodeTab
// prerequisite for start: .partTab is operational wrt .nodeTab
func
(
m
*
Master
)
verify
(
ctx
context
.
Context
)
error
{
func
(
m
*
Master
)
verify
(
ctx
context
.
Context
)
(
err
error
)
{
m
.
setClusterState
(
ClusterVerifying
)
m
.
setClusterState
(
ClusterVerifying
)
var
err
error
verify
:=
make
(
chan
storVerify
)
vctx
,
vcancel
:=
context
.
WithCancel
(
ctx
)
vctx
,
vcancel
:=
context
.
WithCancel
(
ctx
)
defer
vcancel
()
defer
vcancel
()
verify
:=
make
(
chan
storVerify
)
inprogress
:=
0
inprogress
:=
0
// XXX ask every storage for verify and wait for _all_ of them to complete?
// XXX ask every storage for verify and wait for _all_ of them to complete?
...
@@ -401,6 +350,7 @@ func (m *Master) verify(ctx context.Context) error {
...
@@ -401,6 +350,7 @@ func (m *Master) verify(ctx context.Context) error {
// start verification on all storages we are currently in touch with
// start verification on all storages we are currently in touch with
for
_
,
stor
:=
range
m
.
nodeTab
.
StorageList
()
{
for
_
,
stor
:=
range
m
.
nodeTab
.
StorageList
()
{
// XXX check state > DOWN
inprogress
++
inprogress
++
go
storCtlVerify
(
vctx
,
stor
.
Link
,
verify
)
go
storCtlVerify
(
vctx
,
stor
.
Link
,
verify
)
}
}
...
...
go/neo/nodetab.go
View file @
06158f50
...
@@ -81,7 +81,8 @@ type NodeTable struct {
...
@@ -81,7 +81,8 @@ type NodeTable struct {
// Node represents a node entry in NodeTable
// Node represents a node entry in NodeTable
type
Node
struct
{
type
Node
struct
{
Info
NodeInfo
// XXX extract ? XXX -> embed
//Info NodeInfo // XXX extract ? XXX -> embed
NodeInfo
Link
*
NodeLink
// link to this node; =nil if not connected XXX do we need it here ?
Link
*
NodeLink
// link to this node; =nil if not connected XXX do we need it here ?
// XXX identified or not ?
// XXX identified or not ?
...
@@ -93,7 +94,7 @@ type Node struct {
...
@@ -93,7 +94,7 @@ type Node struct {
func
(
nt
*
NodeTable
)
Get
(
uuid
NodeUUID
)
*
Node
{
func
(
nt
*
NodeTable
)
Get
(
uuid
NodeUUID
)
*
Node
{
// FIXME linear scan
// FIXME linear scan
for
_
,
node
:=
range
nt
.
nodev
{
for
_
,
node
:=
range
nt
.
nodev
{
if
node
.
Info
.
NodeUUID
==
uuid
{
if
node
.
NodeUUID
==
uuid
{
return
node
return
node
}
}
}
}
...
@@ -111,10 +112,10 @@ func (nt *NodeTable) Update(nodeInfo NodeInfo, link *NodeLink) *Node {
...
@@ -111,10 +112,10 @@ func (nt *NodeTable) Update(nodeInfo NodeInfo, link *NodeLink) *Node {
nt
.
nodev
=
append
(
nt
.
nodev
,
node
)
nt
.
nodev
=
append
(
nt
.
nodev
,
node
)
}
}
node
.
Info
=
nodeInfo
node
.
Node
Info
=
nodeInfo
node
.
Link
=
link
node
.
Link
=
link
nt
.
notify
(
node
.
Info
)
nt
.
notify
(
node
.
Node
Info
)
return
node
return
node
}
}
...
@@ -141,9 +142,9 @@ func (nt *NodeTable) UpdateLinkDown(link *NodeLink) *Node {
...
@@ -141,9 +142,9 @@ func (nt *NodeTable) UpdateLinkDown(link *NodeLink) *Node {
panic
(
"nodetab: UpdateLinkDown: no corresponding entry"
)
panic
(
"nodetab: UpdateLinkDown: no corresponding entry"
)
}
}
node
.
Info
.
NodeState
=
DOWN
node
.
NodeState
=
DOWN
nt
.
notify
(
node
.
Info
)
nt
.
notify
(
node
.
Node
Info
)
return
node
return
node
}
}
...
@@ -153,7 +154,7 @@ func (nt *NodeTable) StorageList() []*Node {
...
@@ -153,7 +154,7 @@ func (nt *NodeTable) StorageList() []*Node {
// FIXME linear scan
// FIXME linear scan
sl
:=
[]
*
Node
{}
sl
:=
[]
*
Node
{}
for
_
,
node
:=
range
nt
.
nodev
{
for
_
,
node
:=
range
nt
.
nodev
{
if
node
.
Info
.
NodeType
==
STORAGE
{
if
node
.
NodeType
==
STORAGE
{
sl
=
append
(
sl
,
node
)
sl
=
append
(
sl
,
node
)
}
}
}
}
...
@@ -167,10 +168,9 @@ func (nt *NodeTable) String() string {
...
@@ -167,10 +168,9 @@ func (nt *NodeTable) String() string {
buf
:=
bytes
.
Buffer
{}
buf
:=
bytes
.
Buffer
{}
// XXX also for .storv
// XXX also for .storv
for
_
,
n
ode
:=
range
nt
.
nodev
{
for
_
,
n
:=
range
nt
.
nodev
{
// XXX recheck output
// XXX recheck output
i
:=
node
.
Info
fmt
.
Fprintf
(
&
buf
,
"%s (%s)
\t
%s
\t
%s
\n
"
,
n
.
NodeUUID
,
n
.
NodeType
,
n
.
NodeState
,
n
.
Address
)
fmt
.
Fprintf
(
&
buf
,
"%s (%s)
\t
%s
\t
%s
\n
"
,
i
.
NodeUUID
,
i
.
NodeType
,
i
.
NodeState
,
i
.
Address
)
}
}
return
buf
.
String
()
return
buf
.
String
()
...
...
go/neo/parttab.go
View file @
06158f50
...
@@ -152,7 +152,7 @@ func (pt *PartitionTable) OperationalWith(nt *NodeTable) bool {
...
@@ -152,7 +152,7 @@ func (pt *PartitionTable) OperationalWith(nt *NodeTable) bool {
case
UP_TO_DATE
,
FEEDING
:
// XXX cell.isReadable in py
case
UP_TO_DATE
,
FEEDING
:
// XXX cell.isReadable in py
// cell says it is readable. let's check whether corresponding node is up
// cell says it is readable. let's check whether corresponding node is up
node
:=
nt
.
Get
(
cell
.
NodeUUID
)
node
:=
nt
.
Get
(
cell
.
NodeUUID
)
if
node
==
nil
||
node
.
Info
.
NodeState
!=
RUNNING
{
// XXX PENDING is also ok ?
if
node
==
nil
||
node
.
NodeState
!=
RUNNING
{
// XXX PENDING is also ok ?
continue
continue
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment