Commit 46c7bdf6 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 7efd3012
...@@ -180,6 +180,14 @@ func (m *Master) run(ctx context.Context) { ...@@ -180,6 +180,14 @@ func (m *Master) run(ctx context.Context) {
} }
// Cluster Recovery
// ----------------
//
// - accept connections from storage nodes
// - retrieve and recover previously saved partition table from storages
// - monitor whether partition table becomes operational wrt currently up nodeset
// - if yes - finish recovering upon receiving "start" command
// recovery is a process that drives cluster via recovery phase // recovery is a process that drives cluster via recovery phase
// //
// XXX draft: Cluster Recovery if []Stor is fixed // XXX draft: Cluster Recovery if []Stor is fixed
...@@ -293,6 +301,14 @@ func storCtlRecovery(ctx context.Context, link *NodeLink, res chan storRecovery) ...@@ -293,6 +301,14 @@ func storCtlRecovery(ctx context.Context, link *NodeLink, res chan storRecovery)
} }
// Cluster Verification
// --------------------
//
// - starts with operational parttab
// - tell all storages to perform data verification (TODO) and retrieve last ids
// - once we are done without losing too many storages in the process (so that
// parttab is still operational) we are ready to enter servicing state.
// verify is a process that drives cluster via verification phase // verify is a process that drives cluster via verification phase
// //
// prerequisite for start: .partTab is operational wrt .nodeTab // prerequisite for start: .partTab is operational wrt .nodeTab
...@@ -347,6 +363,7 @@ loop: ...@@ -347,6 +363,7 @@ loop:
} }
if err != nil { if err != nil {
// XXX -> err = fmt.Errorf("... %v", err)
fmt.Printf("master: verify: %v\n", err) fmt.Printf("master: verify: %v\n", err)
// consume left verify responses (which should come without delay since it was cancelled) // consume left verify responses (which should come without delay since it was cancelled)
...@@ -405,9 +422,19 @@ func storCtlVerify(ctx context.Context, link *NodeLink, res chan storVerify) { ...@@ -405,9 +422,19 @@ func storCtlVerify(ctx context.Context, link *NodeLink, res chan storVerify) {
} }
// Cluster Running
// ---------------
//
// - starts with operational parttab and (enough ?) present storage nodes passed verification
// - monitor storages coming & going and, if parttab becomes non-operational, leave to recovery
// - provide service to clients while we are here
//
// TODO also plan data movement on new storage nodes appearing
// service is the process that drives cluster during running state
//
// XXX draft: Cluster Running if []Stor is fixed // XXX draft: Cluster Running if []Stor is fixed
func (m *Master) runxxx(ctx context.Context, storv []*NodeLink) { func (m *Master) service(ctx context.Context, storv []*NodeLink) {
// TODO // TODO
} }
......
...@@ -55,7 +55,7 @@ const ( ...@@ -55,7 +55,7 @@ const (
// initially ClusterRecovery, during which the master: // initially ClusterRecovery, during which the master:
// - first recovers its own data by reading it from storage nodes; // - first recovers its own data by reading it from storage nodes;
// - waits for the partition table be operational; // - waits for the partition table be operational;
// - automatically switch to ClusterVerifying if the cluster can be safely started. // - automatically switch to ClusterVerifying if the cluster can be safely started. XXX not automatic
// Whenever the partition table becomes non-operational again, the cluster // Whenever the partition table becomes non-operational again, the cluster
// goes back to this state. // goes back to this state.
ClusterRecovering ClusterState = iota ClusterRecovering ClusterState = iota
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment