Commit 46c7bdf6 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 7efd3012
......@@ -180,6 +180,14 @@ func (m *Master) run(ctx context.Context) {
}
// Cluster Recovery
// ----------------
//
// - accept connections from storage nodes
// - retrieve and recover previously saved partition table from storages
// - monitor whether partition table becomes operational wrt currently up nodeset
// - if yes - finish recovering upon receiving "start" command
// recovery is a process that drives cluster via recovery phase
//
// XXX draft: Cluster Recovery if []Stor is fixed
......@@ -293,6 +301,14 @@ func storCtlRecovery(ctx context.Context, link *NodeLink, res chan storRecovery)
}
// Cluster Verification
// --------------------
//
// - starts with operational parttab
// - tell all storages to perform data verification (TODO) and retrieve last ids
// - once we are done without losing too many storages in the process (so that
// parttab is still operational) we are ready to enter servicing state.
// verify is a process that drives cluster via verification phase
//
// prerequisite for start: .partTab is operational wrt .nodeTab
......@@ -347,6 +363,7 @@ loop:
}
if err != nil {
// XXX -> err = fmt.Errorf("... %v", err)
fmt.Printf("master: verify: %v\n", err)
// consume remaining verify responses (which should come without delay since it was cancelled)
......@@ -405,9 +422,19 @@ func storCtlVerify(ctx context.Context, link *NodeLink, res chan storVerify) {
}
// Cluster Running
// ---------------
//
// - starts with operational parttab and (enough ?) present storage nodes passed verification
// - monitor storages come & go and if parttab becomes non-operational leave to recovery
// - provide service to clients while we are here
//
// TODO also plan data movement on new storage nodes appearing
// service is the process that drives cluster during running state
//
// XXX draft: Cluster Running if []Stor is fixed
func (m *Master) runxxx(ctx context.Context, storv []*NodeLink) {
func (m *Master) service(ctx context.Context, storv []*NodeLink) {
// TODO: not implemented yet - the body is currently empty and returns immediately.
}
......
......@@ -55,7 +55,7 @@ const (
// initially ClusterRecovery, during which the master:
// - first recovers its own data by reading it from storage nodes;
// - waits for the partition table to be operational;
// - automatically switch to ClusterVerifying if the cluster can be safely started.
// - automatically switch to ClusterVerifying if the cluster can be safely started. XXX not automatic
// Whenever the partition table becomes non-operational again, the cluster
// goes back to this state.
ClusterRecovering ClusterState = iota
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment