Commit 1786bc72 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 622ee617
...@@ -118,22 +118,25 @@ func (m *Master) run(ctx context.Context) { ...@@ -118,22 +118,25 @@ func (m *Master) run(ctx context.Context) {
// command to start cluster // command to start cluster
case c := <-m.ctlStart: case c := <-m.ctlStart:
if c.state == m.clusterState { if m.clusterState != ClusterRecovery {
// already there // start possible only from recovery
m.resp <- nil // XXX err ctx
c.resp <- fmt.Errorf("start: inappropriate current state: %v", m.clusterState)
break break
} }
switch c.state { // check preconditions for start
case RECOVERING: if !m.partTab.OperationalWith(m.nodeTab) {
case VERIFYING: // = RUNNING // XXX err ctx
case CLUSTER_RUNNING: // TODO + how much % PT is covered
case STOPPING: c.resp <- fmt.Errorf("start: non-operational partition table")
break
default:
// TODO
} }
// XXX cancel/stop/wait for current recovery task
// XXX start starting task
// command to stop cluster // command to stop cluster
case c := <-m.ctlStop: case c := <-m.ctlStop:
...@@ -303,6 +306,36 @@ func (m *Master) storCtlRecovery(ctx context.Context, link *NodeLink) { ...@@ -303,6 +306,36 @@ func (m *Master) storCtlRecovery(ctx context.Context, link *NodeLink) {
m.storRecovery <- storRecovery{partTab: pt} m.storRecovery <- storRecovery{partTab: pt}
} }
// storCtlVerify drives a storage node during cluster verifying (= starting) state.
//
// It first asks the storage for its locked transactions and panics if there
// are any (handling them is still TODO), then asks for the storage's last
// OID/TID. If either request fails the function currently just returns;
// the error is not yet reported anywhere.
//
// XXX does this need to be a member on Master ?
func (m *Master) storCtlVerify(ctx context.Context, link *NodeLink) {
	// XXX err context + link.Close on err

	// NOTE(review): Ask is invoked without ctx/link, exactly as before -
	// confirm against Ask's real signature. The request argument must be a
	// value, not a type name, hence the composite literal.
	locked := AnswerLockedTransactions{}
	err := Ask(&LockedTransactions{}, &locked)
	if err != nil {
		return // XXX err
	}

	// an `if` condition must be boolean - compare the length explicitly
	if len(locked.TidDict) != 0 {
		// TODO vvv
		panic(fmt.Sprintf("non-ø locked txns in verify: %v", locked.TidDict))
	}

	last := AnswerLastIDs{}
	err = Ask(&LastIDs{}, &last)
	if err != nil {
		return // XXX err
	}

	// XXX send this to driver (what to do with them ?) -> use for
	// - oid allocations
	// - next tid allocations etc
	//
	// Until the driver consumes them, discard explicitly: a bare selector
	// expression is not a valid Go statement.
	_ = last.LastOID
	_ = last.LastTID
}
// allocUUID allocates new node uuid for a node of kind nodeType // allocUUID allocates new node uuid for a node of kind nodeType
// XXX it is bad idea for master to assign uuid to coming node // XXX it is bad idea for master to assign uuid to coming node
// -> better nodes generate really unique UUID themselves and always show with them // -> better nodes generate really unique UUID themselves and always show with them
......
...@@ -52,22 +52,22 @@ const ( ...@@ -52,22 +52,22 @@ const (
type ClusterState int32 type ClusterState int32
const ( const (
// Once the primary master is elected, the cluster has a state, which is // Once the primary master is elected, the cluster has a state, which is
// initially RECOVERING, during which the master: // initially ClusterRecovering, during which the master:
// - first recovers its own data by reading it from storage nodes; // - first recovers its own data by reading it from storage nodes;
// - waits for the partition table to be operational; // - waits for the partition table to be operational;
// - automatically switches to VERIFYING if the cluster can be safely started. // - automatically switches to ClusterVerifying if the cluster can be safely started.
// Whenever the partition table becomes non-operational again, the cluster // Whenever the partition table becomes non-operational again, the cluster
// goes back to this state. // goes back to this state.
RECOVERING ClusterState = iota ClusterRecovering ClusterState = iota
// Transient state, used to: // Transient state, used to:
// - replay the transaction log, in case of unclean shutdown; // - replay the transaction log, in case of unclean shutdown;
// - and actually truncate the DB if the user asked to do so. // - and actually truncate the DB if the user asked to do so.
// Then, the cluster either goes to RUNNING or STARTING_BACKUP state. // Then, the cluster either goes to ClusterRunning or STARTING_BACKUP state.
VERIFYING ClusterVerifying // XXX = ClusterStarting
// Normal operation. The DB is read-writable by clients. // Normal operation. The DB is read-writable by clients.
CLUSTER_RUNNING // XXX conflict with NodeState.RUNNING ClusterRunning
// Transient state to shutdown the whole cluster. // Transient state to shutdown the whole cluster.
STOPPING ClusterStopping
// Transient state, during which the master (re)connect to the upstream // Transient state, during which the master (re)connect to the upstream
// master. // master.
STARTING_BACKUP STARTING_BACKUP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment