Commit 1786bc72 authored by Kirill Smelkov

.

parent 622ee617
@@ -118,22 +118,25 @@ func (m *Master) run(ctx context.Context) {
// command to start cluster
case c := <-m.ctlStart:
if c.state == m.clusterState {
// already there
m.resp <- nil
if m.clusterState != ClusterRecovering {
// start possible only from recovery
// XXX err ctx
c.resp <- fmt.Errorf("start: inappropriate current state: %v", m.clusterState)
break
}
switch c.state {
case RECOVERING:
case VERIFYING: // = RUNNING
case CLUSTER_RUNNING:
case STOPPING:
default:
// TODO
// check preconditions for start
if !m.partTab.OperationalWith(m.nodeTab) {
// XXX err ctx
// TODO + how much % PT is covered
c.resp <- fmt.Errorf("start: non-operational partition table")
break
}
// XXX cancel/stop/wait for current recovery task
// XXX start starting task
// command to stop cluster
case c := <-m.ctlStop:
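The start preconditions above hinge on m.partTab.OperationalWith(m.nodeTab), whose implementation is not part of this diff. A minimal sketch of what such a check could look like, with illustrative types that only stand in for the real neo/go ones:

// Illustrative shapes only - not the actual neo/go types.
type NodeState int

const RUNNING NodeState = 1

type Cell struct{ NodeUUID int32 }         // one replica of a partition
type PartitionTable struct{ tab [][]Cell } // tab[pid] -> cells storing partition pid
type NodeTable map[int32]NodeState         // node uuid -> current state

// operationalWith reports whether every partition has at least one replica
// whose storage node is currently RUNNING - the kind of precondition the
// start command checks before leaving recovery.
func operationalWith(pt *PartitionTable, nt NodeTable) bool {
	for _, cells := range pt.tab {
		alive := false
		for _, c := range cells {
			if nt[c.NodeUUID] == RUNNING {
				alive = true
				break
			}
		}
		if !alive {
			return false // this partition has no live replica
		}
	}
	return true
}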
@@ -303,6 +306,36 @@ func (m *Master) storCtlRecovery(ctx context.Context, link *NodeLink) {
m.storRecovery <- storRecovery{partTab: pt}
}
// storCtlVerify drives a storage node during cluster verifying (= starting) state
// XXX does this need to be a member on Master ?
func (m *Master) storCtlVerify(ctx context.Context, link *NodeLink) {
// XXX err context + link.Close on err
locked := AnswerLockedTransactions{}
err := Ask(&LockedTransactions{}, &locked)
if err != nil {
return // XXX err
}
if len(locked.TidDict) != 0 {
// TODO vvv
panic(fmt.Sprintf("non-ø locked txns in verify: %v", locked.TidDict))
}
last := AnswerLastIDs{}
err = Ask(&LastIDs{}, &last)
if err != nil {
return // XXX err
}
// XXX send this to driver (what to do with them ?) -> use for
// - oid allocations
// - next tid allocations etc
_ = last.LastOID // XXX not wired to the driver yet
_ = last.LastTID
}
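The gathered LastOID/LastTID still have to be reported back to the main driver loop. One way, mirroring the storRecovery channel pattern used by storCtlRecovery above, is a dedicated result message; everything below (storVerify, foldVerify, the Oid/Tid placeholders) is an assumption for illustration, not code from this commit:

// Oid and Tid are placeholders for the protocol's identifier types.
type Oid uint64
type Tid uint64

// storVerify is a hypothetical per-node verification result, analogous to
// the storRecovery message sent by storCtlRecovery.
type storVerify struct {
	lastOid Oid
	lastTid Tid
	err     error
}

// foldVerify folds per-node answers into cluster-wide last oid/tid:
// the most up-to-date storage defines where oid/tid allocation resumes.
func foldVerify(answers []storVerify) (lastOid Oid, lastTid Tid, err error) {
	for _, a := range answers {
		if a.err != nil {
			return 0, 0, a.err
		}
		if a.lastOid > lastOid {
			lastOid = a.lastOid
		}
		if a.lastTid > lastTid {
			lastTid = a.lastTid
		}
	}
	return lastOid, lastTid, nil
}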
// allocUUID allocates a new node uuid for a node of kind nodeType
// XXX it is a bad idea for the master to assign a uuid to a coming node
// -> better: nodes generate really unique UUIDs themselves and always present themselves with them
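Following the XXX above, a node could generate its identifier itself at startup and present it during the handshake instead of waiting for the master to assign one. A self-contained sketch of such generation (not part of this commit; the NEO protocol may constrain the uuid format, e.g. encode the node type in it, differently):

import (
	"crypto/rand"
	"encoding/binary"
)

// genNodeUUID returns a random 32-bit node identifier generated by the node
// itself, so the master never has to hand out uuids to joining nodes.
func genNodeUUID() (uint32, error) {
	var b [4]byte
	if _, err := rand.Read(b[:]); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint32(b[:]), nil
}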
......
@@ -52,22 +52,22 @@ const (
type ClusterState int32
const (
// Once the primary master is elected, the cluster has a state, which is
// initially RECOVERING, during which the master:
// initially ClusterRecovering, during which the master:
// - first recovers its own data by reading it from storage nodes;
// - waits for the partition table to be operational;
// - automatically switches to VERIFYING if the cluster can be safely started.
// - automatically switches to ClusterVerifying if the cluster can be safely started.
// Whenever the partition table becomes non-operational again, the cluster
// goes back to this state.
RECOVERING ClusterState = iota
ClusterRecovering ClusterState = iota
// Transient state, used to:
// - replay the transaction log, in case of unclean shutdown;
// - and actually truncate the DB if the user asked to do so.
// Then, the cluster either goes to RUNNING or STARTING_BACKUP state.
VERIFYING
// Then, the cluster either goes to ClusterRunning or STARTING_BACKUP state.
ClusterVerifying // XXX = ClusterStarting
// Normal operation. The DB is read-writable by clients.
CLUSTER_RUNNING // XXX conflict with NodeState.RUNNING
ClusterRunning
// Transient state to shutdown the whole cluster.
STOPPING
ClusterStopping
// Transient state, during which the master (re)connect to the upstream
// master.
STARTING_BACKUP
......
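The comments above describe a state flow: ClusterRecovering -> ClusterVerifying -> ClusterRunning, falling back to ClusterRecovering whenever the partition table becomes non-operational, with ClusterStopping reachable for shutdown. A sketch restating that flow as a transition table over the renamed constants (not part of this commit, and it omits the backup states):

// clusterTransitions restates the documented state flow as data.
var clusterTransitions = map[ClusterState][]ClusterState{
	ClusterRecovering: {ClusterVerifying, ClusterStopping},
	ClusterVerifying:  {ClusterRunning, ClusterRecovering, ClusterStopping},
	ClusterRunning:    {ClusterRecovering, ClusterStopping},
}

// canTransition reports whether going from -> to agrees with the documented flow.
func canTransition(from, to ClusterState) bool {
	for _, next := range clusterTransitions[from] {
		if next == to {
			return true
		}
	}
	return false
}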