Commit 24c420c0 authored by Kirill Smelkov

.

parent 3c25e926
@@ -414,7 +414,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 	}
 
 	// XXX set cluster state = RECOVERY
-	// XXX close links to clients
+	// XXX down clients
 
 	// start recovery on all storages we are currently in touch with
 	for _, stor := range m.node.State.NodeTab.StorageList() {
@@ -463,54 +463,41 @@ loop:
 		case ech := <-m.ctlStop:
 			close(ech) // ok; we are already recovering
 
-		// new connection comes in
+		// new connection comes in and asks to be identified
 		case n := <-m.nodeComeq:
-			peer, ok := m.identify(ctx, n, /* XXX only accept storages -> PENDING */)
+			peer, ok := m.identify(ctx, n,
+				/* XXX only accept storages -> PENDING | MASTER */
+			)
 			if !ok {
 				break
 			}
 
-			// if new storage arrived - start recovery on it too
-			inprogress++
-
-			peer.wg.Go(func(peerCtx context.Context) error {
-				err := m.accept(peerCtx, peer)
-				if err != nil {
-					return err
-				}
-
-				ctx, cancel := xxcontext.Merge/*Cancel*/(ctx, peerCtx)
-				//defer cancel()
-			})
-
-			wg.Add(1)
-			go func() {
-				defer wg.Done()
-
-				// start recovery
+			// S -> start recovery
+			if peer.node.Type != proto.STORAGE {
+				break
+			}
+
+			// XXX -> recoveryq
+			// XXX wg.Add(1) + defer wg.Done() ?
+			peer.wg.Go(func(peerCtx context.Context) error {
+				ctx, cancel := xxcontext.Merge/*Cancel*/(ctx, peerCtx)
+				defer cancel()
+
 				var pt *xneo.PartitionTable
-				err := acceptAndRun(func(workCtx context.Context, node *xneo.PeerNode) error {
-					ctx, cancel := xxcontext.Merge/*Cancel*/(ctx, workCtx)
-					defer cancel()
-					pt, err = storCtlRecovery(ctx, node)
-					return err
+				err := peer.run(ctx, func(...) {
+					pt, err = storCtlRecovery(...)
 				})
 
 				ack := make(chan struct{})
-				recoveryq <- storRecovery{stor: peer.node, partTab: pt, err: err, ack: ack}
+				recoveryq <- storRecovery{stor: peer, partTab: pt, err: err, ack: ack}
 				<-ack
 
-				/*
-				err := m.accept(node, state0, n.req, resp)
-				if err != nil {
-					recovery <- storRecovery{stor: node, err: err}
-					return
+				// canceled recovery does not mean we should down the peer
+				if xcontext.Canceled(err) {
+					err = nil
 				}
 				return err
 			})
-
-				// start recovery
-				storCtlRecovery(ctx, node, recovery)
-				*/
-			}()
 
 		// XXX move up (before nodeComeq) ?
 		case n := <-m.nodeLeaveq:
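
The recovery closure above hands its result back to the main loop with an explicit acknowledgement: it sends a storRecovery value carrying an ack channel and then blocks on <-ack until main has merged the partition table. Below is a minimal, self-contained sketch of that send-and-wait-for-ack pattern; the result type, resultq channel and the worker are illustrative stand-ins, not types from this codebase.

package main

import "fmt"

// result mimics the storRecovery message: a payload plus an ack channel
// that the receiver closes once the payload has been processed.
type result struct {
	value int
	ack   chan struct{}
}

func main() {
	resultq := make(chan result)

	// worker: send the result, then wait until main acknowledges it
	go func() {
		ack := make(chan struct{})
		resultq <- result{value: 42, ack: ack}
		<-ack // main has consumed the result; safe to continue/return
	}()

	// main loop: receive, process, then release the worker
	r := <-resultq
	fmt.Println("recovered:", r.value)
	close(r.ack)
}
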
@@ -1316,13 +1303,15 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (peer *_MasteredPeer,
 	}
 	m.peerTab[node.NID] = peer
+
+	// XXX peer.wg.Go(m.accept)
 	return peer, true
 }
 
 // accept sends acceptance to just identified peer, sends nodeTab and partTab
 // and spawns task to proxy their updates to the peer. XXX
 // XXX +ctx?
-func (m *Master) accept(p *_MasteredPeer, idReq *neonet.Request) error {
+func (m *Master) __accept(p *_MasteredPeer, idReq *neonet.Request) error {
 	// XXX errctx?
 	err := idReq.Reply(p.accept)
 	if err != nil {
@@ -1349,6 +1338,8 @@ func (m *Master) accept(p *_MasteredPeer, idReq *neonet.Request) error {
 
 	// XXX send clusterState too? (NEO/py does not send it)
 
+	// TODO indicate that initial phase of accept is done
+
 	p.wg.Go(p.notify)      // main -> peer  δnodeTab/δpartTab/δcluterState to proxy to peer link
 	m.mainWG.Go(p.waitAll) // main <- peer  "peer (should be) disconnected"
 	return nil
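
accept finishes by registering two tasks: p.wg.Go(p.notify) runs the update proxy under the peer's own task group, while m.mainWG.Go(p.waitAll) runs, under the master's main group, a watcher that reports once all of the peer's tasks have returned (waitAll itself appears in the next hunk). A rough sketch of that two-level grouping, with golang.org/x/sync/errgroup standing in for the project's own task-group type; Peer, peerGoneq and the trivial task are hypothetical.

package main

import (
	"fmt"

	"golang.org/x/sync/errgroup"
)

// Peer groups everything spawned on behalf of one connected node.
type Peer struct {
	name      string
	wg        errgroup.Group // per-peer tasks (notify proxy, per-peer work, ...)
	peerGoneq chan *Peer     // main <- peer: "this peer should go"
}

// waitAll blocks until every per-peer task has returned and then tells the
// main loop that the peer is (to be treated as) disconnected.
func (p *Peer) waitAll() error {
	_ = p.wg.Wait()
	p.peerGoneq <- p
	return nil
}

func main() {
	var mainWG errgroup.Group
	p := &Peer{name: "S1", peerGoneq: make(chan *Peer, 1)}

	p.wg.Go(func() error { return nil }) // stand-in for p.notify
	mainWG.Go(p.waitAll)                 // watcher lives under the main group

	fmt.Println("gone:", (<-p.peerGoneq).name)
	_ = mainWG.Wait()
}
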
@@ -1420,6 +1411,7 @@ func (p *_MasteredPeer) notify(ctx context.Context) (err error) {
 // waitAll waits for all tasks related to peer to complete and then notifies
 // main that peer node should go. It is spawned under mainWG.
 // XXX naming -> wait?
+// XXX inline into identify
 func (p *_MasteredPeer) waitAll(_ context.Context) error {
 	// don't take our ctx into account - it is ~ runCtx and should be
 	// parent of context under which per-peer tasks are spawned. This way
@@ -1431,6 +1423,19 @@ func (p *_MasteredPeer) waitAll(_ context.Context) error {
 	return nil // XXX or ctx.Err() ?
 }
 
+// XXX run runs f after initial phase of peer acceptance is over.
+//
+// XXX this is very similar if a separate Accept call would return peers
+// already identified and answered with initial accept message sequence.
+// However identification needs decisions from main task (to e.g. consult
+// nodeTab to see if peer laddr is not overlapping with anyone's, and to assign
+// nid). Because main is involved we cannot move it to completely separate task
+// and give main only one Accept entry point to call.
+func (p *_MasteredPeer) run(f) error {
+	// XXX wait p.acceptDone
+	// XXX f()
+}
+
 // allocNID allocates new node ID for a node of kind nodeType.
 // XXX it is bad idea for master to assign node ID to coming node
 // -> better nodes generate really unique UUID themselves and always show with them
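
run is only stubbed out above: wait for the initial accept phase to finish, then call f. One possible shape, assuming a hypothetical acceptDone channel that accept (or __accept) would close after sending the initial accept message sequence, and honouring context cancellation while waiting:

package main

import (
	"context"
	"fmt"
)

type Peer struct {
	acceptDone chan struct{} // assumed: closed when the initial accept phase is over
}

// run waits for the peer to be fully accepted, then invokes f; if ctx is
// canceled first, it returns the cancellation error without calling f.
func (p *Peer) run(ctx context.Context, f func(context.Context) error) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-p.acceptDone:
	}
	return f(ctx)
}

func main() {
	p := &Peer{acceptDone: make(chan struct{})}
	close(p.acceptDone) // pretend accept has already finished

	err := p.run(context.Background(), func(ctx context.Context) error {
		fmt.Println("recovery work runs only after accept")
		return nil
	})
	fmt.Println("err:", err)
}
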