Commit e956f33c authored by Kirill Smelkov

.

parent a158ddd1
@@ -195,6 +195,7 @@ func (c *Client) updateOperational() (sendReady func()) {
//
// The only error possible is if provided ctx cancels.
// XXX and client stopped/closed? (ctx passed to Run cancelled)
//
// XXX change signature to call f from under withOperational ?
func (c *Client) withOperational(ctx context.Context) error {
for {
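A minimal self-contained sketch (all names hypothetical, not the actual neo/go code) of the wait-until-operational pattern withOperational implements: a ready channel is closed and re-made on every state change, and waiters recheck the flag after waking:

package sketch

import (
        "context"
        "sync"
)

type client struct {
        mu          sync.RWMutex
        operational bool
        opReady     chan struct{} // closed and re-made on every state change
}

func newClient() *client {
        return &client{opReady: make(chan struct{})}
}

// setOperational is what the notification receiver would call on state changes.
func (c *client) setOperational(op bool) {
        c.mu.Lock()
        c.operational = op
        close(c.opReady)                // wake all current waiters
        c.opReady = make(chan struct{}) // future waiters block on a fresh channel
        c.mu.Unlock()
}

// withOperational blocks until the client is operational or ctx is done.
func (c *client) withOperational(ctx context.Context) error {
        for {
                c.mu.RLock()
                if c.operational {
                        c.mu.RUnlock()
                        return nil
                }
                ready := c.opReady
                c.mu.RUnlock()

                select {
                case <-ctx.Done():
                        return ctx.Err()
                case <-ready:
                        // state changed; recheck
                }
        }
}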
@@ -222,7 +223,7 @@ func (c *Client) withOperational(ctx context.Context) error {
// talkMaster connects to master, announces self and receives notifications.
// it tries to persist master link reconnecting as needed.
//
- // XXX C -> M for commit (-> another channel)
+ // TODO C -> M for commit (-> another channel)
//
// XXX always error (dup Storage.talkMaster) ?
func (c *Client) talkMaster(ctx context.Context) (err error) {
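The reconnect loop this comment describes, sketched under assumptions: talk1 stands in for talkMaster1 (one dial + session), and the fixed 1s backoff is illustrative:

package sketch

import (
        "context"
        "log"
        "time"
)

// talkMaster keeps the master link alive: it runs one session after
// another, reconnecting after each failure until ctx is cancelled.
func talkMaster(ctx context.Context, talk1 func(context.Context) error) error {
        for {
                err := talk1(ctx) // dial M, identify, serve until the link breaks
                log.Printf("master session ended: %v", err)

                select {
                case <-ctx.Done():
                        return ctx.Err() // asked to shut down; stop reconnecting
                case <-time.After(1 * time.Second): // simple fixed backoff, then retry
                }
        }
}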
@@ -318,7 +319,7 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) {
})
// init partition table and lastTid from master
- // TODO better change protocol for master to send us pt/head via notify
+ // TODO better change protocol for master to send us head via notify
// channel right after identification.
wg.Go(func() error {
return c.initFromMaster(ctx, mlink)
@@ -327,7 +328,7 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) {
return wg.Wait()
}
- // initFromMaster asks M for partTab and DB head right after identification.
+ // initFromMaster asks M for DB head right after identification.
func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) {
defer task.Running(&ctx, "init")(&err)
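The concurrent structure around wg above, sketched with golang.org/x/sync/errgroup (whether the real code uses errgroup or an equivalent work-group is an assumption; recvMaster/initFromMaster are passed in as stubs):

package sketch

import (
        "context"

        "golang.org/x/sync/errgroup"
)

type nodeLink struct{} // stand-in for *neonet.NodeLink

// afterIdentify runs the notification receiver and the initial state
// fetch concurrently; if either fails, the shared ctx is cancelled and
// both are torn down together.
func afterIdentify(ctx context.Context, mlink *nodeLink,
        recvMaster, initFromMaster func(context.Context, *nodeLink) error) error {

        wg, ctx := errgroup.WithContext(ctx)
        wg.Go(func() error { return recvMaster(ctx, mlink) })
        wg.Go(func() error { return initFromMaster(ctx, mlink) })
        return wg.Wait()
}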
@@ -362,15 +363,11 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) {
close(c.at0Ready)
}
- // XXX what next?
return nil
- // TODO transaction control? -> better in original goroutines doing the txn (just share mlink)
}
- // recvMaster receives and handles notifications from master
+ // recvMaster receives and handles notifications from master.
func (c *Client) recvMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) {
defer task.Running(&ctx, "rx")(&err)
@@ -388,7 +385,7 @@ func (c *Client) recvMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) {
}
}
- // recvMaster1 handles 1 message from master
+ // recvMaster1 handles 1 message from master.
func (c *Client) recvMaster1(ctx context.Context, req neonet.Request) error {
switch msg := req.Msg.(type) {
// <- committed txn
@@ -398,6 +395,7 @@ func (c *Client) recvMaster1(ctx context.Context, req neonet.Request) error {
// messages for state changes
+ // XXX -> NodeApp into common code to handle NodeTab + PartTab updates from M?
c.node.StateMu.Lock()
switch msg := req.Msg.(type) {
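A sketch of the dispatch shape recvMaster1 has in the two hunks above: invalidations from committed transactions are handled directly, while state-change notifications are applied under the state lock. Message types and fields are simplified stand-ins for the proto ones:

package sketch

import (
        "fmt"
        "sync"
)

type (
        msg                interface{}
        invalidateObjects  struct{ tid uint64 } // <- committed txn
        notifyClusterState struct{ state int }  // state change from M
)

type clientNode struct {
        stateMu sync.Mutex
        state   int
}

func (c *clientNode) recvMaster1(m msg) error {
        switch m := m.(type) {
        case *invalidateObjects:
                // forward tid/oids to the cache invalidation machinery
                _ = m.tid
                return nil

        case *notifyClusterState:
                c.stateMu.Lock() // state updates are applied under StateMu
                c.state = m.state
                c.stateMu.Unlock()
                return nil
        }
        return fmt.Errorf("unexpected message from master: %T", m)
}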
@@ -549,8 +547,9 @@ func (c *Client) Load(ctx context.Context, xid zodb.Xid) (buf *mem.Buf, serial zodb.Tid, err error) {
if err != nil {
return nil, 0, err // XXX err ctx
}
- // FIXME ^^^ slink.CloseAccept after really dialed (not to deadlock if
- // S decides to send us something)
+ // close accept after really dialed (not to deadlock if S decides to
+ // send us something).
slink.CloseAccept() // XXX need to close only after really dialed
// on the wire it comes as "before", not "at"
req := proto.GetObject{
......
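On the "before, not at" note above: ZODB load asks for the object state as of transaction at, while the NEO wire protocol wants the first tid not to look at. Assuming tids are plain ordered integers, the conversion is just +1/-1 (a sketch, not the real zodb.Tid API):

package sketch

type tid uint64 // stand-in for zodb.Tid

// at2Before converts "state as of at" to the wire's "state before" convention.
func at2Before(at tid) tid { return at + 1 }

// before2At converts back.
func before2At(before tid) tid { return before - 1 }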
@@ -211,7 +211,7 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
}
// runMain is the process that implements main master cluster management logic: node tracking, cluster
- // state updates, scheduling data movement between storage nodes etc.
+ // state updates, scheduling data movement between storage nodes, etc.
func (m *Master) runMain(ctx context.Context) (err error) {
defer task.Running(&ctx, "main")(&err)
@@ -220,6 +221,7 @@ func (m *Master) runMain(ctx context.Context) (err error) {
// XXX however since clients request state reading we should use node.StateMu?
// XXX -> better rework protocol so that master pushes itself (not
// being pulled) to clients everything they need.
+ // -> it was reworked (see bf240897)
for ctx.Err() == nil {
// recover partition table from storages and wait till enough
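A sketch of the phase loop runMain drives, per the comment above: recover the partition table, verify storages, then serve clients, starting over when something breaks. Phase functions and the retry policy are stand-ins:

package sketch

import "context"

// runMain cycles the cluster through its phases until ctx is cancelled.
func runMain(ctx context.Context, recovery, verify, service func(context.Context) error) error {
        for ctx.Err() == nil {
                if err := recovery(ctx); err != nil {
                        continue // the real code would log and retry recovery
                }
                if err := verify(ctx); err != nil {
                        continue // verification failed; go back to recovery
                }
                _ = service(ctx) // serve until the cluster breaks, then start over
        }
        return ctx.Err()
}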
@@ -488,6 +489,7 @@ func storCtlRecovery(ctx context.Context, stor *Node, res chan storRecovery) {
defer task.Runningf(&ctx, "%s: stor recovery", slink.RemoteAddr())(&err)
// XXX cancel on ctx
+ // XXX close slink on err? (if yes -> xcontext.WithCloseOnErrCancel)
recovery := proto.AnswerRecovery{}
err = slink.Ask1(&proto.Recovery{}, &recovery)
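The shape of storCtlRecovery, sketched: one goroutine per storage performs the Recovery round-trip and reports on a shared result channel, so the main loop can gather answers from many storages concurrently. Field names are illustrative, not proto's:

package sketch

import "context"

type storRecovery struct {
        stor string // which storage answered
        ptid uint64 // e.g. the partition-table id it reported
        err  error
}

// storCtlRecovery asks one storage for its recovery info and sends the
// outcome on res; ask stands in for slink.Ask1(&proto.Recovery{}, ...).
func storCtlRecovery(ctx context.Context, stor string,
        ask func(context.Context) (uint64, error), res chan<- storRecovery) {

        ptid, err := ask(ctx)
        select {
        case res <- storRecovery{stor: stor, ptid: ptid, err: err}:
        case <-ctx.Done(): // the main loop gave up waiting
        }
}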
@@ -678,8 +680,8 @@ type storVerify struct {
// storCtlVerify drives a storage node during cluster verifying (= starting) state
func storCtlVerify(ctx context.Context, stor *Node, pt *PartitionTable, res chan storVerify) {
- // XXX link.Close on err
- // XXX cancel on ctx
+ // XXX link.Close on err -> = xcontext.WithCloseOnErrCancel
+ // XXX cancel on ctx -> = ^^^
var err error
defer func() {
......
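On the xcontext.WithCloseOnErrCancel idea named in the XXXs above, a self-contained sketch that assumes nothing about the real xcontext API: run f and guarantee the link is closed if f fails or ctx is cancelled, so a peer link never outlives an aborted interaction:

package sketch

import (
        "context"
        "io"
        "sync"
)

func withCloseOnErrCancel(ctx context.Context, c io.Closer, f func() error) error {
        var once sync.Once
        closeC := func() { once.Do(func() { _ = c.Close() }) }

        done := make(chan struct{})
        defer close(done)
        go func() {
                select {
                case <-ctx.Done():
                        closeC() // cancel -> close; unblocks f if it is stuck in I/O
                case <-done:
                        // f finished first
                }
        }()

        err := f()
        if err != nil {
                closeC() // error -> close
        }
        return err
}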
- // Copyright (C) 2016-2020 Nexedi SA and Contributors.
+ // Copyright (C) 2016-2021 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
......
@@ -103,6 +103,7 @@ func (stor *Storage) Run(ctx context.Context, l xnet.Listener) (err error) {
//stor.node.OnShutdown = serveCancel
// XXX hack: until ctx cancel is not handled properly by Recv/Send
+ // XXX -> xcontext.WithCloseOnRetCancel
stor.node.OnShutdown = func() {
serveCancel()
lclose(ctx, lli)
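The shutdown hack above in sketch form: until Recv/Send observe ctx themselves, cancellation is turned into closing the listener so a blocked Accept returns. The loop shape and handle are illustrative:

package sketch

import (
        "context"
        "net"
)

// serveUntilCancel accepts connections until ctx is cancelled; the
// cancellation is turned into lis.Close() to unblock Accept.
func serveUntilCancel(ctx context.Context, lis net.Listener, handle func(net.Conn)) error {
        go func() {
                <-ctx.Done()
                lis.Close() // unblocks the Accept below
        }()

        for {
                conn, err := lis.Accept()
                if err != nil {
                        if ctx.Err() != nil {
                                return ctx.Err() // closed because of shutdown
                        }
                        return err // real accept error
                }
                go handle(conn)
        }
}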
@@ -156,6 +157,7 @@ func (stor *Storage) Run(ctx context.Context, l xnet.Listener) (err error) {
serveCancel()
wg.Wait()
+ // XXX should Storage do it, or should it leave back non-closed?
err2 := stor.back.Close()
if err == nil {
err = err2
@@ -221,6 +223,9 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
stor.node.MyInfo.UUID = accept.YourUUID
}
+ // XXX the first packet M sends always is NotifyNodeInformation (with us)
+ // -> receive it first via Expect1
// handle notifications and commands from master
// let master initialize us. If successful this ends with StartOperation command.
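A sketch of the "receive it first via Expect1" idea: since the first packet M sends after identification is always NotifyNodeInformation, it can be read eagerly and type-checked before the generic receive loop. recv1 and the message type are stand-ins, not the real neonet API:

package sketch

import "fmt"

type (
        wireMsg               interface{}
        notifyNodeInformation struct{ /* node table rows, including us */ }
)

// expectNodeInfo reads the first message from M and insists it is
// NotifyNodeInformation.
func expectNodeInfo(recv1 func() (wireMsg, error)) (*notifyNodeInformation, error) {
        m, err := recv1()
        if err != nil {
                return nil, err
        }
        ni, ok := m.(*notifyNodeInformation)
        if !ok {
                return nil, fmt.Errorf("expected NotifyNodeInformation, got %T", m)
        }
        return ni, nil
}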
@@ -230,7 +235,7 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
return err
}
- // we got StartOperation command. Let master drive us during servicing phase.
+ // we got StartOperation command. Let master drive us during service phase.
err = stor.m1serve(ctx, reqStart)
//log.Error(ctx, err)
return err
@@ -310,6 +315,7 @@ func (stor *Storage) m1initialize1(ctx context.Context, req neonet.Request) error {
err = req.Reply(&proto.AnswerLastIDs{LastTid: lastTid, LastOid: lastOid})
// XXX -> somehow to common part in NodeApp ?
case *proto.SendPartitionTable:
// TODO M sends us whole PT -> save locally
stor.node.UpdatePartTab(ctx, msg) // XXX lock? XXX handle msg.NumReplicas
@@ -381,6 +387,8 @@ func (stor *Storage) m1serve1(ctx context.Context, req neonet.Request) error {
case *proto.StopOperation:
return fmt.Errorf("stop requested")
+ // should be served by NodeApp.commonRecv1
+ // ---- 8< ----
// XXX SendPartitionTable?
// XXX NotifyPartitionChanges?
@@ -389,6 +397,7 @@ func (stor *Storage) m1serve1(ctx context.Context, req neonet.Request) error {
case *proto.NotifyClusterState:
stor.node.UpdateClusterState(ctx, msg) // XXX lock? what to do with it?
+ // ---- 8< ----
// TODO commit related messages
}
......