Commit f4b87d77 authored by Kirill Smelkov

Sync with NEO/py v1.12

parents 0e0820ab 837f49d1
Change History Change History
============== ==============
1.12 (2019-04-28)
-----------------
Most changes in this version focus on the ability to migrate a big ZODB to NEO
efficiently and reliably, which required changes in the protocol.
See testSplitAndMakeResilientUsingClone for an example scenario.
Better cluster management:
- New --new-nid storage option for fast cloning.
- The number of wanted replicas is now a property of the database, which is
modifiable when the cluster is running, and reported by `neoctl print pt`.
- Better error reporting from the master to neoctl for denied requests.
- tweak: do not touch cells of nodes that are intended to be dropped.
- tweak: do not crash when trying to remove all nodes.
- tweak: new neoctl option to ask the master to simulate.
- neoctl: better display of full partition tables.
- master: reject drop/tweak commands that could lead to unwanted status.
Importer:
- Fix possible data loss on writeback.
- v1.9 broke replication (as source) once the import is finished.
- Speed up startup when the import is already finished.
- Fix closure of ZODB, and also do it when the import is finished.
- Fix hidden "maximum recursion depth exceeded" at startup.
- Fix resumption when using SQLite.
- v1.10 broke resumption when there are new transactions since the import
started.
MySQL:
- Better support of RocksDB by specifying column families.
- Fix handling of connection strings (--database) without credentials.
1.11 (2019-03-11) 1.11 (2019-03-11)
----------------- -----------------
......
...@@ -258,6 +258,37 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) { ...@@ -258,6 +258,37 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) {
c.node.MyInfo.UUID = accept.YourUUID c.node.MyInfo.UUID = accept.YourUUID
} }
wg, ctx := errgroup.WithContext(ctx) // XXX -> xsync.WorkGroup
defer xio.CloseWhenDone(ctx, mlink)()
// master pushes nodeTab and partTab to us right after identification
// XXX merge into -> node.DialMaster?
// nodeTab
mnt := proto.NotifyNodeInformation{}
_, err = mlink.Expect1(&mnt)
if err != nil {
return fmt.Errorf("after identification: %w", err)
}
// partTab
mpt := proto.SendPartitionTable{}
_, err = mlink.Expect1(&mpt)
if err != nil {
return fmt.Errorf("after identification: %w", err)
}
pt := PartTabFromDump(mpt.PTid, mpt.RowList) // TODO handle mpt.NumReplicas
log.Infof(ctx, "master initialized us with next parttab:\n%s", pt)
c.node.StateMu.Lock()
c.node.UpdateNodeTab(ctx, &mnt)
c.node.PartTab = pt
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
// set c.mlink and notify waiters // set c.mlink and notify waiters
c.mlinkMu.Lock() c.mlinkMu.Lock()
c.mlink = mlink c.mlink = mlink
...@@ -266,10 +297,6 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) { ...@@ -266,10 +297,6 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) {
c.mlinkMu.Unlock() c.mlinkMu.Unlock()
close(ready) close(ready)
wg, ctx := errgroup.WithContext(ctx) // XXX -> xsync.WorkGroup
defer xio.CloseWhenDone(ctx, mlink)()
// when we are done - reset .mlink // when we are done - reset .mlink
defer func() { defer func() {
c.mlinkMu.Lock() c.mlinkMu.Lock()
...@@ -298,21 +325,6 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) { ...@@ -298,21 +325,6 @@ func (c *Client) talkMaster1(ctx context.Context) (err error) {
func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) { func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) {
defer task.Running(&ctx, "init")(&err) defer task.Running(&ctx, "init")(&err)
// query partTab
rpt := proto.AnswerPartitionTable{}
err = mlink.Ask1(&proto.AskPartitionTable{}, &rpt)
if err != nil {
return err
}
pt := PartTabFromDump(rpt.PTid, rpt.RowList)
log.Infof(ctx, "master initialized us with next parttab:\n%s", pt)
c.node.StateMu.Lock()
c.node.PartTab = pt
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
// query last_tid // query last_tid
lastTxn := proto.AnswerLastTransaction{} lastTxn := proto.AnswerLastTransaction{}
err = mlink.Ask1(&proto.LastTransaction{}, &lastTxn) err = mlink.Ask1(&proto.LastTransaction{}, &lastTxn)
......
// Copyright (C) 2017-2018 Nexedi SA and Contributors. // Copyright (C) 2017-2020 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -505,7 +505,7 @@ func storCtlRecovery(ctx context.Context, stor *Node, res chan storRecovery) { ...@@ -505,7 +505,7 @@ func storCtlRecovery(ctx context.Context, stor *Node, res chan storRecovery) {
} }
// reconstruct partition table from response // reconstruct partition table from response
pt := PartTabFromDump(resp.PTid, resp.RowList) pt := PartTabFromDump(resp.PTid, resp.RowList) // TODO handle resp.NumReplicas
res <- storRecovery{stor: stor, partTab: pt} res <- storRecovery{stor: stor, partTab: pt}
} }
...@@ -696,6 +696,7 @@ func storCtlVerify(ctx context.Context, stor *Node, pt *PartitionTable, res chan ...@@ -696,6 +696,7 @@ func storCtlVerify(ctx context.Context, stor *Node, pt *PartitionTable, res chan
// send just recovered parttab so storage saves it // send just recovered parttab so storage saves it
err = slink.Send1(&proto.SendPartitionTable{ err = slink.Send1(&proto.SendPartitionTable{
PTid: pt.PTid, PTid: pt.PTid,
NumReplicas: 0, // FIXME hardcoded
RowList: pt.Dump(), RowList: pt.Dump(),
}) })
if err != nil { if err != nil {
...@@ -898,6 +899,9 @@ func (m *Master) serveClient(ctx context.Context, cli *Node) (err error) { ...@@ -898,6 +899,9 @@ func (m *Master) serveClient(ctx context.Context, cli *Node) (err error) {
wg, ctx := errgroup.WithContext(ctx) wg, ctx := errgroup.WithContext(ctx)
defer xio.CloseWhenDone(ctx, clink)() // XXX -> cli.CloseLink? defer xio.CloseWhenDone(ctx, clink)() // XXX -> cli.CloseLink?
// FIXME send initial nodeTab and partTab before starting serveClient1
// (move those initial sends from keepPeerUpdated to here)
// M -> C notifications about cluster state // M -> C notifications about cluster state
wg.Go(func() error { wg.Go(func() error {
return m.keepPeerUpdated(ctx, clink) return m.keepPeerUpdated(ctx, clink)
...@@ -926,15 +930,6 @@ func (m *Master) serveClient(ctx context.Context, cli *Node) (err error) { ...@@ -926,15 +930,6 @@ func (m *Master) serveClient(ctx context.Context, cli *Node) (err error) {
// serveClient1 prepares response for 1 request from client // serveClient1 prepares response for 1 request from client
func (m *Master) serveClient1(ctx context.Context, req proto.Msg) (resp proto.Msg) { func (m *Master) serveClient1(ctx context.Context, req proto.Msg) (resp proto.Msg) {
switch req := req.(type) { switch req := req.(type) {
case *proto.AskPartitionTable:
m.node.StateMu.RLock()
rpt := &proto.AnswerPartitionTable{
PTid: m.node.PartTab.PTid,
RowList: m.node.PartTab.Dump(),
}
m.node.StateMu.RUnlock()
return rpt
case *proto.LastTransaction: case *proto.LastTransaction:
// XXX lock // XXX lock
return &proto.AnswerLastTransaction{m.lastTid} return &proto.AnswerLastTransaction{m.lastTid}
...@@ -965,6 +960,10 @@ func (m *Master) keepPeerUpdated(ctx context.Context, link *neonet.NodeLink) (er ...@@ -965,6 +960,10 @@ func (m *Master) keepPeerUpdated(ctx context.Context, link *neonet.NodeLink) (er
nodeiv[i] = node.NodeInfo nodeiv[i] = node.NodeInfo
} }
ptid := m.node.PartTab.PTid
ptnr := uint32(0) // FIXME hardcoded NumReplicas; NEO/py keeps this as n(replica)-1
ptv := m.node.PartTab.Dump()
// XXX RLock is not enough for subscribe - right? // XXX RLock is not enough for subscribe - right?
nodech, nodeUnsubscribe := m.node.NodeTab.SubscribeBuffered() nodech, nodeUnsubscribe := m.node.NodeTab.SubscribeBuffered()
...@@ -990,6 +989,16 @@ func (m *Master) keepPeerUpdated(ctx context.Context, link *neonet.NodeLink) (er ...@@ -990,6 +989,16 @@ func (m *Master) keepPeerUpdated(ctx context.Context, link *neonet.NodeLink) (er
return err return err
} }
err = link.Send1(&proto.SendPartitionTable{
PTid: ptid,
NumReplicas: ptnr,
RowList: ptv,
})
if err != nil {
return err
}
// now proxy the updates until we are done // now proxy the updates until we are done
for { for {
var msg proto.Msg var msg proto.Msg
...@@ -1081,8 +1090,6 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *Node, resp pro ...@@ -1081,8 +1090,6 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *Node, resp pro
accept := &proto.AcceptIdentification{ accept := &proto.AcceptIdentification{
NodeType: proto.MASTER, NodeType: proto.MASTER,
MyUUID: m.node.MyInfo.UUID, MyUUID: m.node.MyInfo.UUID,
NumPartitions: 1, // FIXME hardcoded
NumReplicas: 0, // FIXME hardcoded (neo/py meaning for n(replica) is `n(real-replica) - 1`)
YourUUID: uuid, YourUUID: uuid,
} }
......
...@@ -113,8 +113,9 @@ func (app *NodeApp) Dial(ctx context.Context, peerType proto.NodeType, addr stri ...@@ -113,8 +113,9 @@ func (app *NodeApp) Dial(ctx context.Context, peerType proto.NodeType, addr stri
UUID: app.MyInfo.UUID, UUID: app.MyInfo.UUID,
Address: app.MyInfo.Addr, Address: app.MyInfo.Addr,
ClusterName: app.ClusterName, ClusterName: app.ClusterName,
DevPath: nil, // XXX stub
IdTime: app.MyInfo.IdTime, // XXX ok? IdTime: app.MyInfo.IdTime, // XXX ok?
DevPath: nil, // XXX stub
NewNID: nil, // XXX stub
} }
accept := &proto.AcceptIdentification{} accept := &proto.AcceptIdentification{}
// FIXME error if peer sends us something with another connID // FIXME error if peer sends us something with another connID
...@@ -323,7 +324,7 @@ func (app *NodeApp) UpdateNodeTab(ctx context.Context, msg *proto.NotifyNodeInfo ...@@ -323,7 +324,7 @@ func (app *NodeApp) UpdateNodeTab(ctx context.Context, msg *proto.NotifyNodeInfo
// UpdatePartTab applies updates to .PartTab from message and logs changes appropriately. // UpdatePartTab applies updates to .PartTab from message and logs changes appropriately.
func (app *NodeApp) UpdatePartTab(ctx context.Context, msg *proto.SendPartitionTable) { func (app *NodeApp) UpdatePartTab(ctx context.Context, msg *proto.SendPartitionTable) {
pt := PartTabFromDump(msg.PTid, msg.RowList) pt := PartTabFromDump(msg.PTid, msg.RowList) // FIXME handle msg.NumReplicas
// XXX logging under lock // XXX logging under lock
log.Infof(ctx, "parttab update: %v", pt) log.Infof(ctx, "parttab update: %v", pt)
app.PartTab = pt app.PartTab = pt
......
...@@ -107,8 +107,9 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -107,8 +107,9 @@ func TestMasterStorage(t0 *testing.T) {
UUID: 0, UUID: 0,
Address: xnaddr("s:1"), Address: xnaddr("s:1"),
ClusterName: "abc1", ClusterName: "abc1",
DevPath: nil,
IdTime: proto.IdTimeNone, IdTime: proto.IdTimeNone,
DevPath: nil,
NewNID: nil,
})) }))
...@@ -117,8 +118,6 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -117,8 +118,6 @@ func TestMasterStorage(t0 *testing.T) {
tSM.Expect(conntx("m:2", "s:2", 1, &proto.AcceptIdentification{ tSM.Expect(conntx("m:2", "s:2", 1, &proto.AcceptIdentification{
NodeType: proto.MASTER, NodeType: proto.MASTER,
MyUUID: proto.UUID(proto.MASTER, 1), MyUUID: proto.UUID(proto.MASTER, 1),
NumPartitions: 1,
NumReplicas: 0,
YourUUID: proto.UUID(proto.STORAGE, 1), YourUUID: proto.UUID(proto.STORAGE, 1),
})) }))
...@@ -136,6 +135,7 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -136,6 +135,7 @@ func TestMasterStorage(t0 *testing.T) {
tMS.Expect(conntx("m:2", "s:2", 2, &proto.AskPartitionTable{})) tMS.Expect(conntx("m:2", "s:2", 2, &proto.AskPartitionTable{}))
tMS.Expect(conntx("s:2", "m:2", 2, &proto.AnswerPartitionTable{ tMS.Expect(conntx("s:2", "m:2", 2, &proto.AnswerPartitionTable{
PTid: 0, PTid: 0,
NumReplicas: 0,
RowList: []proto.RowInfo{}, RowList: []proto.RowInfo{},
})) }))
...@@ -162,8 +162,9 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -162,8 +162,9 @@ func TestMasterStorage(t0 *testing.T) {
tMS.Expect(conntx("m:2", "s:2", 4, &proto.SendPartitionTable{ tMS.Expect(conntx("m:2", "s:2", 4, &proto.SendPartitionTable{
PTid: 1, PTid: 1,
NumReplicas: 0,
RowList: []proto.RowInfo{ RowList: []proto.RowInfo{
{0, []proto.CellInfo{{proto.UUID(proto.STORAGE, 1), proto.UP_TO_DATE}}}, {[]proto.CellInfo{{proto.UUID(proto.STORAGE, 1), proto.UP_TO_DATE}}},
}, },
})) }))
...@@ -209,8 +210,9 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -209,8 +210,9 @@ func TestMasterStorage(t0 *testing.T) {
UUID: 0, UUID: 0,
Address: xnaddr(""), Address: xnaddr(""),
ClusterName: "abc1", ClusterName: "abc1",
DevPath: nil,
IdTime: proto.IdTimeNone, IdTime: proto.IdTimeNone,
DevPath: nil,
NewNID: nil,
})) }))
tM.Expect(δnode("m", "", proto.CLIENT, 1, proto.RUNNING, 0.02)) tM.Expect(δnode("m", "", proto.CLIENT, 1, proto.RUNNING, 0.02))
...@@ -218,25 +220,10 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -218,25 +220,10 @@ func TestMasterStorage(t0 *testing.T) {
tCM.Expect(conntx("m:3", "c:1", 1, &proto.AcceptIdentification{ tCM.Expect(conntx("m:3", "c:1", 1, &proto.AcceptIdentification{
NodeType: proto.MASTER, NodeType: proto.MASTER,
MyUUID: proto.UUID(proto.MASTER, 1), MyUUID: proto.UUID(proto.MASTER, 1),
NumPartitions: 1,
NumReplicas: 0,
YourUUID: proto.UUID(proto.CLIENT, 1), YourUUID: proto.UUID(proto.CLIENT, 1),
})) }))
// C asks M about PT and last_tid
// NOTE this might come in parallel with vvv "C <- M NotifyNodeInformation C1,M1,S1"
tCM.Expect(conntx("c:1", "m:3", 3, &proto.AskPartitionTable{}))
tCM.Expect(conntx("m:3", "c:1", 3, &proto.AnswerPartitionTable{
PTid: 1,
RowList: []proto.RowInfo{
{0, []proto.CellInfo{{proto.UUID(proto.STORAGE, 1), proto.UP_TO_DATE}}},
},
}))
tCM.Expect(conntx("c:1", "m:3", 5, &proto.LastTransaction{}))
tCM.Expect(conntx("m:3", "c:1", 5, &proto.AnswerLastTransaction{lastTid}))
// C <- M NotifyNodeInformation C1,M1,S1 // C <- M NotifyNodeInformation C1,M1,S1
// NOTE this might come in parallel with ^^^ "C asks M about PT"
tMC.Expect(conntx("m:3", "c:1", 0, &proto.NotifyNodeInformation{ tMC.Expect(conntx("m:3", "c:1", 0, &proto.NotifyNodeInformation{
IdTime: proto.IdTimeNone, // XXX ? IdTime: proto.IdTimeNone, // XXX ?
NodeList: []proto.NodeInfo{ NodeList: []proto.NodeInfo{
...@@ -246,10 +233,22 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -246,10 +233,22 @@ func TestMasterStorage(t0 *testing.T) {
}, },
})) }))
tMC.Expect(conntx("m:3", "c:1", 2, &proto.SendPartitionTable{
PTid: 1,
NumReplicas: 0,
RowList: []proto.RowInfo{
{[]proto.CellInfo{{proto.UUID(proto.STORAGE, 1), proto.UP_TO_DATE}}},
},
}))
tC.Expect(δnode("c", "m:1", proto.MASTER, 1, proto.RUNNING, proto.IdTimeNone)) tC.Expect(δnode("c", "m:1", proto.MASTER, 1, proto.RUNNING, proto.IdTimeNone))
tC.Expect(δnode("c", "s:1", proto.STORAGE, 1, proto.RUNNING, 0.01)) tC.Expect(δnode("c", "s:1", proto.STORAGE, 1, proto.RUNNING, 0.01))
tC.Expect(δnode("c", "", proto.CLIENT, 1, proto.RUNNING, 0.02)) tC.Expect(δnode("c", "", proto.CLIENT, 1, proto.RUNNING, 0.02))
// C asks M last_tid
tCM.Expect(conntx("c:1", "m:3", 3, &proto.LastTransaction{}))
tCM.Expect(conntx("m:3", "c:1", 3, &proto.AnswerLastTransaction{lastTid}))
// ---------------------------------------- // ----------------------------------------
// C asks M about last tid XXX better master sends it itself on new client connected // C asks M about last tid XXX better master sends it itself on new client connected
...@@ -262,8 +261,8 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -262,8 +261,8 @@ func TestMasterStorage(t0 *testing.T) {
} }
}) })
tCM.Expect(conntx("c:1", "m:3", 7, &proto.LastTransaction{})) tCM.Expect(conntx("c:1", "m:3", 5, &proto.LastTransaction{}))
tCM.Expect(conntx("m:3", "c:1", 7, &proto.AnswerLastTransaction{ tCM.Expect(conntx("m:3", "c:1", 5, &proto.AnswerLastTransaction{
Tid: lastTid, Tid: lastTid,
})) }))
...@@ -295,15 +294,14 @@ func TestMasterStorage(t0 *testing.T) { ...@@ -295,15 +294,14 @@ func TestMasterStorage(t0 *testing.T) {
UUID: proto.UUID(proto.CLIENT, 1), UUID: proto.UUID(proto.CLIENT, 1),
Address: xnaddr(""), Address: xnaddr(""),
ClusterName: "abc1", ClusterName: "abc1",
DevPath: nil,
IdTime: 0.02, IdTime: 0.02,
DevPath: nil,
NewNID: nil,
})) }))
tCS.Expect(conntx("s:3", "c:2", 1, &proto.AcceptIdentification{ tCS.Expect(conntx("s:3", "c:2", 1, &proto.AcceptIdentification{
NodeType: proto.STORAGE, NodeType: proto.STORAGE,
MyUUID: proto.UUID(proto.STORAGE, 1), MyUUID: proto.UUID(proto.STORAGE, 1),
NumPartitions: 1,
NumReplicas: 0,
YourUUID: proto.UUID(proto.CLIENT, 1), YourUUID: proto.UUID(proto.CLIENT, 1),
})) }))
......
...@@ -58,7 +58,7 @@ ...@@ -58,7 +58,7 @@
// object. The Request contains the message received and internally the // object. The Request contains the message received and internally the
// connection. A response can be sent back via Request.Reply. Then once // connection. A response can be sent back via Request.Reply. Then once
// Request.Close is called the connection object that was accepted is // Request.Close is called the connection object that was accepted is
// immediately put back into pool for later reuse. // immediately put back into pool for later reuse. XXX Expect1
package neonet package neonet
// XXX neonet compatibility with NEO/py depends on the following small NEO/py patch: // XXX neonet compatibility with NEO/py depends on the following small NEO/py patch:
...@@ -1552,6 +1552,24 @@ func (link *NodeLink) Send1(msg proto.Msg) error { ...@@ -1552,6 +1552,24 @@ func (link *NodeLink) Send1(msg proto.Msg) error {
return err return err
} }
// Expect1 receives notification in 1-1 model.
//
// It accepts one connection from the link, waits on it for one of the
// messages listed in msgv, and then light-closes the connection. The index
// of the matched message is returned as reported by Conn.Expect; if accepting
// the connection fails, -1 is returned together with the accept error.
//
// See Conn.Expect for semantic details.
//
// See "Lightweight mode" in top-level package doc for overview.
func (link *NodeLink) Expect1(msgv ...proto.Msg) (which int, err error) {
	// XXX a bit dup wrt Recv1
	conn, err := link.Accept()
	if err != nil {
		return -1, err
	}

	// NOTE serveRecv guarantees that when a conn is accepted, there is 1 message in conn.rxq
	idx, rxerr := conn.Expect(msgv...)

	// we are done with conn regardless of what Expect returned
	conn.lightClose()

	return idx, rxerr
}
// Ask1 sends request and receives response in 1-1 model. // Ask1 sends request and receives response in 1-1 model.
// //
......
// Copyright (C) 2017 Nexedi SA and Contributors. // Copyright (C) 2017-2020 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com> // Kirill Smelkov <kirr@nexedi.com>
// //
// This program is free software: you can Use, Study, Modify and Redistribute // This program is free software: you can Use, Study, Modify and Redistribute
...@@ -254,7 +254,7 @@ func (pt *PartitionTable) Dump() []proto.RowInfo { // XXX also include .ptid? -> ...@@ -254,7 +254,7 @@ func (pt *PartitionTable) Dump() []proto.RowInfo { // XXX also include .ptid? ->
cellv[j] = cell.CellInfo cellv[j] = cell.CellInfo
} }
rowv[i] = proto.RowInfo{Offset: uint32(i), CellList: cellv} // XXX cast? rowv[i] = proto.RowInfo{CellList: cellv}
} }
return rowv return rowv
} }
...@@ -264,9 +264,8 @@ func PartTabFromDump(ptid proto.PTid, rowv []proto.RowInfo) *PartitionTable { ...@@ -264,9 +264,8 @@ func PartTabFromDump(ptid proto.PTid, rowv []proto.RowInfo) *PartitionTable {
pt := &PartitionTable{} pt := &PartitionTable{}
pt.PTid = ptid pt.PTid = ptid
for _, row := range rowv { for i, row := range rowv {
i := row.Offset for i >= len(pt.tab) {
for i >= uint32(len(pt.tab)) {
pt.tab = append(pt.tab, []Cell{}) pt.tab = append(pt.tab, []Cell{})
} }
......
...@@ -81,7 +81,7 @@ const ( ...@@ -81,7 +81,7 @@ const (
// The protocol version must be increased whenever upgrading a node may require // The protocol version must be increased whenever upgrading a node may require
// to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and // to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and
// the high order byte 0 is different from TLS Handshake (0x16). // the high order byte 0 is different from TLS Handshake (0x16).
Version = 5 Version = 6
// length of packet header // length of packet header
PktHeaderLen = 10 // = unsafe.Sizeof(PktHeader{}), but latter gives typed constant (uintptr) PktHeaderLen = 10 // = unsafe.Sizeof(PktHeader{}), but latter gives typed constant (uintptr)
...@@ -138,6 +138,7 @@ var ErrDecodeOverflow = errors.New("decode: buffer overflow") ...@@ -138,6 +138,7 @@ var ErrDecodeOverflow = errors.New("decode: buffer overflow")
type ErrorCode int8 type ErrorCode int8
const ( const (
ACK ErrorCode = iota ACK ErrorCode = iota
DENIED
NOT_READY NOT_READY
OID_NOT_FOUND OID_NOT_FOUND
TID_NOT_FOUND TID_NOT_FOUND
...@@ -346,7 +347,6 @@ type CellInfo struct { ...@@ -346,7 +347,6 @@ type CellInfo struct {
//neo:proto typeonly //neo:proto typeonly
type RowInfo struct { type RowInfo struct {
Offset uint32 // PNumber XXX -> Pid
CellList []CellInfo CellList []CellInfo
} }
...@@ -372,16 +372,16 @@ type RequestIdentification struct { ...@@ -372,16 +372,16 @@ type RequestIdentification struct {
UUID NodeUUID UUID NodeUUID
Address Address // where requesting node is also accepting connections Address Address // where requesting node is also accepting connections
ClusterName string ClusterName string
DevPath []string // [] of devid
IdTime IdTime IdTime IdTime
// storage
DevPath []string // [] of devid
NewNID []uint32 // [] of PNumber
} }
//neo:proto answer //neo:proto answer
type AcceptIdentification struct { type AcceptIdentification struct {
NodeType NodeType // XXX name NodeType NodeType // XXX name
MyUUID NodeUUID MyUUID NodeUUID
NumPartitions uint32 // PNumber
NumReplicas uint32 // PNumber
YourUUID NodeUUID YourUUID NodeUUID
} }
...@@ -455,22 +455,23 @@ type AnswerLastIDs struct { ...@@ -455,22 +455,23 @@ type AnswerLastIDs struct {
} }
// Ask storage node the remaining data needed by master to recover. // Ask storage node the remaining data needed by master to recover.
// This is also how the clients get the full partition table on connection.
// //
//neo:nodes M -> S; C -> M //neo:nodes M -> S
type AskPartitionTable struct { type AskPartitionTable struct {
} }
type AnswerPartitionTable struct { type AnswerPartitionTable struct {
PTid PTid
NumReplicas uint32 // PNumber
RowList []RowInfo RowList []RowInfo
} }
// Send the full partition table to admin/storage nodes on connection. // Send the full partition table to admin/client/storage nodes on connection.
// //
//neo:nodes M -> A, S //neo:nodes M -> A, C, S
type SendPartitionTable struct { type SendPartitionTable struct {
PTid PTid
NumReplicas uint32 // PNumber
RowList []RowInfo RowList []RowInfo
} }
...@@ -479,6 +480,7 @@ type SendPartitionTable struct { ...@@ -479,6 +480,7 @@ type SendPartitionTable struct {
//neo:nodes M -> * //neo:nodes M -> *
type NotifyPartitionChanges struct { type NotifyPartitionChanges struct {
PTid PTid
NumReplicas uint32 // PNumber
CellList []struct { CellList []struct {
Offset uint32 // PNumber XXX -> Pid Offset uint32 // PNumber XXX -> Pid
CellInfo CellInfo CellInfo CellInfo
...@@ -804,6 +806,7 @@ type PartitionList struct { ...@@ -804,6 +806,7 @@ type PartitionList struct {
type AnswerPartitionList struct { type AnswerPartitionList struct {
PTid PTid
NumReplicas uint32 // PNumber
RowList []RowInfo RowList []RowInfo
} }
...@@ -843,11 +846,24 @@ type AddPendingNodes struct { ...@@ -843,11 +846,24 @@ type AddPendingNodes struct {
// //
//neo:nodes ctl -> A -> M //neo:nodes ctl -> A -> M
type TweakPartitionTable struct { type TweakPartitionTable struct {
DryRun bool
NodeList []NodeUUID NodeList []NodeUUID
// answer = Error // answer = Error
} }
// AnswerTweakPartitionTable carries a Changed flag together with a list of
// partition table rows.
//
// NOTE(review): presumably this is the master's reply to TweakPartitionTable,
// with RowList being the resulting (or, for DryRun, simulated) partition
// table - confirm against NEO/py v1.12 protocol.
type AnswerTweakPartitionTable struct {
Changed bool
RowList []RowInfo
}
// Set the number of replicas.
//
// NumReplicas uses NEO/py convention: n(replica) = n(real-replica) - 1.
//
//neo:nodes ctl -> A -> M
type SetNumReplicas struct {
NumReplicas uint32 // PNumber
}
// Set the cluster state. // Set the cluster state.
// //
//neo:nodes ctl -> A -> M //neo:nodes ctl -> A -> M
......
...@@ -190,18 +190,20 @@ func TestMsgMarshal(t *testing.T) { ...@@ -190,18 +190,20 @@ func TestMsgMarshal(t *testing.T) {
// PTid, [] (of [] of {UUID, CellState}) // PTid, [] (of [] of {UUID, CellState})
{&AnswerPartitionTable{ {&AnswerPartitionTable{
PTid: 0x0102030405060708, PTid: 0x0102030405060708,
NumReplicas: 34,
RowList: []RowInfo{ RowList: []RowInfo{
{1, []CellInfo{{11, UP_TO_DATE}, {17, OUT_OF_DATE}}}, {[]CellInfo{{11, UP_TO_DATE}, {17, OUT_OF_DATE}}},
{2, []CellInfo{{11, FEEDING}}}, {[]CellInfo{{11, FEEDING}}},
{7, []CellInfo{{11, CORRUPTED}, {15, DISCARDED}, {23, UP_TO_DATE}}}, {[]CellInfo{{11, CORRUPTED}, {15, DISCARDED}, {23, UP_TO_DATE}}},
}, },
}, },
hex("0102030405060708") + hex("0102030405060708") +
hex("00000022") +
hex("00000003") + hex("00000003") +
hex("00000001000000020000000b010000001100") + hex("000000020000000b010000001100") +
hex("00000002000000010000000b02") + hex("000000010000000b02") +
hex("00000007000000030000000b030000000f040000001701"), hex("000000030000000b030000000f040000001701"),
}, },
// map[Oid]struct {Tid,Tid,bool} // map[Oid]struct {Tid,Tid,bool}
...@@ -250,13 +252,14 @@ func TestMsgMarshal(t *testing.T) { ...@@ -250,13 +252,14 @@ func TestMsgMarshal(t *testing.T) {
}, },
// uint32, Address, string, IdTime // uint32, Address, string, IdTime
{&RequestIdentification{CLIENT, 17, Address{"localhost", 7777}, "myname", []string{"room1", "rack234"}, 0.12345678}, {&RequestIdentification{CLIENT, 17, Address{"localhost", 7777}, "myname", 0.12345678, []string{"room1", "rack234"}, []uint32{3,4,5} },
u8(2) + u32(17) + u32(9) + u8(2) + u32(17) + u32(9) +
"localhost" + u16(7777) + "localhost" + u16(7777) +
u32(6) + "myname" + u32(6) + "myname" +
hex("3fbf9add1091c895") +
u32(2) + u32(5)+"room1" + u32(7)+"rack234" + u32(2) + u32(5)+"room1" + u32(7)+"rack234" +
hex("3fbf9add1091c895"), u32(3) + u32(3)+u32(4)+u32(5),
}, },
// IdTime, empty Address, int32 // IdTime, empty Address, int32
......
This diff is collapsed.
...@@ -9,22 +9,23 @@ func _() { ...@@ -9,22 +9,23 @@ func _() {
// Re-run the stringer command to generate them again. // Re-run the stringer command to generate them again.
var x [1]struct{} var x [1]struct{}
_ = x[ACK-0] _ = x[ACK-0]
_ = x[NOT_READY-1] _ = x[DENIED-1]
_ = x[OID_NOT_FOUND-2] _ = x[NOT_READY-2]
_ = x[TID_NOT_FOUND-3] _ = x[OID_NOT_FOUND-3]
_ = x[OID_DOES_NOT_EXIST-4] _ = x[TID_NOT_FOUND-4]
_ = x[PROTOCOL_ERROR-5] _ = x[OID_DOES_NOT_EXIST-5]
_ = x[REPLICATION_ERROR-6] _ = x[PROTOCOL_ERROR-6]
_ = x[CHECKING_ERROR-7] _ = x[REPLICATION_ERROR-7]
_ = x[BACKEND_NOT_IMPLEMENTED-8] _ = x[CHECKING_ERROR-8]
_ = x[NON_READABLE_CELL-9] _ = x[BACKEND_NOT_IMPLEMENTED-9]
_ = x[READ_ONLY_ACCESS-10] _ = x[NON_READABLE_CELL-10]
_ = x[INCOMPLETE_TRANSACTION-11] _ = x[READ_ONLY_ACCESS-11]
_ = x[INCOMPLETE_TRANSACTION-12]
} }
const _ErrorCode_name = "ACKNOT_READYOID_NOT_FOUNDTID_NOT_FOUNDOID_DOES_NOT_EXISTPROTOCOL_ERRORREPLICATION_ERRORCHECKING_ERRORBACKEND_NOT_IMPLEMENTEDNON_READABLE_CELLREAD_ONLY_ACCESSINCOMPLETE_TRANSACTION" const _ErrorCode_name = "ACKDENIEDNOT_READYOID_NOT_FOUNDTID_NOT_FOUNDOID_DOES_NOT_EXISTPROTOCOL_ERRORREPLICATION_ERRORCHECKING_ERRORBACKEND_NOT_IMPLEMENTEDNON_READABLE_CELLREAD_ONLY_ACCESSINCOMPLETE_TRANSACTION"
var _ErrorCode_index = [...]uint8{0, 3, 12, 25, 38, 56, 70, 87, 101, 124, 141, 157, 179} var _ErrorCode_index = [...]uint8{0, 3, 9, 18, 31, 44, 62, 76, 93, 107, 130, 147, 163, 185}
func (i ErrorCode) String() string { func (i ErrorCode) String() string {
if i < 0 || i >= ErrorCode(len(_ErrorCode_index)-1) { if i < 0 || i >= ErrorCode(len(_ErrorCode_index)-1) {
......
...@@ -42,31 +42,32 @@ var pyMsgRegistry = map[uint16]string{ ...@@ -42,31 +42,32 @@ var pyMsgRegistry = map[uint16]string{
38: "SetNodeState", 38: "SetNodeState",
39: "AddPendingNodes", 39: "AddPendingNodes",
40: "TweakPartitionTable", 40: "TweakPartitionTable",
41: "SetClusterState", 41: "SetNumReplicas",
42: "Repair", 42: "SetClusterState",
43: "RepairOne", 43: "Repair",
44: "NotifyClusterState", 44: "RepairOne",
45: "AskClusterState", 45: "NotifyClusterState",
46: "ObjectUndoSerial", 46: "AskClusterState",
47: "AskTIDsFrom", 47: "ObjectUndoSerial",
48: "Pack", 48: "AskTIDsFrom",
49: "CheckReplicas", 49: "Pack",
50: "CheckPartition", 50: "CheckReplicas",
51: "CheckTIDRange", 51: "CheckPartition",
52: "CheckSerialRange", 52: "CheckTIDRange",
53: "PartitionCorrupted", 53: "CheckSerialRange",
54: "NotifyReady", 54: "PartitionCorrupted",
55: "LastTransaction", 55: "NotifyReady",
56: "CheckCurrentSerial", 56: "LastTransaction",
57: "NotifyTransactionFinished", 57: "CheckCurrentSerial",
58: "Replicate", 58: "NotifyTransactionFinished",
59: "ReplicationDone", 59: "Replicate",
60: "FetchTransactions", 60: "ReplicationDone",
61: "FetchObjects", 61: "FetchTransactions",
62: "AddTransaction", 62: "FetchObjects",
63: "AddObject", 63: "AddTransaction",
64: "Truncate", 64: "AddObject",
65: "FlushLog", 65: "Truncate",
66: "FlushLog",
32768: "Error", 32768: "Error",
32769: "AcceptIdentification", 32769: "AcceptIdentification",
32770: "Pong", 32770: "Pong",
...@@ -92,14 +93,15 @@ var pyMsgRegistry = map[uint16]string{ ...@@ -92,14 +93,15 @@ var pyMsgRegistry = map[uint16]string{
32803: "AnswerObjectHistory", 32803: "AnswerObjectHistory",
32804: "AnswerPartitionList", 32804: "AnswerPartitionList",
32805: "AnswerNodeList", 32805: "AnswerNodeList",
32813: "AnswerClusterState", 32808: "AnswerTweakPartitionTable",
32814: "AnswerObjectUndoSerial", 32814: "AnswerClusterState",
32815: "AnswerTIDsFrom", 32815: "AnswerObjectUndoSerial",
32816: "AnswerPack", 32816: "AnswerTIDsFrom",
32819: "AnswerCheckTIDRange", 32817: "AnswerPack",
32820: "AnswerCheckSerialRange", 32820: "AnswerCheckTIDRange",
32823: "AnswerLastTransaction", 32821: "AnswerCheckSerialRange",
32824: "AnswerCheckCurrentSerial", 32824: "AnswerLastTransaction",
32828: "AnswerFetchTransactions", 32825: "AnswerCheckCurrentSerial",
32829: "AnswerFetchObjects", 32829: "AnswerFetchTransactions",
32830: "AnswerFetchObjects",
} }
...@@ -203,10 +203,13 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) { ...@@ -203,10 +203,13 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
// XXX add master UUID -> nodeTab ? or master will notify us with it himself ? // XXX add master UUID -> nodeTab ? or master will notify us with it himself ?
// XXX move -> SetNumReplicas handler
/*
// NumReplicas: neo/py meaning for n(replica) = `n(real-replica) - 1` // NumReplicas: neo/py meaning for n(replica) = `n(real-replica) - 1`
if !(accept.NumPartitions == 1 && accept.NumReplicas == 0) { if !(accept.NumPartitions == 1 && accept.NumReplicas == 0) {
return fmt.Errorf("TODO for 1-storage POC: Npt: %v Nreplica: %v", accept.NumPartitions, accept.NumReplicas) return fmt.Errorf("TODO for 1-storage POC: Npt: %v Nreplica: %v", accept.NumPartitions, accept.NumReplicas)
} }
*/
// XXX -> node.Dial ? // XXX -> node.Dial ?
if accept.YourUUID != stor.node.MyInfo.UUID { if accept.YourUUID != stor.node.MyInfo.UUID {
...@@ -285,6 +288,7 @@ func (stor *Storage) m1initialize1(ctx context.Context, req neonet.Request) erro ...@@ -285,6 +288,7 @@ func (stor *Storage) m1initialize1(ctx context.Context, req neonet.Request) erro
// TODO initially read PT from disk // TODO initially read PT from disk
err = req.Reply(&proto.AnswerPartitionTable{ err = req.Reply(&proto.AnswerPartitionTable{
PTid: stor.node.PartTab.PTid, PTid: stor.node.PartTab.PTid,
NumReplicas: 0, // FIXME hardcoded; NEO/py uses this as n(replica)-1
RowList: stor.node.PartTab.Dump()}) RowList: stor.node.PartTab.Dump()})
case *proto.LockedTransactions: case *proto.LockedTransactions:
...@@ -304,7 +308,7 @@ func (stor *Storage) m1initialize1(ctx context.Context, req neonet.Request) erro ...@@ -304,7 +308,7 @@ func (stor *Storage) m1initialize1(ctx context.Context, req neonet.Request) erro
case *proto.SendPartitionTable: case *proto.SendPartitionTable:
// TODO M sends us whole PT -> save locally // TODO M sends us whole PT -> save locally
stor.node.UpdatePartTab(ctx, msg) // XXX lock? stor.node.UpdatePartTab(ctx, msg) // XXX lock? XXX handle msg.NumReplicas
case *proto.NotifyPartitionChanges: case *proto.NotifyPartitionChanges:
// TODO M sends us δPT -> save locally? // TODO M sends us δPT -> save locally?
...@@ -412,8 +416,6 @@ func (stor *Storage) identify(idReq *proto.RequestIdentification) (proto.Msg, bo ...@@ -412,8 +416,6 @@ func (stor *Storage) identify(idReq *proto.RequestIdentification) (proto.Msg, bo
return &proto.AcceptIdentification{ return &proto.AcceptIdentification{
NodeType: stor.node.MyInfo.Type, NodeType: stor.node.MyInfo.Type,
MyUUID: stor.node.MyInfo.UUID, // XXX lock wrt update MyUUID: stor.node.MyInfo.UUID, // XXX lock wrt update
NumPartitions: 1, // XXX
NumReplicas: 0, // XXX
YourUUID: idReq.UUID, YourUUID: idReq.UUID,
}, true }, true
} }
......
...@@ -21,7 +21,6 @@ from neo.lib.exception import PrimaryFailure ...@@ -21,7 +21,6 @@ from neo.lib.exception import PrimaryFailure
from .handler import AdminEventHandler, MasterEventHandler, \ from .handler import AdminEventHandler, MasterEventHandler, \
MasterRequestEventHandler MasterRequestEventHandler
from neo.lib.bootstrap import BootstrapManager from neo.lib.bootstrap import BootstrapManager
from neo.lib.pt import PartitionTable
from neo.lib.protocol import ClusterStates, Errors, NodeTypes, Packets from neo.lib.protocol import ClusterStates, Errors, NodeTypes, Packets
from neo.lib.debug import register as registerLiveDebugger from neo.lib.debug import register as registerLiveDebugger
...@@ -36,8 +35,8 @@ class Application(BaseApplication): ...@@ -36,8 +35,8 @@ class Application(BaseApplication):
cls.addCommonServerOptions('admin', '127.0.0.1:9999') cls.addCommonServerOptions('admin', '127.0.0.1:9999')
_ = _.group('admin') _ = _.group('admin')
_.int('u', 'uuid', _.int('i', 'nid',
help="specify an UUID to use for this process (testing purpose)") help="specify an NID to use for this process (testing purpose)")
def __init__(self, config): def __init__(self, config):
super(Application, self).__init__( super(Application, self).__init__(
...@@ -53,7 +52,7 @@ class Application(BaseApplication): ...@@ -53,7 +52,7 @@ class Application(BaseApplication):
# The partition table is initialized after getting the number of # The partition table is initialized after getting the number of
# partitions. # partitions.
self.pt = None self.pt = None
self.uuid = config.get('uuid') self.uuid = config.get('nid')
logging.node(self.name, self.uuid) logging.node(self.name, self.uuid)
self.request_handler = MasterRequestEventHandler(self) self.request_handler = MasterRequestEventHandler(self)
self.master_event_handler = MasterEventHandler(self) self.master_event_handler = MasterEventHandler(self)
...@@ -66,7 +65,6 @@ class Application(BaseApplication): ...@@ -66,7 +65,6 @@ class Application(BaseApplication):
super(Application, self).close() super(Application, self).close()
def reset(self): def reset(self):
self.bootstrapped = False
self.master_conn = None self.master_conn = None
self.master_node = None self.master_node = None
...@@ -117,40 +115,20 @@ class Application(BaseApplication): ...@@ -117,40 +115,20 @@ class Application(BaseApplication):
self.cluster_state = None self.cluster_state = None
# search, find, connect and identify to the primary master # search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.ADMIN, self.server) bootstrap = BootstrapManager(self, NodeTypes.ADMIN, self.server)
self.master_node, self.master_conn, num_partitions, num_replicas = \ self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
bootstrap.getPrimaryConnection()
if self.pt is None:
self.pt = PartitionTable(num_partitions, num_replicas)
elif self.pt.getPartitions() != num_partitions:
# XXX: shouldn't we recover instead of raising ?
raise RuntimeError('the number of partitions is inconsistent')
elif self.pt.getReplicas() != num_replicas:
# XXX: shouldn't we recover instead of raising ?
raise RuntimeError('the number of replicas is inconsistent')
# passive handler # passive handler
self.master_conn.setHandler(self.master_event_handler) self.master_conn.setHandler(self.master_event_handler)
self.master_conn.ask(Packets.AskClusterState()) self.master_conn.ask(Packets.AskClusterState())
self.master_conn.ask(Packets.AskPartitionTable())
def sendPartitionTable(self, conn, min_offset, max_offset, uuid): def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
# we have a pt pt = self.pt
self.pt.log()
row_list = []
if max_offset == 0: if max_offset == 0:
max_offset = self.pt.getPartitions() max_offset = pt.getPartitions()
try:
for offset in xrange(min_offset, max_offset):
row = []
try: try:
for cell in self.pt.getCellList(offset): row_list = map(pt.getRow, xrange(min_offset, max_offset))
if uuid is None or cell.getUUID() == uuid:
row.append((cell.getUUID(), cell.getState()))
except TypeError:
pass
row_list.append((offset, row))
except IndexError: except IndexError:
conn.send(Errors.ProtocolError('invalid partition table offset')) conn.send(Errors.ProtocolError('invalid partition table offset'))
else: else:
conn.answer(Packets.AnswerPartitionList(self.pt.getID(), row_list)) conn.answer(Packets.AnswerPartitionList(
pt.getID(), pt.getReplicas(), row_list))
...@@ -17,11 +17,12 @@ ...@@ -17,11 +17,12 @@
from neo.lib import logging, protocol from neo.lib import logging, protocol
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import uuid_str, Packets from neo.lib.protocol import uuid_str, Packets
from neo.lib.pt import PartitionTable
from neo.lib.exception import PrimaryFailure from neo.lib.exception import PrimaryFailure
def check_primary_master(func): def check_primary_master(func):
def wrapper(self, *args, **kw): def wrapper(self, *args, **kw):
if self.app.bootstrapped: if self.app.master_conn is not None:
return func(self, *args, **kw) return func(self, *args, **kw)
raise protocol.NotReadyError('Not connected to a primary master.') raise protocol.NotReadyError('Not connected to a primary master.')
return wrapper return wrapper
...@@ -74,6 +75,7 @@ class AdminEventHandler(EventHandler): ...@@ -74,6 +75,7 @@ class AdminEventHandler(EventHandler):
tweakPartitionTable = forward_ask(Packets.TweakPartitionTable) tweakPartitionTable = forward_ask(Packets.TweakPartitionTable)
setClusterState = forward_ask(Packets.SetClusterState) setClusterState = forward_ask(Packets.SetClusterState)
setNodeState = forward_ask(Packets.SetNodeState) setNodeState = forward_ask(Packets.SetNodeState)
setNumReplicas = forward_ask(Packets.SetNumReplicas)
checkReplicas = forward_ask(Packets.CheckReplicas) checkReplicas = forward_ask(Packets.CheckReplicas)
truncate = forward_ask(Packets.Truncate) truncate = forward_ask(Packets.Truncate)
repair = forward_ask(Packets.Repair) repair = forward_ask(Packets.Repair)
...@@ -112,16 +114,12 @@ class MasterEventHandler(EventHandler): ...@@ -112,16 +114,12 @@ class MasterEventHandler(EventHandler):
def answerClusterState(self, conn, state): def answerClusterState(self, conn, state):
self.app.cluster_state = state self.app.cluster_state = state
def notifyPartitionChanges(self, conn, ptid, cell_list): def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
self.app.pt.update(ptid, cell_list, self.app.nm) pt = self.app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
def answerPartitionTable(self, conn, ptid, row_list): def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.load(ptid, row_list, self.app.nm) self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
self.app.bootstrapped = True
def sendPartitionTable(self, conn, ptid, row_list):
if self.app.bootstrapped:
self.app.pt.load(ptid, row_list, self.app.nm)
def notifyClusterInformation(self, conn, cluster_state): def notifyClusterInformation(self, conn, cluster_state):
self.app.cluster_state = cluster_state self.app.cluster_state = cluster_state
......
...@@ -76,7 +76,7 @@ class Application(ThreadedApplication): ...@@ -76,7 +76,7 @@ class Application(ThreadedApplication):
self.primary_master_node = None self.primary_master_node = None
self.trying_master_node = None self.trying_master_node = None
# no self-assigned UUID, primary master will supply us one # no self-assigned NID, primary master will supply us one
self._cache = ClientCache() if cache_size is None else \ self._cache = ClientCache() if cache_size is None else \
ClientCache(max_size=cache_size) ClientCache(max_size=cache_size)
self._loading_oid = None self._loading_oid = None
...@@ -226,8 +226,8 @@ class Application(ThreadedApplication): ...@@ -226,8 +226,8 @@ class Application(ThreadedApplication):
self.notifications_handler, self.notifications_handler,
node=node, node=node,
dispatcher=self.dispatcher) dispatcher=self.dispatcher)
p = Packets.RequestIdentification( p = Packets.RequestIdentification(NodeTypes.CLIENT,
NodeTypes.CLIENT, self.uuid, None, self.name, (), None) self.uuid, None, self.name, None, (), ())
try: try:
ask(conn, p, handler=handler) ask(conn, p, handler=handler)
except ConnectionClosed: except ConnectionClosed:
...@@ -244,7 +244,6 @@ class Application(ThreadedApplication): ...@@ -244,7 +244,6 @@ class Application(ThreadedApplication):
# operational. Might raise ConnectionClosed so that the new # operational. Might raise ConnectionClosed so that the new
# primary can be looked-up again. # primary can be looked-up again.
logging.info('Initializing from master') logging.info('Initializing from master')
ask(conn, Packets.AskPartitionTable(), handler=handler)
ask(conn, Packets.AskLastTransaction(), handler=handler) ask(conn, Packets.AskLastTransaction(), handler=handler)
if self.pt.operational(): if self.pt.operational():
break break
...@@ -270,7 +269,7 @@ class Application(ThreadedApplication): ...@@ -270,7 +269,7 @@ class Application(ThreadedApplication):
conn = MTClientConnection(self, self.storage_event_handler, node, conn = MTClientConnection(self, self.storage_event_handler, node,
dispatcher=self.dispatcher) dispatcher=self.dispatcher)
p = Packets.RequestIdentification(NodeTypes.CLIENT, p = Packets.RequestIdentification(NodeTypes.CLIENT,
self.uuid, None, self.name, (), self.id_timestamp) self.uuid, None, self.name, self.id_timestamp, (), ())
try: try:
self._ask(conn, p, handler=self.storage_bootstrap_handler) self._ask(conn, p, handler=self.storage_bootstrap_handler)
except ConnectionClosed: except ConnectionClosed:
......
...@@ -26,10 +26,6 @@ from ..exception import NEOStorageError ...@@ -26,10 +26,6 @@ from ..exception import NEOStorageError
class PrimaryBootstrapHandler(AnswerBaseHandler): class PrimaryBootstrapHandler(AnswerBaseHandler):
""" Bootstrap handler used when looking for the primary master """ """ Bootstrap handler used when looking for the primary master """
def answerPartitionTable(self, conn, ptid, row_list):
assert row_list
self.app.pt.load(ptid, row_list, self.app.nm)
def answerLastTransaction(*args): def answerLastTransaction(*args):
pass pass
...@@ -42,9 +38,6 @@ class PrimaryNotificationsHandler(MTEventHandler): ...@@ -42,9 +38,6 @@ class PrimaryNotificationsHandler(MTEventHandler):
except PrimaryElected, e: except PrimaryElected, e:
self.app.primary_master_node, = e.args self.app.primary_master_node, = e.args
def _acceptIdentification(self, node, num_partitions, num_replicas):
self.app.pt = PartitionTable(num_partitions, num_replicas)
def answerLastTransaction(self, conn, ltid): def answerLastTransaction(self, conn, ltid):
app = self.app app = self.app
app_last_tid = app.__dict__.get('last_tid', '') app_last_tid = app.__dict__.get('last_tid', '')
...@@ -134,9 +127,12 @@ class PrimaryNotificationsHandler(MTEventHandler): ...@@ -134,9 +127,12 @@ class PrimaryNotificationsHandler(MTEventHandler):
finally: finally:
app._cache_lock_release() app._cache_lock_release()
def notifyPartitionChanges(self, conn, ptid, cell_list): def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
if self.app.pt.filled(): # XXX wrong - updating only when already filled ? pt = self.app.pt = object.__new__(PartitionTable)
self.app.pt.update(ptid, cell_list, self.app.nm) pt.load(ptid, num_replicas, row_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def notifyNodeInformation(self, conn, timestamp, node_list): def notifyNodeInformation(self, conn, timestamp, node_list):
super(PrimaryNotificationsHandler, self).notifyNodeInformation( super(PrimaryNotificationsHandler, self).notifyNodeInformation(
......
...@@ -26,7 +26,7 @@ class BootstrapManager(EventHandler): ...@@ -26,7 +26,7 @@ class BootstrapManager(EventHandler):
Manage the bootstrap stage, lookup for the primary master then connect to it Manage the bootstrap stage, lookup for the primary master then connect to it
""" """
def __init__(self, app, node_type, server=None, devpath=()): def __init__(self, app, node_type, server=None, devpath=(), new_nid=()):
""" """
Manage the bootstrap stage of a non-master node, it lookup for the Manage the bootstrap stage of a non-master node, it lookup for the
primary master node, connect to it then returns when the master node primary master node, connect to it then returns when the master node
...@@ -34,9 +34,8 @@ class BootstrapManager(EventHandler): ...@@ -34,9 +34,8 @@ class BootstrapManager(EventHandler):
""" """
self.server = server self.server = server
self.devpath = devpath self.devpath = devpath
self.new_nid = new_nid
self.node_type = node_type self.node_type = node_type
self.num_replicas = None
self.num_partitions = None
app.nm.reset() app.nm.reset()
uuid = property(lambda self: self.app.uuid) uuid = property(lambda self: self.app.uuid)
...@@ -44,7 +43,7 @@ class BootstrapManager(EventHandler): ...@@ -44,7 +43,7 @@ class BootstrapManager(EventHandler):
def connectionCompleted(self, conn): def connectionCompleted(self, conn):
EventHandler.connectionCompleted(self, conn) EventHandler.connectionCompleted(self, conn)
conn.ask(Packets.RequestIdentification(self.node_type, self.uuid, conn.ask(Packets.RequestIdentification(self.node_type, self.uuid,
self.server, self.app.name, self.devpath, None)) self.server, self.app.name, None, self.devpath, self.new_nid))
def connectionFailed(self, conn): def connectionFailed(self, conn):
EventHandler.connectionFailed(self, conn) EventHandler.connectionFailed(self, conn)
...@@ -53,10 +52,8 @@ class BootstrapManager(EventHandler): ...@@ -53,10 +52,8 @@ class BootstrapManager(EventHandler):
def connectionLost(self, conn, new_state): def connectionLost(self, conn, new_state):
self.current = None self.current = None
def _acceptIdentification(self, node, num_partitions, num_replicas): def _acceptIdentification(self, node):
assert self.current is node, (self.current, node) assert self.current is node, (self.current, node)
self.num_partitions = num_partitions
self.num_replicas = num_replicas
def getPrimaryConnection(self): def getPrimaryConnection(self):
""" """
...@@ -73,8 +70,7 @@ class BootstrapManager(EventHandler): ...@@ -73,8 +70,7 @@ class BootstrapManager(EventHandler):
try: try:
while self.current: while self.current:
if self.current.isIdentified(): if self.current.isIdentified():
return (self.current, self.current.getConnection(), return self.current, self.current.getConnection()
self.num_partitions, self.num_replicas)
poll(1) poll(1)
except PrimaryElected, e: except PrimaryElected, e:
if self.current: if self.current:
......
...@@ -210,7 +210,7 @@ class BaseConnection(object): ...@@ -210,7 +210,7 @@ class BaseConnection(object):
def _getReprInfo(self): def _getReprInfo(self):
r = [ r = [
('uuid', uuid_str(self.getUUID())), ('nid', uuid_str(self.getUUID())),
('address', ('[%s]:%s' if ':' in self.addr[0] else '%s:%s') ('address', ('[%s]:%s' if ':' in self.addr[0] else '%s:%s')
% self.addr if self.addr else '?'), % self.addr if self.addr else '?'),
('handler', self.getHandler()), ('handler', self.getHandler()),
......
...@@ -26,6 +26,9 @@ from .protocol import (NodeStates, NodeTypes, Packets, uuid_str, ...@@ -26,6 +26,9 @@ from .protocol import (NodeStates, NodeTypes, Packets, uuid_str,
from .util import cached_property from .util import cached_property
class AnswerDenied(Exception):
"""Helper exception to stop packet processing and answer a Denied error"""
class DelayEvent(Exception): class DelayEvent(Exception):
pass pass
...@@ -98,6 +101,8 @@ class EventHandler(object): ...@@ -98,6 +101,8 @@ class EventHandler(object):
% (m.im_class.__module__, m.im_class.__name__, m.__name__))) % (m.im_class.__module__, m.im_class.__name__, m.__name__)))
except NonReadableCell, e: except NonReadableCell, e:
conn.answer(Errors.NonReadableCell()) conn.answer(Errors.NonReadableCell())
except AnswerDenied, e:
conn.answer(Errors.Denied(str(e)))
except AssertionError: except AssertionError:
e = sys.exc_info() e = sys.exc_info()
try: try:
...@@ -160,8 +165,7 @@ class EventHandler(object): ...@@ -160,8 +165,7 @@ class EventHandler(object):
def _acceptIdentification(*args): def _acceptIdentification(*args):
pass pass
def acceptIdentification(self, conn, node_type, uuid, def acceptIdentification(self, conn, node_type, uuid, your_uuid):
num_partitions, num_replicas, your_uuid):
app = self.app app = self.app
node = app.nm.getByAddress(conn.getAddress()) node = app.nm.getByAddress(conn.getAddress())
assert node.getConnection() is conn, (node.getConnection(), conn) assert node.getConnection() is conn, (node.getConnection(), conn)
...@@ -180,7 +184,7 @@ class EventHandler(object): ...@@ -180,7 +184,7 @@ class EventHandler(object):
elif node.getUUID() != uuid or app.uuid != your_uuid != None: elif node.getUUID() != uuid or app.uuid != your_uuid != None:
raise ProtocolError('invalid uuids') raise ProtocolError('invalid uuids')
node.setIdentified() node.setIdentified()
self._acceptIdentification(node, num_partitions, num_replicas) self._acceptIdentification(node)
return return
conn.close() conn.close()
......
...@@ -486,7 +486,7 @@ class NodeManager(EventQueue): ...@@ -486,7 +486,7 @@ class NodeManager(EventQueue):
# For the first notification, we receive a full list of nodes from # For the first notification, we receive a full list of nodes from
# the master. Remove all unknown nodes from a previous connection. # the master. Remove all unknown nodes from a previous connection.
for node in self._node_set.difference(added_list): for node in self._node_set.difference(added_list):
if app.pt.dropNode(node): if not node.isStorage() or app.pt.dropNode(node):
self.remove(node) self.remove(node)
self.log() self.log()
self.executeQueuedEvents() self.executeQueuedEvents()
......
...@@ -22,7 +22,7 @@ from struct import Struct ...@@ -22,7 +22,7 @@ from struct import Struct
# The protocol version must be increased whenever upgrading a node may require # The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and # to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and
# the high order byte 0 is different from TLS Handshake (0x16). # the high order byte 0 is different from TLS Handshake (0x16).
PROTOCOL_VERSION = 5 PROTOCOL_VERSION = 6
ENCODED_VERSION = Struct('!L').pack(PROTOCOL_VERSION) ENCODED_VERSION = Struct('!L').pack(PROTOCOL_VERSION)
# Avoid memory errors on corrupted data. # Avoid memory errors on corrupted data.
...@@ -62,6 +62,7 @@ class Enum(tuple): ...@@ -62,6 +62,7 @@ class Enum(tuple):
@Enum @Enum
def ErrorCodes(): def ErrorCodes():
ACK ACK
DENIED
NOT_READY NOT_READY
OID_NOT_FOUND OID_NOT_FOUND
TID_NOT_FOUND TID_NOT_FOUND
...@@ -617,10 +618,7 @@ PFCellList = PList('cell_list', ...@@ -617,10 +618,7 @@ PFCellList = PList('cell_list',
) )
PFRowList = PList('row_list', PFRowList = PList('row_list',
PStruct('row',
PNumber('offset'),
PFCellList, PFCellList,
),
) )
PFHistoryList = PList('history_list', PFHistoryList = PList('history_list',
...@@ -686,15 +684,15 @@ class RequestIdentification(Packet): ...@@ -686,15 +684,15 @@ class RequestIdentification(Packet):
PUUID('uuid'), PUUID('uuid'),
PAddress('address'), PAddress('address'),
PString('name'), PString('name'),
PList('devpath', PString('devid')),
PFloat('id_timestamp'), PFloat('id_timestamp'),
# storage:
PList('devpath', PString('devid')),
PList('new_nid', PNumber('offset')),
) )
_answer = PStruct('accept_identification', _answer = PStruct('accept_identification',
PFNodeType, PFNodeType,
PUUID('my_uuid'), PUUID('my_uuid'),
PNumber('num_partitions'),
PNumber('num_replicas'),
PUUID('your_uuid'), PUUID('your_uuid'),
) )
...@@ -750,23 +748,24 @@ class LastIDs(Packet): ...@@ -750,23 +748,24 @@ class LastIDs(Packet):
class PartitionTable(Packet): class PartitionTable(Packet):
""" """
Ask storage node the remaining data needed by master to recover. Ask storage node the remaining data needed by master to recover.
This is also how the clients get the full partition table on connection.
:nodes: M -> S; C -> M :nodes: M -> S
""" """
_answer = PStruct('answer_partition_table', _answer = PStruct('answer_partition_table',
PPTID('ptid'), PPTID('ptid'),
PNumber('num_replicas'),
PFRowList, PFRowList,
) )
class NotifyPartitionTable(Packet): class NotifyPartitionTable(Packet):
""" """
Send the full partition table to admin/storage nodes on connection. Send the full partition table to admin/client/storage nodes on connection.
:nodes: M -> A, S :nodes: M -> A, C, S
""" """
_fmt = PStruct('send_partition_table', _fmt = PStruct('send_partition_table',
PPTID('ptid'), PPTID('ptid'),
PNumber('num_replicas'),
PFRowList, PFRowList,
) )
...@@ -778,6 +777,7 @@ class PartitionChanges(Packet): ...@@ -778,6 +777,7 @@ class PartitionChanges(Packet):
""" """
_fmt = PStruct('notify_partition_changes', _fmt = PStruct('notify_partition_changes',
PPTID('ptid'), PPTID('ptid'),
PNumber('num_replicas'),
PList('cell_list', PList('cell_list',
PStruct('cell', PStruct('cell',
PNumber('offset'), PNumber('offset'),
...@@ -1203,6 +1203,7 @@ class PartitionList(Packet): ...@@ -1203,6 +1203,7 @@ class PartitionList(Packet):
_answer = PStruct('answer_partition_list', _answer = PStruct('answer_partition_list',
PPTID('ptid'), PPTID('ptid'),
PNumber('num_replicas'),
PFRowList, PFRowList,
) )
...@@ -1254,10 +1255,14 @@ class TweakPartitionTable(Packet): ...@@ -1254,10 +1255,14 @@ class TweakPartitionTable(Packet):
:nodes: ctl -> A -> M :nodes: ctl -> A -> M
""" """
_fmt = PStruct('tweak_partition_table', _fmt = PStruct('tweak_partition_table',
PBoolean('dry_run'),
PFUUIDList, PFUUIDList,
) )
_answer = Error _answer = PStruct('answer_tweak_partition_table',
PBoolean('changed'),
PFRowList,
)
class NotifyNodeInformation(Packet): class NotifyNodeInformation(Packet):
""" """
...@@ -1270,6 +1275,18 @@ class NotifyNodeInformation(Packet): ...@@ -1270,6 +1275,18 @@ class NotifyNodeInformation(Packet):
PFNodeList, PFNodeList,
) )
class SetNumReplicas(Packet):
"""
Set the number of replicas.
:nodes: ctl -> A -> M
"""
_fmt = PStruct('set_num_replicas',
PNumber('num_replicas'),
)
_answer = Error
class SetClusterState(Packet): class SetClusterState(Packet):
""" """
Set the cluster state. Set the cluster state.
...@@ -1763,8 +1780,10 @@ class Packets(dict): ...@@ -1763,8 +1780,10 @@ class Packets(dict):
SetNodeState, ignore_when_closed=False) SetNodeState, ignore_when_closed=False)
AddPendingNodes = register( AddPendingNodes = register(
AddPendingNodes, ignore_when_closed=False) AddPendingNodes, ignore_when_closed=False)
TweakPartitionTable = register( TweakPartitionTable, AnswerTweakPartitionTable = register(
TweakPartitionTable, ignore_when_closed=False) TweakPartitionTable)
SetNumReplicas = register(
SetNumReplicas, ignore_when_closed=False)
SetClusterState = register( SetClusterState = register(
SetClusterState, ignore_when_closed=False) SetClusterState, ignore_when_closed=False)
Repair = register( Repair = register(
......
...@@ -86,15 +86,9 @@ class PartitionTable(object): ...@@ -86,15 +86,9 @@ class PartitionTable(object):
'a cell became non-readable whereas all cells were readable' 'a cell became non-readable whereas all cells were readable'
def __init__(self, num_partitions, num_replicas): def __init__(self, num_partitions, num_replicas):
self._id = None
self.np = num_partitions self.np = num_partitions
self.nr = num_replicas self.nr = num_replicas
self.num_filled_rows = 0 self.clear()
# Note: don't use [[]] * num_partition construct, as it duplicates
# instance *references*, so the outer list contains really just one
# inner list instance.
self.partition_list = [[] for _ in xrange(num_partitions)]
self.count_dict = {}
def getID(self): def getID(self):
return self._id return self._id
...@@ -113,7 +107,16 @@ class PartitionTable(object): ...@@ -113,7 +107,16 @@ class PartitionTable(object):
# instance *references*, so the outer list contains really just one # instance *references*, so the outer list contains really just one
# inner list instance. # inner list instance.
self.partition_list = [[] for _ in xrange(self.np)] self.partition_list = [[] for _ in xrange(self.np)]
self.count_dict.clear() self.count_dict = {}
def addNodeList(self, node_list):
"""Add nodes"""
added_list = []
for node in node_list:
if node not in self.count_dict:
self.count_dict[node] = 0
added_list.append(node)
return added_list
def getAssignedPartitionList(self, uuid): def getAssignedPartitionList(self, uuid):
""" Return the partition assigned to the specified UUID """ """ Return the partition assigned to the specified UUID """
...@@ -203,31 +206,31 @@ class PartitionTable(object): ...@@ -203,31 +206,31 @@ class PartitionTable(object):
del self.count_dict[node] del self.count_dict[node]
return not count return not count
def load(self, ptid, row_list, nm): def _load(self, ptid, num_replicas, row_list, getByUUID):
self.__init__(len(row_list), num_replicas)
self._id = ptid
for offset, row in enumerate(row_list):
for uuid, state in row:
node = getByUUID(uuid)
self._setCell(offset, node, state)
def load(self, ptid, num_replicas, row_list, nm):
""" """
Load the partition table with the specified PTID, discard all previous Load the partition table with the specified PTID, discard all previous
content. content.
""" """
self.clear() self._load(ptid, num_replicas, row_list, nm.getByUUID)
self._id = ptid
for offset, row in row_list:
if offset >= self.getPartitions():
raise IndexError
for uuid, state in row:
node = nm.getByUUID(uuid)
# the node must be known by the node manager
assert node is not None
self._setCell(offset, node, state)
logging.debug('partition table loaded (ptid=%s)', ptid) logging.debug('partition table loaded (ptid=%s)', ptid)
self.log() self.log()
def update(self, ptid, cell_list, nm): def update(self, ptid, num_replicas, cell_list, nm):
""" """
Update the partition with the cell list supplied. If a node Update the partition with the cell list supplied. If a node
is not known, it is created in the node manager and set as unavailable is not known, it is created in the node manager and set as unavailable
""" """
assert self._id < ptid, (self._id, ptid) assert self._id < ptid, (self._id, ptid)
self._id = ptid self._id = ptid
self.nr = num_replicas
readable_list = [] readable_list = []
for row in self.partition_list: for row in self.partition_list:
if not all(cell.isReadable() for cell in row): if not all(cell.isReadable() for cell in row):
...@@ -310,14 +313,11 @@ class PartitionTable(object): ...@@ -310,14 +313,11 @@ class PartitionTable(object):
return True return True
def getRow(self, offset): def getRow(self, offset):
row = self.partition_list[offset] return [(cell.getUUID(), cell.getState())
if row is None: for cell in self.partition_list[offset]]
return []
return [(cell.getUUID(), cell.getState()) for cell in row]
def getRowList(self): def getRowList(self):
getRow = self.getRow return map(self.getRow, xrange(self.np))
return [(x, getRow(x)) for x in xrange(self.np)]
class MTPartitionTable(PartitionTable): class MTPartitionTable(PartitionTable):
""" Thread-safe aware version of the partition table, override only methods """ Thread-safe aware version of the partition table, override only methods
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
import sys import sys
from collections import defaultdict from collections import defaultdict
from functools import partial
from time import time from time import time
from neo.lib import logging, util from neo.lib import logging, util
...@@ -76,13 +77,11 @@ class Application(BaseApplication): ...@@ -76,13 +77,11 @@ class Application(BaseApplication):
@classmethod @classmethod
def _buildOptionParser(cls): def _buildOptionParser(cls):
_ = cls.option_parser parser = cls.option_parser
_.description = "NEO Master node" parser.description = "NEO Master node"
cls.addCommonServerOptions('master', '127.0.0.1:10000', '') cls.addCommonServerOptions('master', '127.0.0.1:10000', '')
_ = _.group('master') _ = parser.group('master')
_.int('r', 'replicas', default=0, help="replicas number")
_.int('p', 'partitions', default=100, help="partitions number")
_.int('A', 'autostart', _.int('A', 'autostart',
help="minimum number of pending storage nodes to automatically" help="minimum number of pending storage nodes to automatically"
" start new cluster (to avoid unwanted recreation of the" " start new cluster (to avoid unwanted recreation of the"
...@@ -91,8 +90,12 @@ class Application(BaseApplication): ...@@ -91,8 +90,12 @@ class Application(BaseApplication):
help='the name of cluster to backup') help='the name of cluster to backup')
_('M', 'upstream-masters', parse=util.parseMasterList, _('M', 'upstream-masters', parse=util.parseMasterList,
help='list of master nodes in the cluster to backup') help='list of master nodes in the cluster to backup')
_.int('u', 'uuid', _.int('i', 'nid',
help="specify an UUID to use for this process (testing purpose)") help="specify an NID to use for this process (testing purpose)")
_ = parser.group('database creation')
_.int('r', 'replicas', default=0, help="replicas number")
_.int('p', 'partitions', default=100, help="partitions number")
def __init__(self, config): def __init__(self, config):
super(Application, self).__init__( super(Application, self).__init__(
...@@ -108,7 +111,7 @@ class Application(BaseApplication): ...@@ -108,7 +111,7 @@ class Application(BaseApplication):
for master_address in config['masters']: for master_address in config['masters']:
self.nm.createMaster(address=master_address) self.nm.createMaster(address=master_address)
self._node = self.nm.createMaster(address=self.server, self._node = self.nm.createMaster(address=self.server,
uuid=config.get('uuid')) uuid=config.get('nid'))
logging.node(self.name, self.uuid) logging.node(self.name, self.uuid)
logging.debug('IP address is %s, port is %d', *self.server) logging.debug('IP address is %s, port is %d', *self.server)
...@@ -117,14 +120,14 @@ class Application(BaseApplication): ...@@ -117,14 +120,14 @@ class Application(BaseApplication):
replicas = config['replicas'] replicas = config['replicas']
partitions = config['partitions'] partitions = config['partitions']
if replicas < 0: if replicas < 0:
raise RuntimeError, 'replicas must be a positive integer' sys.exit('replicas must be a positive integer')
if partitions <= 0: if partitions <= 0:
raise RuntimeError, 'partitions must be more than zero' sys.exit('partitions must be more than zero')
self.pt = PartitionTable(partitions, replicas)
logging.info('Configuration:') logging.info('Configuration:')
logging.info('Partitions: %d', partitions) logging.info('Partitions: %d', partitions)
logging.info('Replicas : %d', replicas) logging.info('Replicas : %d', replicas)
logging.info('Name : %s', self.name) logging.info('Name : %s', self.name)
self.newPartitionTable = partial(PartitionTable, partitions, replicas)
self.listening_conn = None self.listening_conn = None
self.cluster_state = None self.cluster_state = None
...@@ -196,7 +199,7 @@ class Application(BaseApplication): ...@@ -196,7 +199,7 @@ class Application(BaseApplication):
node_dict[NodeTypes.MASTER].append(node_info) node_dict[NodeTypes.MASTER].append(node_info)
return node_dict return node_dict
def broadcastNodesInformation(self, node_list, exclude=None): def broadcastNodesInformation(self, node_list):
""" """
Broadcast changes for a set a nodes Broadcast changes for a set a nodes
Send only one packet per connection to reduce bandwidth Send only one packet per connection to reduce bandwidth
...@@ -209,15 +212,21 @@ class Application(BaseApplication): ...@@ -209,15 +212,21 @@ class Application(BaseApplication):
# We don't skip pending storage nodes because we don't send them # We don't skip pending storage nodes because we don't send them
# the full list of nodes when they're added, and it's also quite # the full list of nodes when they're added, and it's also quite
# useful to notify them about new masters. # useful to notify them about new masters.
if node_list and node is not exclude: if node_list:
node.send(Packets.NotifyNodeInformation(now, node_list)) node.send(Packets.NotifyNodeInformation(now, node_list))
def broadcastPartitionChanges(self, cell_list): def broadcastPartitionChanges(self, cell_list, num_replicas=None):
"""Broadcast a Notify Partition Changes packet.""" """Broadcast a Notify Partition Changes packet."""
if cell_list: pt = self.pt
ptid = self.pt.setNextID() if num_replicas is not None:
self.pt.logUpdated() pt.setReplicas(num_replicas)
packet = Packets.NotifyPartitionChanges(ptid, cell_list) elif cell_list:
num_replicas = pt.getReplicas()
else:
return
packet = Packets.NotifyPartitionChanges(
pt.setNextID(), num_replicas, cell_list)
pt.logUpdated()
for node in self.nm.getIdentifiedList(): for node in self.nm.getIdentifiedList():
# As for broadcastNodesInformation, we don't send the full PT # As for broadcastNodesInformation, we don't send the full PT
# when pending storage nodes are added, so keep them notified. # when pending storage nodes are added, so keep them notified.
...@@ -437,16 +446,7 @@ class Application(BaseApplication): ...@@ -437,16 +446,7 @@ class Application(BaseApplication):
conn.send(notification_packet) conn.send(notification_packet)
elif conn.isServer(): elif conn.isServer():
continue continue
if node.isClient(): if node.isMaster():
if state == ClusterStates.RUNNING:
handler = self.client_service_handler
elif state == ClusterStates.BACKINGUP:
handler = self.client_ro_service_handler
else:
if state != ClusterStates.STOPPING:
conn.abort()
continue
elif node.isMaster():
if state == ClusterStates.RECOVERING: if state == ClusterStates.RECOVERING:
handler = self.election_handler handler = self.election_handler
else: else:
...@@ -454,10 +454,16 @@ class Application(BaseApplication): ...@@ -454,10 +454,16 @@ class Application(BaseApplication):
elif node.isStorage() and storage_handler: elif node.isStorage() and storage_handler:
handler = storage_handler handler = storage_handler
else: else:
continue # keep handler # FIXME handler can be not setup-yet at all # There's a single handler type for admins.
# Client can't change handler without being first disconnected.
assert state in (
ClusterStates.STOPPING,
ClusterStates.STOPPING_BACKUP,
) or not node.isClient(), (state, node)
continue # keep handler
if type(handler) is not type(conn.getLastHandler()): if type(handler) is not type(conn.getLastHandler()):
conn.setHandler(handler) conn.setHandler(handler)
handler.connectionCompleted(conn, new=False) handler.handlerSwitched(conn, new=False)
self.cluster_state = state self.cluster_state = state
def getNewUUID(self, uuid, address, node_type): def getNewUUID(self, uuid, address, node_type):
......
...@@ -112,17 +112,12 @@ class BackupApplication(object): ...@@ -112,17 +112,12 @@ class BackupApplication(object):
else: else:
break break
poll(1) poll(1)
node, conn, num_partitions, num_replicas = \ node, conn = bootstrap.getPrimaryConnection()
bootstrap.getPrimaryConnection()
try: try:
app.changeClusterState(ClusterStates.BACKINGUP) app.changeClusterState(ClusterStates.BACKINGUP)
del bootstrap, node del bootstrap, node
if num_partitions != pt.getPartitions():
raise RuntimeError("inconsistent number of partitions")
self.ignore_invalidations = True self.ignore_invalidations = True
self.pt = PartitionTable(num_partitions, num_replicas)
conn.setHandler(BackupHandler(self)) conn.setHandler(BackupHandler(self))
conn.ask(Packets.AskPartitionTable())
conn.ask(Packets.AskLastTransaction()) conn.ask(Packets.AskLastTransaction())
# debug variable to log how big 'tid_list' can be. # debug variable to log how big 'tid_list' can be.
self.debug_tid_count = 0 self.debug_tid_count = 0
......
...@@ -23,10 +23,6 @@ from neo.lib.protocol import Packets ...@@ -23,10 +23,6 @@ from neo.lib.protocol import Packets
class MasterHandler(EventHandler): class MasterHandler(EventHandler):
"""This class implements a generic part of the event handlers.""" """This class implements a generic part of the event handlers."""
def connectionCompleted(self, conn, new=None):
if new is None:
super(MasterHandler, self).connectionCompleted(conn)
def connectionLost(self, conn, new_state=None): def connectionLost(self, conn, new_state=None):
if self.app.listening_conn: # if running if self.app.listening_conn: # if running
self._connectionLost(conn) self._connectionLost(conn)
...@@ -59,17 +55,20 @@ class MasterHandler(EventHandler): ...@@ -59,17 +55,20 @@ class MasterHandler(EventHandler):
+ app.getNodeInformationDict(node_list)[node.getType()]) + app.getNodeInformationDict(node_list)[node.getType()])
conn.send(Packets.NotifyNodeInformation(monotonic_time(), node_list)) conn.send(Packets.NotifyNodeInformation(monotonic_time(), node_list))
def askPartitionTable(self, conn): def handlerSwitched(self, conn, new):
pt = self.app.pt pt = self.app.pt
conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList())) # Except storages during recovery and secondary masters, all nodes
# receives the full partition table as soon as they're identified.
# It is also sent in 2 other cases:
# - to admins during recovery, whenever a newer PT is loaded;
# - to storage when switching from recovery to verification.
# After that, non-master nodes only receive incremental updates.
conn.send(Packets.SendPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
class BaseServiceHandler(MasterHandler): class BaseServiceHandler(MasterHandler):
"""This class deals with events for a service phase.""" """Common handler class for storage nodes."""
def connectionCompleted(self, conn, new):
pt = self.app.pt
conn.send(Packets.SendPartitionTable(pt.getID(), pt.getRowList()))
def connectionLost(self, conn, new_state): def connectionLost(self, conn, new_state):
app = self.app app = self.app
......
...@@ -15,14 +15,16 @@ ...@@ -15,14 +15,16 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import random import random
from functools import wraps
from . import MasterHandler from . import MasterHandler
from ..app import monotonic_time, StateChangedException from ..app import monotonic_time, StateChangedException
from neo.lib import logging from neo.lib import logging
from neo.lib.exception import StoppedOperation from neo.lib.exception import StoppedOperation
from neo.lib.handler import AnswerDenied
from neo.lib.pt import PartitionTableException from neo.lib.pt import PartitionTableException
from neo.lib.protocol import ClusterStates, Errors, \ from neo.lib.protocol import ClusterStates, Errors, \
NodeStates, NodeTypes, Packets, ProtocolError, uuid_str NodeStates, NodeTypes, Packets, uuid_str
from neo.lib.util import dump from neo.lib.util import dump
CLUSTER_STATE_WORKFLOW = { CLUSTER_STATE_WORKFLOW = {
...@@ -38,9 +40,25 @@ NODE_STATE_WORKFLOW = { ...@@ -38,9 +40,25 @@ NODE_STATE_WORKFLOW = {
NodeTypes.STORAGE: (NodeStates.DOWN, NodeStates.UNKNOWN), NodeTypes.STORAGE: (NodeStates.DOWN, NodeStates.UNKNOWN),
} }
def check_state(*states):
def decorator(wrapped):
def wrapper(self, *args):
state = self.app.getClusterState()
if state not in states:
raise AnswerDenied('%s RPC can not be used in %s state'
% (wrapped.__name__, state))
wrapped(self, *args)
return wraps(wrapped)(wrapper)
return decorator
class AdministrationHandler(MasterHandler): class AdministrationHandler(MasterHandler):
"""This class deals with messages from the admin node only""" """This class deals with messages from the admin node only"""
def handlerSwitched(self, conn, new):
assert new
super(AdministrationHandler, self).handlerSwitched(conn, new)
def connectionLost(self, conn, new_state): def connectionLost(self, conn, new_state):
node = self.app.nm.getByUUID(conn.getUUID()) node = self.app.nm.getByUUID(conn.getUUID())
if node is not None: if node is not None:
...@@ -58,30 +76,28 @@ class AdministrationHandler(MasterHandler): ...@@ -58,30 +76,28 @@ class AdministrationHandler(MasterHandler):
# check request # check request
try: try:
if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]: if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]:
raise ProtocolError('Can not switch to this state') raise AnswerDenied('Can not switch to this state')
except KeyError: except KeyError:
if state != ClusterStates.STOPPING: if state != ClusterStates.STOPPING:
raise ProtocolError('Invalid state requested') raise AnswerDenied('Invalid state requested')
# change state # change state
if state == ClusterStates.VERIFYING: if state == ClusterStates.VERIFYING:
storage_list = app.nm.getStorageList(only_identified=True) storage_list = app.nm.getStorageList(only_identified=True)
if not storage_list: if not storage_list:
raise ProtocolError('Cannot exit recovery without any ' raise AnswerDenied(
'storage node') 'Cannot exit recovery without any storage node')
for node in storage_list: for node in storage_list:
assert node.isPending(), node assert node.isPending(), node
if node.getConnection().isPending(): if node.getConnection().isPending():
# XXX: It's wrong to use ProtocolError here. We must reply raise AnswerDenied(
# less aggressively because the admin has no way to 'Cannot exit recovery now: node %r is entering cluster'
# know that there's still pending activity. % node,)
raise ProtocolError('Cannot exit recovery now: node %r is '
'entering cluster' % (node, ))
app._startup_allowed = True app._startup_allowed = True
state = app.cluster_state state = app.cluster_state
elif state == ClusterStates.STARTING_BACKUP: elif state == ClusterStates.STARTING_BACKUP:
if app.tm.hasPending() or app.nm.getClientList(True): if app.tm.hasPending() or app.nm.getClientList(True):
raise ProtocolError("Can not switch to %s state with pending" raise AnswerDenied("Can not switch to %s state with pending"
" transactions or connected clients" % state) " transactions or connected clients" % state)
conn.answer(Errors.Ack('Cluster state changed')) conn.answer(Errors.Ack('Cluster state changed'))
...@@ -93,21 +109,24 @@ class AdministrationHandler(MasterHandler): ...@@ -93,21 +109,24 @@ class AdministrationHandler(MasterHandler):
app = self.app app = self.app
node = app.nm.getByUUID(uuid) node = app.nm.getByUUID(uuid)
if node is None: if node is None:
raise ProtocolError('unknown node') raise AnswerDenied('unknown node')
if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()): if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
raise ProtocolError('can not switch node to this state') raise AnswerDenied('can not switch node to %s state' % state)
if uuid == app.uuid: if uuid == app.uuid:
raise ProtocolError('can not kill primary master node') raise AnswerDenied('can not kill primary master node')
state_changed = state != node.getState() state_changed = state != node.getState()
message = ('state changed' if state_changed else message = ('state changed' if state_changed else
'node already in %s state' % state) 'node already in %s state' % state)
if node.isStorage(): if node.isStorage():
keep = state == NodeStates.DOWN keep = state == NodeStates.DOWN
if node.isRunning() and not keep:
raise AnswerDenied(
"a running node must be stopped before removal")
try: try:
cell_list = app.pt.dropNodeList([node], keep) cell_list = app.pt.dropNodeList([node], keep)
except PartitionTableException, e: except PartitionTableException, e:
raise ProtocolError(str(e)) raise AnswerDenied(str(e))
node.setState(state) node.setState(state)
if node.isConnected(): if node.isConnected():
# notify itself so it can shutdown # notify itself so it can shutdown
...@@ -134,16 +153,17 @@ class AdministrationHandler(MasterHandler): ...@@ -134,16 +153,17 @@ class AdministrationHandler(MasterHandler):
monotonic_time(), [node.asTuple()])) monotonic_time(), [node.asTuple()]))
app.broadcastNodesInformation([node]) app.broadcastNodesInformation([node])
# XXX: Would it be safe to allow more states ?
__change_pt_rpc = check_state(
ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP)
@__change_pt_rpc
def addPendingNodes(self, conn, uuid_list): def addPendingNodes(self, conn, uuid_list):
uuids = ', '.join(map(uuid_str, uuid_list)) uuids = ', '.join(map(uuid_str, uuid_list))
logging.debug('Add nodes %s', uuids) logging.debug('Add nodes %s', uuids)
app = self.app app = self.app
state = app.getClusterState()
# XXX: Would it be safe to allow more states ?
if state not in (ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP):
raise ProtocolError('Can not add nodes in %s state' % state)
# take all pending nodes # take all pending nodes
node_list = list(app.pt.addNodeList(node node_list = list(app.pt.addNodeList(node
for node in app.nm.getStorageList() for node in app.nm.getStorageList()
...@@ -165,31 +185,50 @@ class AdministrationHandler(MasterHandler): ...@@ -165,31 +185,50 @@ class AdministrationHandler(MasterHandler):
for uuid in uuid_list: for uuid in uuid_list:
node = getByUUID(uuid) node = getByUUID(uuid)
if node is None or not (node.isStorage() and node.isIdentified()): if node is None or not (node.isStorage() and node.isIdentified()):
raise ProtocolError("invalid storage node %s" % uuid_str(uuid)) raise AnswerDenied("invalid storage node %s" % uuid_str(uuid))
node_list.append(node) node_list.append(node)
repair = Packets.NotifyRepair(*args) repair = Packets.NotifyRepair(*args)
for node in node_list: for node in node_list:
node.send(repair) node.send(repair)
conn.answer(Errors.Ack('')) conn.answer(Errors.Ack(''))
def tweakPartitionTable(self, conn, uuid_list): @__change_pt_rpc
app = self.app def setNumReplicas(self, conn, num_replicas):
state = app.getClusterState() self.app.broadcastPartitionChanges((), num_replicas)
# XXX: Would it be safe to allow more states ?
if state not in (ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP):
raise ProtocolError('Can not tweak partition table in %s state'
% state)
app.broadcastPartitionChanges(app.pt.tweak([node
for node in app.nm.getStorageList()
if node.getUUID() in uuid_list or not node.isRunning()]))
conn.answer(Errors.Ack('')) conn.answer(Errors.Ack(''))
def truncate(self, conn, tid): @__change_pt_rpc
def tweakPartitionTable(self, conn, dry_run, uuid_list):
app = self.app app = self.app
if app.cluster_state != ClusterStates.RUNNING: drop_list = []
raise ProtocolError('Can not truncate in this state') for node in app.nm.getStorageList():
if node.getUUID() in uuid_list or node.isPending():
drop_list.append(node)
elif not node.isRunning():
drop_list.append(node)
raise AnswerDenied(
'tweak: down nodes must be listed explicitly')
if dry_run:
pt = object.__new__(app.pt.__class__)
new_nodes = pt.load(app.pt.getID(), app.pt.getReplicas(),
app.pt.getRowList(), app.nm)
assert not new_nodes
pt.addNodeList(node
for node, count in app.pt.count_dict.iteritems()
if not count)
else:
pt = app.pt
try:
changed_list = pt.tweak(drop_list)
except PartitionTableException, e:
raise AnswerDenied(str(e))
if not dry_run:
app.broadcastPartitionChanges(changed_list)
conn.answer(Packets.AnswerTweakPartitionTable(
bool(changed_list), pt.getRowList()))
@check_state(ClusterStates.RUNNING)
def truncate(self, conn, tid):
conn.answer(Errors.Ack('')) conn.answer(Errors.Ack(''))
raise StoppedOperation(tid) raise StoppedOperation(tid)
...@@ -237,3 +276,5 @@ class AdministrationHandler(MasterHandler): ...@@ -237,3 +276,5 @@ class AdministrationHandler(MasterHandler):
node.send(Packets.CheckPartition( node.send(Packets.CheckPartition(
offset, source, min_tid, max_tid)) offset, source, min_tid, max_tid))
conn.answer(Errors.Ack('')) conn.answer(Errors.Ack(''))
del __change_pt_rpc
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
from neo.lib.exception import PrimaryFailure from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import ZERO_TID from neo.lib.protocol import ZERO_TID
from neo.lib.pt import PartitionTable
class BackupHandler(EventHandler): class BackupHandler(EventHandler):
"""Handler dedicated to upstream master during BACKINGUP state""" """Handler dedicated to upstream master during BACKINGUP state"""
...@@ -25,12 +26,15 @@ class BackupHandler(EventHandler): ...@@ -25,12 +26,15 @@ class BackupHandler(EventHandler):
if self.app.app.listening_conn: # if running if self.app.app.listening_conn: # if running
raise PrimaryFailure('connection lost') raise PrimaryFailure('connection lost')
def answerPartitionTable(self, conn, ptid, row_list): def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
self.app.pt.load(ptid, row_list, self.app.nm) app = self.app
pt = app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
if pt.getPartitions() != app.app.pt.getPartitions():
raise RuntimeError("inconsistent number of partitions")
def notifyPartitionChanges(self, conn, ptid, cell_list): def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
if self.app.pt.filled(): self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
self.app.pt.update(ptid, cell_list, self.app.nm)
# NOTE invalidation from M -> Mb (all partitions) # NOTE invalidation from M -> Mb (all partitions)
def answerLastTransaction(self, conn, tid): def answerLastTransaction(self, conn, tid):
......
...@@ -22,6 +22,10 @@ from . import MasterHandler ...@@ -22,6 +22,10 @@ from . import MasterHandler
class ClientServiceHandler(MasterHandler): class ClientServiceHandler(MasterHandler):
""" Handler dedicated to client during service state """ """ Handler dedicated to client during service state """
def handlerSwitched(self, conn, new):
assert new
super(ClientServiceHandler, self).handlerSwitched(conn, new)
def _connectionLost(self, conn): def _connectionLost(self, conn):
# cancel its transactions and forgot the node # cancel its transactions and forgot the node
app = self.app app = self.app
......
...@@ -17,14 +17,14 @@ ...@@ -17,14 +17,14 @@
from neo.lib import logging from neo.lib import logging
from neo.lib.exception import PrimaryElected from neo.lib.exception import PrimaryElected
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, \ from neo.lib.protocol import CellStates, ClusterStates, NodeStates, \
NotReadyError, Packets, ProtocolError, uuid_str NodeTypes, NotReadyError, Packets, ProtocolError, uuid_str
from ..app import monotonic_time from ..app import monotonic_time
class IdentificationHandler(EventHandler): class IdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid, def requestIdentification(self, conn, node_type, uuid,
address, name, devpath, id_timestamp): address, name, id_timestamp, devpath, new_nid):
app = self.app app = self.app
self.checkClusterName(name) self.checkClusterName(name)
if address == app.server: if address == app.server:
...@@ -77,6 +77,16 @@ class IdentificationHandler(EventHandler): ...@@ -77,6 +77,16 @@ class IdentificationHandler(EventHandler):
manager = app manager = app
state, handler = manager.identifyStorageNode( state, handler = manager.identifyStorageNode(
uuid is not None and node is not None) uuid is not None and node is not None)
if not address:
if app.cluster_state == ClusterStates.RECOVERING:
raise NotReadyError
if uuid or not new_nid:
raise ProtocolError
state = NodeStates.DOWN
# We'll let the storage node close the connection. If we
# aborted it at the end of the method, BootstrapManager
# (which is used by storage nodes) could see the closure
# and try to reconnect to a master.
human_readable_node_type = ' storage (%s) ' % (state, ) human_readable_node_type = ' storage (%s) ' % (state, )
elif node_type == NodeTypes.MASTER: elif node_type == NodeTypes.MASTER:
if app.election: if app.election:
...@@ -105,24 +115,27 @@ class IdentificationHandler(EventHandler): ...@@ -105,24 +115,27 @@ class IdentificationHandler(EventHandler):
node.devpath = tuple(devpath) node.devpath = tuple(devpath)
node.id_timestamp = monotonic_time() node.id_timestamp = monotonic_time()
node.setState(state) node.setState(state)
app.broadcastNodesInformation([node])
if new_nid:
changed_list = []
for offset in new_nid:
changed_list.append((offset, uuid, CellStates.OUT_OF_DATE))
app.pt._setCell(offset, node, CellStates.OUT_OF_DATE)
app.broadcastPartitionChanges(changed_list)
conn.setHandler(handler) conn.setHandler(handler)
node.setConnection(conn, not node.isIdentified()) node.setConnection(conn, not node.isIdentified())
app.broadcastNodesInformation([node], node)
conn.answer(Packets.AcceptIdentification( conn.answer(Packets.AcceptIdentification(
NodeTypes.MASTER, NodeTypes.MASTER,
app.uuid, app.uuid,
app.pt.getPartitions(),
app.pt.getReplicas(),
uuid)) uuid))
handler._notifyNodeInformation(conn) handler._notifyNodeInformation(conn)
handler.connectionCompleted(conn, True) handler.handlerSwitched(conn, True)
class SecondaryIdentificationHandler(EventHandler): class SecondaryIdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid, def requestIdentification(self, conn, node_type, uuid,
address, name, devpath, id_timestamp): address, name, id_timestamp, devpath, new_nid):
app = self.app app = self.app
self.checkClusterName(name) self.checkClusterName(name)
if address == app.server: if address == app.server:
......
...@@ -23,6 +23,9 @@ from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, Packets ...@@ -23,6 +23,9 @@ from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, Packets
class SecondaryHandler(MasterHandler): class SecondaryHandler(MasterHandler):
"""Handler used by primary to handle secondary masters""" """Handler used by primary to handle secondary masters"""
def handlerSwitched(self, conn, new):
pass
def _connectionLost(self, conn): def _connectionLost(self, conn):
app = self.app app = self.app
node = app.nm.getByUUID(conn.getUUID()) node = app.nm.getByUUID(conn.getUUID())
...@@ -30,21 +33,20 @@ class SecondaryHandler(MasterHandler): ...@@ -30,21 +33,20 @@ class SecondaryHandler(MasterHandler):
app.broadcastNodesInformation([node]) app.broadcastNodesInformation([node])
class ElectionHandler(MasterHandler): class ElectionHandler(SecondaryHandler):
"""Handler used by primary to handle secondary masters during election""" """Handler used by primary to handle secondary masters during election"""
def connectionCompleted(self, conn, new=None): def connectionCompleted(self, conn):
if new is None:
super(ElectionHandler, self).connectionCompleted(conn) super(ElectionHandler, self).connectionCompleted(conn)
app = self.app app = self.app
conn.ask(Packets.RequestIdentification(NodeTypes.MASTER, conn.ask(Packets.RequestIdentification(NodeTypes.MASTER,
app.uuid, app.server, app.name, (), app.election)) app.uuid, app.server, app.name, app.election, (), ()))
def connectionFailed(self, conn): def connectionFailed(self, conn):
super(ElectionHandler, self).connectionFailed(conn) super(ElectionHandler, self).connectionFailed(conn)
self.connectionLost(conn) self.connectionLost(conn)
def _acceptIdentification(self, node, *args): def _acceptIdentification(self, node):
raise PrimaryElected(node) raise PrimaryElected(node)
def _connectionLost(self, *args): def _connectionLost(self, *args):
...@@ -66,7 +68,7 @@ class ElectionHandler(MasterHandler): ...@@ -66,7 +68,7 @@ class ElectionHandler(MasterHandler):
class PrimaryHandler(ElectionHandler): class PrimaryHandler(ElectionHandler):
"""Handler used by secondaries to handle primary master""" """Handler used by secondaries to handle primary master"""
def _acceptIdentification(self, node, num_partitions, num_replicas): def _acceptIdentification(self, node):
assert self.app.primary_master is node, (self.app.primary_master, node) assert self.app.primary_master is node, (self.app.primary_master, node)
def _connectionLost(self, conn): def _connectionLost(self, conn):
......
...@@ -26,10 +26,10 @@ from . import BaseServiceHandler ...@@ -26,10 +26,10 @@ from . import BaseServiceHandler
class StorageServiceHandler(BaseServiceHandler): class StorageServiceHandler(BaseServiceHandler):
""" Handler dedicated to storages during service state """ """ Handler dedicated to storages during service state """
def connectionCompleted(self, conn, new): def handlerSwitched(self, conn, new):
app = self.app app = self.app
if new: if new:
super(StorageServiceHandler, self).connectionCompleted(conn, new) super(StorageServiceHandler, self).handlerSwitched(conn, new)
node = app.nm.getByUUID(conn.getUUID()) node = app.nm.getByUUID(conn.getUUID())
if node.isRunning(): # node may be PENDING if node.isRunning(): # node may be PENDING
app.startStorage(node) app.startStorage(node)
......
...@@ -56,6 +56,10 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -56,6 +56,10 @@ class PartitionTable(neo.lib.pt.PartitionTable):
self._id += 1 self._id += 1
return self._id return self._id
def setReplicas(self, num_replicas):
assert num_replicas >= 0, num_replicas
self.nr = num_replicas
def make(self, node_list): def make(self, node_list):
"""Make a new partition table from scratch.""" """Make a new partition table from scratch."""
assert self._id is None and node_list, (self._id, node_list) assert self._id is None and node_list, (self._id, node_list)
...@@ -108,26 +112,19 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -108,26 +112,19 @@ class PartitionTable(neo.lib.pt.PartitionTable):
self.num_filled_rows = len(filter(None, self.partition_list)) self.num_filled_rows = len(filter(None, self.partition_list))
return change_list return change_list
def load(self, ptid, row_list, nm): def load(self, ptid, num_replicas, row_list, nm):
""" """
Load a partition table from a storage node during the recovery. Load a partition table from a storage node during the recovery.
Return the new storage nodes registered Return the new storage nodes registered
""" """
# check offsets
for offset, _row in row_list:
if offset >= self.getPartitions():
raise IndexError, offset
# store the partition table
self.clear()
self._id = ptid
new_nodes = [] new_nodes = []
for offset, row in row_list: def getByUUID(nid):
for uuid, state in row: node = nm.getByUUID(nid)
node = nm.getByUUID(uuid)
if node is None: if node is None:
node = nm.createStorage(uuid=uuid) node = nm.createStorage(uuid=nid)
new_nodes.append(node.asTuple()) new_nodes.append(node.asTuple())
self._setCell(offset, node, state) return node
self._load(ptid, num_replicas, row_list, getByUUID)
return new_nodes return new_nodes
def setUpToDate(self, node, offset): def setUpToDate(self, node, offset):
...@@ -166,15 +163,6 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -166,15 +163,6 @@ class PartitionTable(neo.lib.pt.PartitionTable):
return cell_list return cell_list
def addNodeList(self, node_list):
"""Add nodes"""
added_list = []
for node in node_list:
if node not in self.count_dict:
self.count_dict[node] = 0
added_list.append(node)
return added_list
def tweak(self, drop_list=()): def tweak(self, drop_list=()):
"""Optimize partition table """Optimize partition table
...@@ -183,7 +171,8 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -183,7 +171,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
few readable cells, some cells are instead marked as FEEDING. This is few readable cells, some cells are instead marked as FEEDING. This is
a preliminary step to drop these nodes, otherwise the partition table a preliminary step to drop these nodes, otherwise the partition table
could become non-operational. could become non-operational.
- Other nodes must have the same number of cells, off by 1. In fact, the code touching these cells is disabled (see NOTE below).
- Other nodes must have the same number of non-feeding cells, off by 1.
- When a transaction creates new objects (oids are roughly allocated - When a transaction creates new objects (oids are roughly allocated
sequentially), we expect better performance by maximizing the number sequentially), we expect better performance by maximizing the number
of involved nodes (i.e. parallelizing writes). of involved nodes (i.e. parallelizing writes).
...@@ -232,6 +221,8 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -232,6 +221,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
# Collect some data in a usable form for the rest of the method. # Collect some data in a usable form for the rest of the method.
node_list = {node: {} for node in self.count_dict node_list = {node: {} for node in self.count_dict
if node not in drop_list} if node not in drop_list}
if not node_list:
raise neo.lib.pt.PartitionTableException("Can't remove all nodes.")
drop_list = defaultdict(list) drop_list = defaultdict(list)
for offset, row in enumerate(self.partition_list): for offset, row in enumerate(self.partition_list):
for cell in row: for cell in row:
...@@ -420,6 +411,22 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -420,6 +411,22 @@ class PartitionTable(neo.lib.pt.PartitionTable):
outdated_list[offset] -= 1 outdated_list[offset] -= 1
for offset, cell in cell_dict.iteritems(): for offset, cell in cell_dict.iteritems():
discard_list[offset].append(cell) discard_list[offset].append(cell)
# NOTE: The following line disables the next 2 lines, which actually
# causes cells in drop_list to be discarded, now or later;
# drop_list could be renamed into ignore_list.
# 1. Deleting data partition per partition is a lot of work, so
# why ask nodes in drop_list to do that when the goal is
# simply to trash the whole underlying database?
# 2. By excluding nodes from a tweak, it becomes possible to have
# parts of the partition table that are tweaked differently.
# This may require to temporarily change the number of
# replicas for the part being tweaked. In the future, this
# number may be specified in the 'tweak' command, to avoid
# race conditions with setUpToDate().
# Overall, a common use case is when importing a ZODB to NEO,
# to keep the initial importing node up until the database is
# split and replicated to the final nodes.
drop_list = {}
for offset, drop_list in drop_list.iteritems(): for offset, drop_list in drop_list.iteritems():
discard_list[offset] += drop_list discard_list[offset] += drop_list
# We have sorted cells to discard in order to first deallocate nodes # We have sorted cells to discard in order to first deallocate nodes
......
...@@ -28,7 +28,7 @@ class RecoveryManager(MasterHandler): ...@@ -28,7 +28,7 @@ class RecoveryManager(MasterHandler):
def __init__(self, app): def __init__(self, app):
# The target node's uuid to request next. # The target node's uuid to request next.
self.target_ptid = None self.target_ptid = 0
self.ask_pt = [] self.ask_pt = []
self.backup_tid_dict = {} self.backup_tid_dict = {}
self.truncate_dict = {} self.truncate_dict = {}
...@@ -52,9 +52,8 @@ class RecoveryManager(MasterHandler): ...@@ -52,9 +52,8 @@ class RecoveryManager(MasterHandler):
""" """
logging.info('begin the recovery of the status') logging.info('begin the recovery of the status')
app = self.app app = self.app
pt = app.pt pt = app.pt = app.newPartitionTable()
app.changeClusterState(ClusterStates.RECOVERING) app.changeClusterState(ClusterStates.RECOVERING)
pt.clear()
self.try_secondary = True self.try_secondary = True
...@@ -113,7 +112,7 @@ class RecoveryManager(MasterHandler): ...@@ -113,7 +112,7 @@ class RecoveryManager(MasterHandler):
for node in node_list: for node in node_list:
conn = node.getConnection() conn = node.getConnection()
conn.send(truncate) conn.send(truncate)
self.connectionCompleted(conn, False) self.handlerSwitched(conn, False)
continue continue
node_list = pt.getConnectedNodeList() node_list = pt.getConnectedNodeList()
break break
...@@ -140,12 +139,12 @@ class RecoveryManager(MasterHandler): ...@@ -140,12 +139,12 @@ class RecoveryManager(MasterHandler):
logging.info('creating a new partition table') logging.info('creating a new partition table')
pt.make(node_list) pt.make(node_list)
self._notifyAdmins(Packets.SendPartitionTable( self._notifyAdmins(Packets.SendPartitionTable(
pt.getID(), pt.getRowList())) pt.getID(), pt.getReplicas(), pt.getRowList()))
else: else:
cell_list = pt.outdate() cell_list = pt.outdate()
if cell_list: if cell_list:
self._notifyAdmins(Packets.NotifyPartitionChanges( self._notifyAdmins(Packets.NotifyPartitionChanges(
pt.setNextID(), cell_list)) pt.setNextID(), pt.getReplicas(), cell_list))
if app.backup_tid: if app.backup_tid:
pt.setBackupTidDict(self.backup_tid_dict) pt.setBackupTidDict(self.backup_tid_dict)
app.backup_tid = pt.getBackupTid() app.backup_tid = pt.getBackupTid()
...@@ -175,16 +174,16 @@ class RecoveryManager(MasterHandler): ...@@ -175,16 +174,16 @@ class RecoveryManager(MasterHandler):
if node is None or node.getState() == new_state: if node is None or node.getState() == new_state:
return return
node.setState(new_state) node.setState(new_state)
# broadcast to all so that admin nodes gets informed
self.app.broadcastNodesInformation([node]) self.app.broadcastNodesInformation([node])
def connectionCompleted(self, conn, new): def handlerSwitched(self, conn, new):
# ask the last IDs to perform the recovery # ask the last IDs to perform the recovery
conn.ask(Packets.AskRecovery()) conn.ask(Packets.AskRecovery())
def answerRecovery(self, conn, ptid, backup_tid, truncate_tid): def answerRecovery(self, conn, ptid, backup_tid, truncate_tid):
uuid = conn.getUUID() uuid = conn.getUUID()
if self.target_ptid <= ptid: # ptid is None if the node has an empty partition table.
if ptid and self.target_ptid <= ptid:
# Maybe a newer partition table. # Maybe a newer partition table.
if self.target_ptid == ptid and self.ask_pt: if self.target_ptid == ptid and self.ask_pt:
# Another node is already asked. # Another node is already asked.
...@@ -197,17 +196,14 @@ class RecoveryManager(MasterHandler): ...@@ -197,17 +196,14 @@ class RecoveryManager(MasterHandler):
self.backup_tid_dict[uuid] = backup_tid self.backup_tid_dict[uuid] = backup_tid
self.truncate_dict[uuid] = truncate_tid self.truncate_dict[uuid] = truncate_tid
def answerPartitionTable(self, conn, ptid, row_list): def answerPartitionTable(self, conn, ptid, num_replicas, row_list):
# If this is not from a target node, ignore it. # If this is not from a target node, ignore it.
if ptid == self.target_ptid: if ptid == self.target_ptid:
app = self.app app = self.app
try: new_nodes = app.pt.load(ptid, num_replicas, row_list, app.nm)
new_nodes = app.pt.load(ptid, row_list, app.nm)
except IndexError:
raise ProtocolError('Invalid offset')
self._notifyAdmins( self._notifyAdmins(
Packets.NotifyNodeInformation(monotonic_time(), new_nodes), Packets.NotifyNodeInformation(monotonic_time(), new_nodes),
Packets.SendPartitionTable(ptid, row_list)) Packets.SendPartitionTable(ptid, num_replicas, row_list))
self.ask_pt = () self.ask_pt = ()
uuid = conn.getUUID() uuid = conn.getUUID()
app.backup_tid = self.backup_tid_dict[uuid] app.backup_tid = self.backup_tid_dict[uuid]
......
...@@ -16,9 +16,11 @@ ...@@ -16,9 +16,11 @@
import sys import sys
from .neoctl import NeoCTL, NotReadyException from .neoctl import NeoCTL, NotReadyException
from neo.lib.node import NodeManager
from neo.lib.pt import PartitionTable
from neo.lib.util import p64, u64, tidFromTime, timeStringFromTID from neo.lib.util import p64, u64, tidFromTime, timeStringFromTID
from neo.lib.protocol import uuid_str, formatNodeList, \ from neo.lib.protocol import uuid_str, formatNodeList, \
ClusterStates, NodeTypes, UUID_NAMESPACES, ZERO_TID ClusterStates, NodeStates, NodeTypes, UUID_NAMESPACES, ZERO_TID
action_dict = { action_dict = {
'print': { 'print': {
...@@ -30,6 +32,7 @@ action_dict = { ...@@ -30,6 +32,7 @@ action_dict = {
}, },
'set': { 'set': {
'cluster': 'setClusterState', 'cluster': 'setClusterState',
'replicas': 'setNumReplicas',
}, },
'check': 'checkReplicas', 'check': 'checkReplicas',
'start': 'startCluster', 'start': 'startCluster',
...@@ -47,6 +50,11 @@ uuid_int = (lambda ns: lambda uuid: ...@@ -47,6 +50,11 @@ uuid_int = (lambda ns: lambda uuid:
(ns[uuid[0]] << 24) + int(uuid[1:]) (ns[uuid[0]] << 24) + int(uuid[1:])
)({str(k)[0]: v for k, v in UUID_NAMESPACES.iteritems()}) )({str(k)[0]: v for k, v in UUID_NAMESPACES.iteritems()})
class dummy_app:
id_timestamp = uuid = 0
class TerminalNeoCTL(object): class TerminalNeoCTL(object):
def __init__(self, *args, **kw): def __init__(self, *args, **kw):
self.neoctl = NeoCTL(*args, **kw) self.neoctl = NeoCTL(*args, **kw)
...@@ -68,6 +76,15 @@ class TerminalNeoCTL(object): ...@@ -68,6 +76,15 @@ class TerminalNeoCTL(object):
asNode = staticmethod(uuid_int) asNode = staticmethod(uuid_int)
def formatPartitionTable(self, row_list):
    """Return `row_list` rendered as text by PartitionTable._format().

    A temporary NodeManager is updated with the storage nodes returned by
    self.neoctl.getNodeList(), and a PartitionTable instance is created
    without running its constructor before being loaded with the rows.
    The first 4 characters of every formatted line are dropped.
    """
    node_manager = NodeManager()
    storage_nodes = self.neoctl.getNodeList(node_type=NodeTypes.STORAGE)
    node_manager.update(dummy_app, 1, storage_nodes)
    # Bypass PartitionTable.__init__: only _load() is used to fill it.
    table = object.__new__(PartitionTable)
    table._load(None, None, row_list, node_manager.getByUUID)
    running_nodes = node_manager.getByStateList(NodeStates.RUNNING)
    table.addNodeList(running_nodes)
    return '\n'.join(text[4:] for text in table._format())
def formatRowList(self, row_list): def formatRowList(self, row_list):
return '\n'.join('%03d |%s' % (offset, return '\n'.join('%03d |%s' % (offset,
''.join(' %s - %s |' % (uuid_str(uuid), state) ''.join(' %s - %s |' % (uuid_str(uuid), state)
...@@ -106,10 +123,12 @@ class TerminalNeoCTL(object): ...@@ -106,10 +123,12 @@ class TerminalNeoCTL(object):
max_offset = int(max_offset) max_offset = int(max_offset)
if node is not None: if node is not None:
node = self.asNode(node) node = self.asNode(node)
ptid, row_list = self.neoctl.getPartitionRowList( ptid, num_replicas, row_list = self.neoctl.getPartitionRowList(
min_offset=min_offset, max_offset=max_offset, node=node) min_offset=min_offset, max_offset=max_offset, node=node)
# TODO: return ptid return '# ptid: %s, replicas: %s\n%s' % (ptid, num_replicas,
return self.formatRowList(row_list) self.formatRowList(enumerate(row_list, min_offset))
if min_offset or max_offset else
self.formatPartitionTable(row_list))
def getNodeList(self, params): def getNodeList(self, params):
""" """
...@@ -141,6 +160,18 @@ class TerminalNeoCTL(object): ...@@ -141,6 +160,18 @@ class TerminalNeoCTL(object):
assert len(params) == 1 assert len(params) == 1
return self.neoctl.setClusterState(self.asClusterState(params[0])) return self.neoctl.setClusterState(self.asClusterState(params[0]))
def setNumReplicas(self, params):
    """
      Set number of replicas.
      Parameters: nr
        nr: positive number (0 means no redundancy)
    """
    # Exactly one command-line argument is expected.
    assert len(params) == 1
    replicas = int(params[0])
    if replicas >= 0:
        return self.neoctl.setNumReplicas(replicas)
    # Negative values are refused before anything is sent.
    sys.exit('invalid number of replicas')
def startCluster(self, params): def startCluster(self, params):
""" """
Starts cluster operation after a startup. Starts cluster operation after a startup.
...@@ -168,10 +199,18 @@ class TerminalNeoCTL(object): ...@@ -168,10 +199,18 @@ class TerminalNeoCTL(object):
def tweakPartitionTable(self, params):
    """
      Optimize partition table.
      No change is done to the specified/down storage nodes and they don't
      count as replicas. The purpose of listing nodes is usually to drop
      them once the data is replicated to other nodes.
      Parameters: [-n] [node [...]]
        -n: dry run
    """
    # Every parameter is optional: only inspect the first one when there is
    # one, otherwise a bare 'tweak' would fail with IndexError.
    dry_run = bool(params) and params[0] == '-n'
    # dry_run is a bool, which is a valid slice bound: it skips the '-n'
    # flag (if given) so that only node identifiers are converted.
    changed, row_list = self.neoctl.tweakPartitionTable(
        map(self.asNode, params[dry_run:]), dry_run)
    if changed:
        return self.formatPartitionTable(row_list)
    return 'No change done.'
def killNode(self, params): def killNode(self, params):
""" """
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import ErrorCodes, Packets from neo.lib.protocol import ErrorCodes, Packets
...@@ -44,8 +45,8 @@ class CommandEventHandler(EventHandler): ...@@ -44,8 +45,8 @@ class CommandEventHandler(EventHandler):
def ack(self, conn, msg): def ack(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.ACK, msg)) self.__respond((Packets.Error, ErrorCodes.ACK, msg))
def protocolError(self, conn, msg): def denied(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.PROTOCOL_ERROR, msg)) sys.exit(msg)
def notReady(self, conn, msg): def notReady(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.NOT_READY, msg)) self.__respond((Packets.Error, ErrorCodes.NOT_READY, msg))
...@@ -62,3 +63,4 @@ class CommandEventHandler(EventHandler): ...@@ -62,3 +63,4 @@ class CommandEventHandler(EventHandler):
answerLastIDs = __answer(Packets.AnswerLastIDs) answerLastIDs = __answer(Packets.AnswerLastIDs)
answerLastTransaction = __answer(Packets.AnswerLastTransaction) answerLastTransaction = __answer(Packets.AnswerLastTransaction)
answerRecovery = __answer(Packets.AnswerRecovery) answerRecovery = __answer(Packets.AnswerRecovery)
answerTweakPartitionTable = __answer(Packets.AnswerTweakPartitionTable)
...@@ -91,8 +91,14 @@ class NeoCTL(BaseApplication): ...@@ -91,8 +91,14 @@ class NeoCTL(BaseApplication):
raise RuntimeError(response) raise RuntimeError(response)
return response[2] return response[2]
def tweakPartitionTable(self, uuid_list=()): def tweakPartitionTable(self, uuid_list=(), dry_run=False):
response = self.__ask(Packets.TweakPartitionTable(uuid_list)) response = self.__ask(Packets.TweakPartitionTable(dry_run, uuid_list))
if response[0] != Packets.AnswerTweakPartitionTable:
raise RuntimeError(response)
return response[1:]
def setNumReplicas(self, nr):
response = self.__ask(Packets.SetNumReplicas(nr))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK: if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response) raise RuntimeError(response)
return response[2] return response[2]
...@@ -163,7 +169,7 @@ class NeoCTL(BaseApplication): ...@@ -163,7 +169,7 @@ class NeoCTL(BaseApplication):
response = self.__ask(packet) response = self.__ask(packet)
if response[0] != Packets.AnswerPartitionList: if response[0] != Packets.AnswerPartitionList:
raise RuntimeError(response) raise RuntimeError(response)
return response[1:3] # ptid, row_list return response[1:]
def startCluster(self): def startCluster(self):
""" """
......
...@@ -51,13 +51,11 @@ UNIT_TEST_MODULES = [ ...@@ -51,13 +51,11 @@ UNIT_TEST_MODULES = [
'neo.tests.master.testClientHandler', 'neo.tests.master.testClientHandler',
'neo.tests.master.testMasterApp', 'neo.tests.master.testMasterApp',
'neo.tests.master.testMasterPT', 'neo.tests.master.testMasterPT',
'neo.tests.master.testRecovery',
'neo.tests.master.testStorageHandler', 'neo.tests.master.testStorageHandler',
'neo.tests.master.testTransactions', 'neo.tests.master.testTransactions',
# storage application # storage application
'neo.tests.storage.testClientHandler', 'neo.tests.storage.testClientHandler',
'neo.tests.storage.testMasterHandler', 'neo.tests.storage.testMasterHandler',
'neo.tests.storage.testStorageApp',
'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'), 'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
'neo.tests.storage.testTransactions', 'neo.tests.storage.testTransactions',
# client application # client application
......
...@@ -63,11 +63,16 @@ class Application(BaseApplication): ...@@ -63,11 +63,16 @@ class Application(BaseApplication):
help="do not delete data of discarded cells, which is useful for" help="do not delete data of discarded cells, which is useful for"
" big databases because the current implementation is" " big databases because the current implementation is"
" inefficient (this option should disappear in the future)") " inefficient (this option should disappear in the future)")
_.bool('new-nid',
help="request a new NID from a cluster that is already"
" operational, update the database with the new NID and exit,"
" which makes easier to quickly set up a replica by copying"
" the database of another node while it was stopped")
_ = parser.group('database creation') _ = parser.group('database creation')
_.int('u', 'uuid', _.int('i', 'nid',
help="specify an UUID to use for this process. Previously" help="specify an NID to use for this process. Previously"
" assigned UUID takes precedence (i.e. you should" " assigned NID takes precedence (i.e. you should"
" always use reset with this switch)") " always use reset with this switch)")
_('e', 'engine', help="database engine (MySQL only)") _('e', 'engine', help="database engine (MySQL only)")
_.bool('dedup', _.bool('dedup',
...@@ -118,9 +123,15 @@ class Application(BaseApplication): ...@@ -118,9 +123,15 @@ class Application(BaseApplication):
self.loadConfiguration() self.loadConfiguration()
self.devpath = self.dm.getTopologyPath() self.devpath = self.dm.getTopologyPath()
# force node uuid from command line argument, for testing purpose only if config.get('new_nid'):
if 'uuid' in config: self.new_nid = [x[0] for x in self.dm.iterAssignedCells()]
self.uuid = config['uuid'] if not self.new_nid:
sys.exit('database is empty')
self.uuid = None
else:
self.new_nid = ()
if 'nid' in config: # for testing purpose only
self.uuid = config['nid']
logging.node(self.name, self.uuid) logging.node(self.name, self.uuid)
registerLiveDebugger(on_log=self.log) registerLiveDebugger(on_log=self.log)
...@@ -158,36 +169,27 @@ class Application(BaseApplication): ...@@ -158,36 +169,27 @@ class Application(BaseApplication):
# load configuration # load configuration
self.uuid = dm.getUUID() self.uuid = dm.getUUID()
logging.node(self.name, self.uuid) logging.node(self.name, self.uuid)
num_partitions = dm.getNumPartitions()
num_replicas = dm.getNumReplicas()
ptid = dm.getPTID()
# check partition table configuration
if num_partitions is not None and num_replicas is not None:
if num_partitions <= 0:
raise RuntimeError, 'partitions must be more than zero'
# create a partition table
self.pt = PartitionTable(num_partitions, num_replicas)
logging.info('Configuration loaded:') logging.info('Configuration loaded:')
logging.info('PTID : %s', dump(ptid)) logging.info('PTID : %s', dump(dm.getPTID()))
logging.info('Name : %s', self.name) logging.info('Name : %s', self.name)
logging.info('Partitions: %s', num_partitions)
logging.info('Replicas : %s', num_replicas)
def loadPartitionTable(self):
    """Load a partition table from the database."""
    dm = self.dm
    ptid = dm.getPTID()
    if ptid is None:
        # No partition table id is stored: use a PartitionTable(0, 0).
        self.pt = PartitionTable(0, 0)
        return
    nm = self.nm
    row_list = []
    for offset, uuid, state in dm.getPartitionTable():
        # Grow the list of rows so that row_list[offset] exists.
        for _ in range(len(row_list), offset + 1):
            row_list.append([])
        # register unknown nodes
        if nm.getByUUID(uuid) is None:
            nm.createStorage(uuid=uuid)
        row_list[offset].append((uuid, CellStates[state]))
    # The instance is created without calling PartitionTable.__init__
    # and is then filled by load().
    self.pt = object.__new__(PartitionTable)
    self.pt.load(ptid, dm.getNumReplicas(), row_list, nm)
def run(self): def run(self):
try: try:
...@@ -247,28 +249,15 @@ class Application(BaseApplication): ...@@ -247,28 +249,15 @@ class Application(BaseApplication):
Note that I do not accept any connection from non-master nodes Note that I do not accept any connection from non-master nodes
at this stage.""" at this stage."""
pt = self.pt
# search, find, connect and identify to the primary master # search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.STORAGE, self.server, bootstrap = BootstrapManager(self, NodeTypes.STORAGE,
self.devpath) None if self.new_nid else self.server,
self.master_node, self.master_conn, num_partitions, num_replicas = \ self.devpath, self.new_nid)
bootstrap.getPrimaryConnection() self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
self.dm.setUUID(self.uuid) self.dm.setUUID(self.uuid)
# Reload a partition table from the database. This is necessary # Reload a partition table from the database,
# when a previous primary master died while sending a partition # in case that we're in RECOVERING phase.
# table, because the table might be incomplete.
if pt is not None:
self.loadPartitionTable()
if num_partitions != pt.getPartitions():
raise RuntimeError('the number of partitions is inconsistent')
if pt is None or pt.getReplicas() != num_replicas:
# changing number of replicas is not an issue
self.dm.setNumPartitions(num_partitions)
self.dm.setNumReplicas(num_replicas)
self.pt = PartitionTable(num_partitions, num_replicas)
self.loadPartitionTable() self.loadPartitionTable()
def initialize(self): def initialize(self):
......
...@@ -51,7 +51,7 @@ class Checker(object): ...@@ -51,7 +51,7 @@ class Checker(object):
else: else:
conn = ClientConnection(app, StorageOperationHandler(app), node) conn = ClientConnection(app, StorageOperationHandler(app), node)
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE, conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
uuid, app.server, name, (), app.id_timestamp)) uuid, app.server, name, app.id_timestamp, (), ()))
self.conn_dict[conn] = node.isIdentified() self.conn_dict[conn] = node.isIdentified()
conn_set = set(self.conn_dict) conn_set = set(self.conn_dict)
conn_set.discard(None) conn_set.discard(None)
......
...@@ -216,7 +216,7 @@ class ZODB(object): ...@@ -216,7 +216,7 @@ class ZODB(object):
self._connect = _connect self._connect = _connect
config = section.config config = section.config
if 'read_only' in config.getSectionAttributes(): if 'read_only' in config.getSectionAttributes():
has_next_oid = config.read_only = hasattr(self, 'next_oid') has_next_oid = config.read_only = 'next_oid' in self.__dict__
if not has_next_oid: if not has_next_oid:
import gc import gc
# This will reopen read-only as soon as we know the last oid. # This will reopen read-only as soon as we know the last oid.
...@@ -378,8 +378,8 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -378,8 +378,8 @@ class ImporterDatabaseManager(DatabaseManager):
conf = self._conf conf = self._conf
db = self.db = buildDatabaseManager(conf['adapter'], db = self.db = buildDatabaseManager(conf['adapter'],
(conf['database'], conf.get('engine'), conf['wait'])) (conf['database'], conf.get('engine'), conf['wait']))
for x in """getConfiguration _setConfiguration setNumPartitions for x in """getConfiguration _setConfiguration _getMaxPartition
query erase getPartitionTable _iterAssignedCells query erase getPartitionTable iterAssignedCells
updateCellTID getUnfinishedTIDDict dropUnfinishedData updateCellTID getUnfinishedTIDDict dropUnfinishedData
abortTransaction storeTransaction lockTransaction abortTransaction storeTransaction lockTransaction
loadData storeData getOrphanList _pruneData deferCommit loadData storeData getOrphanList _pruneData deferCommit
...@@ -396,9 +396,16 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -396,9 +396,16 @@ class ImporterDatabaseManager(DatabaseManager):
self._writeback.committed() self._writeback.committed()
self.commit = db.commit = commit self.commit = db.commit = commit
def _updateReadable(self): def _updateReadable(*_):
raise AssertionError raise AssertionError
def setUUID(self, nid):
    """Store `nid` if no NID is stored yet.

    When getUUID() already returns a true value, nothing is written and
    the given `nid` is only asserted to be equal to it.
    """
    current = self.getUUID()
    if not current:
        self.setConfiguration('nid', str(nid))
        return
    assert current == nid, (current, nid)
def changePartitionTable(self, *args, **kw): def changePartitionTable(self, *args, **kw):
self.db.changePartitionTable(*args, **kw) self.db.changePartitionTable(*args, **kw)
if self._writeback: if self._writeback:
...@@ -413,7 +420,7 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -413,7 +420,7 @@ class ImporterDatabaseManager(DatabaseManager):
if self._writeback: if self._writeback:
self._writeback.close() self._writeback.close()
self.db.close() self.db.close()
if isinstance(self.zodb, list): # _setup called if isinstance(self.zodb, tuple): # _setup called
for zodb in self.zodb: for zodb in self.zodb:
zodb.close() zodb.close()
...@@ -436,8 +443,12 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -436,8 +443,12 @@ class ImporterDatabaseManager(DatabaseManager):
self.zodb_ltid = max(x.ltid for x in self.zodb) self.zodb_ltid = max(x.ltid for x in self.zodb)
zodb = self.zodb[-1] zodb = self.zodb[-1]
self.zodb_loid = zodb.shift_oid + zodb.next_oid - 1 self.zodb_loid = zodb.shift_oid + zodb.next_oid - 1
self.zodb_tid = self.db.getLastTID(self.zodb_ltid) or 0 self.zodb_tid = self._getMaxPartition() is not None and \
if callable(self._import): self.db.getLastTID(self.zodb_ltid) or 0
if callable(self._import): # XXX: why ?
if self.zodb_tid == self.zodb_ltid:
self._finished()
else:
self._import = self._import() self._import = self._import()
def doOperation(self, app): def doOperation(self, app):
...@@ -498,12 +509,19 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -498,12 +509,19 @@ class ImporterDatabaseManager(DatabaseManager):
if process: if process:
process.join() process.join()
self.commit() self.commit()
self._finished()
def _finished(self):
logging.warning("All data are imported. You should change" logging.warning("All data are imported. You should change"
" your configuration to use the native backend and restart.") " your configuration to use the native backend and restart.")
self._import = None self._import = None
for x in """getObject getReplicationTIDList getReplicationObjectList for x in """getObject getReplicationTIDList getReplicationObjectList
_fetchObject
""".split(): """.split():
setattr(self, x, getattr(self.db, x)) setattr(self, x, getattr(self.db, x))
for zodb in self.zodb:
zodb.close()
self.zodb = None
def _iter_zodb(self, zodb_list): def _iter_zodb(self, zodb_list):
util.setproctitle('neostorage: import') util.setproctitle('neostorage: import')
...@@ -667,6 +685,9 @@ class ImporterDatabaseManager(DatabaseManager): ...@@ -667,6 +685,9 @@ class ImporterDatabaseManager(DatabaseManager):
length, partition) length, partition)
return r return r
def _fetchObject(*_):
raise AssertionError
def getObjectHistory(self, *args, **kw): def getObjectHistory(self, *args, **kw):
raise BackendNotImplemented(self.getObjectHistory) raise BackendNotImplemented(self.getObjectHistory)
...@@ -678,6 +699,7 @@ class WriteBack(object): ...@@ -678,6 +699,7 @@ class WriteBack(object):
_changed = False _changed = False
_process = None _process = None
chunk_size = 100
def __init__(self, db, storage): def __init__(self, db, storage):
self._db = db self._db = db
...@@ -705,7 +727,7 @@ class WriteBack(object): ...@@ -705,7 +727,7 @@ class WriteBack(object):
self._event = Event() self._event = Event()
self._idle = Event() self._idle = Event()
self._stop = Event() self._stop = Event()
self._np = self._db.getNumPartitions() self._np = 1 + self._db._getMaxPartition()
self._db = cPickle.dumps(self._db, 2) self._db = cPickle.dumps(self._db, 2)
self._process = Process(target=self._run) self._process = Process(target=self._run)
self._process.daemon = True self._process.daemon = True
...@@ -737,7 +759,6 @@ class WriteBack(object): ...@@ -737,7 +759,6 @@ class WriteBack(object):
def iterator(self): def iterator(self):
db = self._db db = self._db
np = self._np np = self._np
chunk_size = max(2, 1000 // np)
offset_list = xrange(np) offset_list = xrange(np)
while 1: while 1:
with db: with db:
...@@ -748,23 +769,26 @@ class WriteBack(object): ...@@ -748,23 +769,26 @@ class WriteBack(object):
if np == len(db._readable_set): if np == len(db._readable_set):
while 1: while 1:
tid_list = [] tid_list = []
loop = False max_tid = MAX_TID
for offset in offset_list: for offset in offset_list:
x = db.getReplicationTIDList( x = db.getReplicationTIDList(
self.min_tid, MAX_TID, chunk_size, offset) self.min_tid, max_tid, self.chunk_size, offset)
tid_list += x tid_list += x
if len(x) == chunk_size: if len(x) == self.chunk_size:
loop = True max_tid = x[-1]
if tid_list: if not tid_list:
break
tid_list.sort() tid_list.sort()
for tid in tid_list: for tid in tid_list:
if self._stop.is_set(): if self._stop.is_set():
return return
yield TransactionRecord(db, tid) yield TransactionRecord(db, tid)
if tid == max_tid:
break
else:
self.min_tid = util.add64(tid, 1) self.min_tid = util.add64(tid, 1)
if loop:
continue
break break
self.min_tid = util.add64(tid, 1)
if not self._event.is_set(): if not self._event.is_set():
self._idle.set() self._idle.set()
self._event.wait() self._event.wait()
......
...@@ -102,25 +102,24 @@ class DatabaseManager(object): ...@@ -102,25 +102,24 @@ class DatabaseManager(object):
finally: finally:
db.close() db.close()
_cached_attr_list = (
'_readable_set', '_getPartition', '_getReadablePartition')
def __getattr__(self, attr): def __getattr__(self, attr):
if attr in ('_readable_set', '_getPartition', '_getReadablePartition'): if attr in self._cached_attr_list:
self._updateReadable() self._updateReadable()
return self.__getattribute__(attr) return self.__getattribute__(attr)
def _partitionTableChanged(self):
try:
del (self._readable_set,
self._getPartition,
self._getReadablePartition)
except AttributeError:
pass
def __enter__(self): def __enter__(self):
assert not self.LOCK, "not a secondary connection" assert not self.LOCK, "not a secondary connection"
# XXX: All config caching should be done in this class, # XXX: All config caching should be done in this class,
# rather than in backend classes. # rather than in backend classes.
self._config.clear() self._config.clear()
self._partitionTableChanged() try:
for attr in self._cached_attr_list:
delattr(self, attr)
except AttributeError:
pass
def __exit__(self, t, v, tb): def __exit__(self, t, v, tb):
if v is None: if v is None:
...@@ -180,6 +179,10 @@ class DatabaseManager(object): ...@@ -180,6 +179,10 @@ class DatabaseManager(object):
def erase(self): def erase(self):
"""""" """"""
def restore(self, dump): # for tests
    """Erase the database, then pass `dump` to self._restore()."""
    self.erase()
    self._restore(dump)
def _setup(self, dedup=False): def _setup(self, dedup=False):
"""To be overridden by the backend to set up a database """To be overridden by the backend to set up a database
...@@ -271,6 +274,18 @@ class DatabaseManager(object): ...@@ -271,6 +274,18 @@ class DatabaseManager(object):
def _setConfiguration(self, key, value): def _setConfiguration(self, key, value):
"""""" """"""
def _changePartitionTable(self, cell_list, reset=False):
"""Change a part of a partition table. The list of cells is
a tuple of tuples, each of which consists of an offset (row ID),
the NID of a storage node, and a cell state. If reset is True,
existing data is first thrown away.
"""
def _getPartitionTable(self):
"""Return a whole partition table as a sequence of rows. Each row
is again a tuple of an offset (row ID), the NID of a storage
node, and a cell state."""
def getUUID(self): def getUUID(self):
""" """
Load a NID from a database. Load a NID from a database.
...@@ -279,27 +294,20 @@ class DatabaseManager(object): ...@@ -279,27 +294,20 @@ class DatabaseManager(object):
if nid is not None: if nid is not None:
return int(nid) return int(nid)
@requires(_changePartitionTable, _getPartitionTable)
def setUUID(self, nid):
    """
    Store a NID into a database.

    Nothing is done when `nid` equals the stored NID. When another
    non-null NID was stored, each partition table row of that NID is
    turned into two entries passed to _changePartitionTable(): one for
    the old NID with None as last field, and one for `nid` carrying the
    original last field.
    """
    previous = self.getUUID()
    if nid == previous:
        return
    if previous:
        # Fetch the rows before _changePartitionTable() is entered.
        rows = self._getPartitionTable()
        def reassigned():
            for offset, owner, tid in rows:
                if owner == previous:
                    # NOTE(review): presumably None drops the old cell;
                    # confirm against the backend implementation.
                    yield offset, previous, None
                    yield offset, nid, tid
        self._changePartitionTable(reassigned())
    self.setConfiguration('nid', str(nid))
def getNumPartitions(self):
"""
Load the number of partitions from a database.
"""
n = self.getConfiguration('partitions')
if n is not None:
return int(n)
def setNumPartitions(self, num_partitions):
"""
Store the number of partitions into a database.
"""
self.setConfiguration('partitions', num_partitions)
self._partitionTableChanged()
def getNumReplicas(self): def getNumReplicas(self):
""" """
Load the number of replicas from a database. Load the number of replicas from a database.
...@@ -308,12 +316,6 @@ class DatabaseManager(object): ...@@ -308,12 +316,6 @@ class DatabaseManager(object):
if n is not None: if n is not None:
return int(n) return int(n)
def setNumReplicas(self, num_replicas):
"""
Store the number of replicas into a database.
"""
self.setConfiguration('replicas', num_replicas)
def getName(self): def getName(self):
""" """
Load a name from a database. Load a name from a database.
...@@ -374,8 +376,9 @@ class DatabaseManager(object): ...@@ -374,8 +376,9 @@ class DatabaseManager(object):
tids are in unpacked format. tids are in unpacked format.
""" """
if self.getNumPartitions(): x = self._readable_set
return max(map(self._getLastTID, self._readable_set)) if x:
return max(self._getLastTID(x, max_tid) for x in x)
def _getLastIDs(self, partition): def _getLastIDs(self, partition):
"""Return max(tid) & max(oid) for objects of given partition """Return max(tid) & max(oid) for objects of given partition
...@@ -395,7 +398,7 @@ class DatabaseManager(object): ...@@ -395,7 +398,7 @@ class DatabaseManager(object):
x = self._readable_set x = self._readable_set
if x: if x:
tid, oid = zip(*map(self._getLastIDs, x)) tid, oid = zip(*map(self._getLastIDs, x))
tid = max(self.getLastTID(None), max(tid)) tid = max(self.getLastTID(), max(tid))
oid = max(oid) oid = max(oid)
return (None if tid is None else util.p64(tid), return (None if tid is None else util.p64(tid),
None if oid is None else util.p64(oid)) None if oid is None else util.p64(oid))
...@@ -511,13 +514,8 @@ class DatabaseManager(object): ...@@ -511,13 +514,8 @@ class DatabaseManager(object):
return (util.p64(serial), compression, checksum, data, return (util.p64(serial), compression, checksum, data,
None if data_serial is None else util.p64(data_serial)) None if data_serial is None else util.p64(data_serial))
def _getPartitionTable(self):
"""Return a whole partition table as a sequence of rows. Each row
is again a tuple of an offset (row ID), the NID of a storage
node, and a cell state."""
@requires(_getPartitionTable) @requires(_getPartitionTable)
def _iterAssignedCells(self): def iterAssignedCells(self):
my_nid = self.getUUID() my_nid = self.getUUID()
return ((offset, tid) for offset, nid, tid in self._getPartitionTable() return ((offset, tid) for offset, nid, tid in self._getPartitionTable()
if my_nid == nid) if my_nid == nid)
...@@ -537,24 +535,19 @@ class DatabaseManager(object): ...@@ -537,24 +535,19 @@ class DatabaseManager(object):
finally: finally:
readable_set.remove(offset) readable_set.remove(offset)
def _changePartitionTable(self, cell_list, reset=False): def _getDataLastId(self, partition):
"""Change a part of a partition table. The list of cells is """
a tuple of tuples, each of which consists of an offset (row ID),
the NID of a storage node, and a cell state. If reset is True,
existing data is first thrown away.
""" """
def _getDataLastId(self, partition): def _getMaxPartition(self):
""" """
""" """
@requires(_getDataLastId) @requires(_getDataLastId, _getMaxPartition)
def _updateReadable(self): def _updateReadable(self, reset=True):
try: if reset:
readable_set = self.__dict__['_readable_set']
except KeyError:
readable_set = self._readable_set = set() readable_set = self._readable_set = set()
np = self.getNumPartitions() np = 1 + self._getMaxPartition()
def _getPartition(x, np=np): def _getPartition(x, np=np):
return x % np return x % np
def _getReadablePartition(x, np=np, r=readable_set): def _getReadablePartition(x, np=np, r=readable_set):
...@@ -569,14 +562,15 @@ class DatabaseManager(object): ...@@ -569,14 +562,15 @@ class DatabaseManager(object):
i = self._getDataLastId(p) i = self._getDataLastId(p)
d.append(p << 48 if i is None else i + 1) d.append(p << 48 if i is None else i + 1)
else: else:
readable_set = self._readable_set
readable_set.clear() readable_set.clear()
readable_set.update(x[0] for x in self._iterAssignedCells() readable_set.update(x[0] for x in self.iterAssignedCells()
if -x[1] in READABLE) if -x[1] in READABLE)
@requires(_changePartitionTable, _getLastIDs, _getLastTID) @requires(_changePartitionTable, _getLastIDs, _getLastTID)
def changePartitionTable(self, ptid, cell_list, reset=False): def changePartitionTable(self, ptid, num_replicas, cell_list, reset=False):
my_nid = self.getUUID() my_nid = self.getUUID()
pt = dict(self._iterAssignedCells()) pt = dict(self.iterAssignedCells())
# In backup mode, the last transactions of a readable cell may be # In backup mode, the last transactions of a readable cell may be
# incomplete. # incomplete.
backup_tid = self.getBackupTID() backup_tid = self.getBackupTID()
...@@ -595,13 +589,14 @@ class DatabaseManager(object): ...@@ -595,13 +589,14 @@ class DatabaseManager(object):
outofdate_tid(offset))) outofdate_tid(offset)))
for offset, nid, state in cell_list] for offset, nid, state in cell_list]
self._changePartitionTable(cell_list, reset) self._changePartitionTable(cell_list, reset)
self._updateReadable() self._updateReadable(reset)
assert isinstance(ptid, (int, long)), ptid assert isinstance(ptid, (int, long)), ptid
self._setConfiguration('ptid', str(ptid)) self._setConfiguration('ptid', str(ptid))
self._setConfiguration('replicas', str(num_replicas))
@requires(_changePartitionTable) @requires(_changePartitionTable)
def updateCellTID(self, partition, tid): def updateCellTID(self, partition, tid):
t, = (t for p, t in self._iterAssignedCells() if p == partition) t, = (t for p, t in self.iterAssignedCells() if p == partition)
if t < 0: if t < 0:
return return
tid = util.u64(tid) tid = util.u64(tid)
...@@ -623,7 +618,7 @@ class DatabaseManager(object): ...@@ -623,7 +618,7 @@ class DatabaseManager(object):
next_tid = util.u64(backup_tid) next_tid = util.u64(backup_tid)
if next_tid: if next_tid:
next_tid += 1 next_tid += 1
for offset, tid in self._iterAssignedCells(): for offset, tid in self.iterAssignedCells():
if tid >= 0: # OUT_OF_DATE if tid >= 0: # OUT_OF_DATE
yield offset, p64(tid and tid + 1) yield offset, p64(tid and tid + 1)
elif -tid in READABLE: elif -tid in READABLE:
...@@ -865,7 +860,7 @@ class DatabaseManager(object): ...@@ -865,7 +860,7 @@ class DatabaseManager(object):
assert tid, tid assert tid, tid
cell_list = [] cell_list = []
my_nid = self.getUUID() my_nid = self.getUUID()
for partition, state in self._iterAssignedCells(): for partition, state in self.iterAssignedCells():
if state > tid: if state > tid:
cell_list.append((partition, my_nid, tid)) cell_list.append((partition, my_nid, tid))
self._deleteRange(partition, tid) self._deleteRange(partition, tid)
......
...@@ -117,7 +117,9 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -117,7 +117,9 @@ class MySQLDatabaseManager(DatabaseManager):
return super(MySQLDatabaseManager, self).__getattr__(attr) return super(MySQLDatabaseManager, self).__getattr__(attr)
def _tryConnect(self): def _tryConnect(self):
kwd = {'db' : self.db, 'user' : self.user} kwd = {'db' : self.db}
if self.user:
kwd['user'] = self.user
if self.passwd is not None: if self.passwd is not None:
kwd['passwd'] = self.passwd kwd['passwd'] = self.passwd
if self.socket: if self.socket:
...@@ -198,6 +200,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -198,6 +200,7 @@ class MySQLDatabaseManager(DatabaseManager):
self._connect() self._connect()
def _commit(self): def _commit(self):
# XXX: Should we translate OperationalError into MysqlError ?
self.conn.commit() self.conn.commit()
self._active = 0 self._active = 0
...@@ -270,6 +273,12 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -270,6 +273,12 @@ class MySQLDatabaseManager(DatabaseManager):
" ELSE 1-state" " ELSE 1-state"
" END as tid") " END as tid")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False): def _setup(self, dedup=False):
self._config.clear() self._config.clear()
q = self.query q = self.query
...@@ -295,6 +304,12 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -295,6 +304,12 @@ class MySQLDatabaseManager(DatabaseManager):
p += """ PARTITION BY LIST (`partition`) ( p += """ PARTITION BY LIST (`partition`) (
PARTITION dummy VALUES IN (NULL))""" PARTITION dummy VALUES IN (NULL))"""
if engine == "RocksDB":
cf = lambda name, rev=False: " COMMENT '%scf_neo_%s'" % (
'rev:' if rev else '', name)
else:
cf = lambda *_: ''
# The table "trans" stores information on committed transactions. # The table "trans" stores information on committed transactions.
schema_dict['trans'] = """CREATE TABLE %s ( schema_dict['trans'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL, `partition` SMALLINT UNSIGNED NOT NULL,
...@@ -305,8 +320,8 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -305,8 +320,8 @@ class MySQLDatabaseManager(DatabaseManager):
description BLOB NOT NULL, description BLOB NOT NULL,
ext BLOB NOT NULL, ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL, ttid BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`partition`, tid) PRIMARY KEY (`partition`, tid){}
) ENGINE=""" + p ) ENGINE={}""".format(cf('append_meta'), p)
# The table "obj" stores committed object metadata. # The table "obj" stores committed object metadata.
schema_dict['obj'] = """CREATE TABLE %s ( schema_dict['obj'] = """CREATE TABLE %s (
...@@ -315,10 +330,11 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -315,10 +330,11 @@ class MySQLDatabaseManager(DatabaseManager):
tid BIGINT UNSIGNED NOT NULL, tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL, data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL, value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (`partition`, oid, tid), PRIMARY KEY (`partition`, oid, tid){},
KEY tid (`partition`, tid, oid), KEY tid (`partition`, tid, oid){},
KEY (data_id) KEY (data_id){}
) ENGINE=""" + p ) ENGINE={}""".format(cf('obj_pk', True),
cf('append_meta'), cf('append_meta'), p)
if engine == "TokuDB": if engine == "TokuDB":
engine += " compression='tokudb_uncompressed'" engine += " compression='tokudb_uncompressed'"
...@@ -326,18 +342,21 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -326,18 +342,21 @@ class MySQLDatabaseManager(DatabaseManager):
# The table "data" stores object data. # The table "data" stores object data.
# We'd like to have partial index on 'hash' column (e.g. hash(4)) # We'd like to have partial index on 'hash' column (e.g. hash(4))
# but 'UNIQUE' constraint would not work as expected. # but 'UNIQUE' constraint would not work as expected.
schema_dict['data'] = """CREATE TABLE %%s ( schema_dict['data'] = """CREATE TABLE %s (
id BIGINT UNSIGNED NOT NULL PRIMARY KEY, id BIGINT UNSIGNED NOT NULL,
hash BINARY(20) NOT NULL, hash BINARY(20) NOT NULL,
compression TINYINT UNSIGNED NULL, compression TINYINT UNSIGNED NULL,
value MEDIUMBLOB NOT NULL%s value MEDIUMBLOB NOT NULL,
) ENGINE=%s""" % (""", PRIMARY KEY (id){}{}
UNIQUE (hash, compression)""" if dedup else "", engine) ) ENGINE={}""".format(cf('append'), """,
UNIQUE (hash, compression)""" + cf('no_comp') if dedup else "",
engine)
schema_dict['bigdata'] = """CREATE TABLE %s ( schema_dict['bigdata'] = """CREATE TABLE %s (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, id INT UNSIGNED NOT NULL AUTO_INCREMENT,
value MEDIUMBLOB NOT NULL value MEDIUMBLOB NOT NULL,
) ENGINE=""" + engine PRIMARY KEY (id){}
) ENGINE={}""".format(cf('append'), p)
# The table "ttrans" stores information on uncommitted transactions. # The table "ttrans" stores information on uncommitted transactions.
schema_dict['ttrans'] = """CREATE TABLE %s ( schema_dict['ttrans'] = """CREATE TABLE %s (
...@@ -348,8 +367,9 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -348,8 +367,9 @@ class MySQLDatabaseManager(DatabaseManager):
user BLOB NOT NULL, user BLOB NOT NULL,
description BLOB NOT NULL, description BLOB NOT NULL,
ext BLOB NOT NULL, ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL ttid BIGINT UNSIGNED NOT NULL,
) ENGINE=""" + engine PRIMARY KEY (ttid){}
) ENGINE={}""".format(cf('no_comp'), p)
# The table "tobj" stores uncommitted object metadata. # The table "tobj" stores uncommitted object metadata.
schema_dict['tobj'] = """CREATE TABLE %s ( schema_dict['tobj'] = """CREATE TABLE %s (
...@@ -358,8 +378,8 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -358,8 +378,8 @@ class MySQLDatabaseManager(DatabaseManager):
tid BIGINT UNSIGNED NOT NULL, tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL, data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL, value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (tid, oid) PRIMARY KEY (tid, oid){}
) ENGINE=""" + engine ) ENGINE={}""".format(cf('no_comp'), p)
if self.nonempty('config') is None: if self.nonempty('config') is None:
q(schema_dict.pop('config') % 'config') q(schema_dict.pop('config') % 'config')
...@@ -407,6 +427,9 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -407,6 +427,9 @@ class MySQLDatabaseManager(DatabaseManager):
q("ALTER TABLE config MODIFY value VARBINARY(%s) NULL" % len(value)) q("ALTER TABLE config MODIFY value VARBINARY(%s) NULL" % len(value))
q(sql) q(sql)
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt")[0][0]
def _getPartitionTable(self): def _getPartitionTable(self):
return self.query("SELECT * FROM pt") return self.query("SELECT * FROM pt")
...@@ -965,7 +988,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -965,7 +988,7 @@ class MySQLDatabaseManager(DatabaseManager):
cmd += self._cmdline() cmd += self._cmdline()
return subprocess.check_output(cmd) return subprocess.check_output(cmd)
def restore(self, sql): def _restore(self, sql):
import subprocess import subprocess
cmd = ['mysql'] cmd = ['mysql']
cmd += self._cmdline() cmd += self._cmdline()
......
...@@ -79,6 +79,7 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -79,6 +79,7 @@ class SQLiteDatabaseManager(DatabaseManager):
def _connect(self): def _connect(self):
logging.info('connecting to SQLite database %r', self.db) logging.info('connecting to SQLite database %r', self.db)
self.conn = sqlite3.connect(self.db, check_same_thread=False) self.conn = sqlite3.connect(self.db, check_same_thread=False)
self.conn.text_factory = str
self.lock(self.db) self.lock(self.db)
if self.UNSAFE: if self.UNSAFE:
q = self.query q = self.query
...@@ -144,6 +145,12 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -144,6 +145,12 @@ class SQLiteDatabaseManager(DatabaseManager):
" WHEN 2 THEN -2" # FEEDING " WHEN 2 THEN -2" # FEEDING
" ELSE 1-state END") " ELSE 1-state END")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict, index_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False): def _setup(self, dedup=False):
# BBB: SQLite has transactional DDL but before Python 3.6, # BBB: SQLite has transactional DDL but before Python 3.6,
# the binding automatically commits between such statements. # the binding automatically commits between such statements.
...@@ -265,6 +272,9 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -265,6 +272,9 @@ class SQLiteDatabaseManager(DatabaseManager):
else: else:
q("REPLACE INTO config VALUES (?,?)", (key, str(value))) q("REPLACE INTO config VALUES (?,?)", (key, str(value)))
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt").next()[0]
def _getPartitionTable(self): def _getPartitionTable(self):
return self.query("SELECT * FROM pt") return self.query("SELECT * FROM pt")
...@@ -712,5 +722,5 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -712,5 +722,5 @@ class SQLiteDatabaseManager(DatabaseManager):
main[-1:-1] = data main[-1:-1] = data
return '\n'.join(main) + '\n' return '\n'.join(main) + '\n'
def restore(self, sql): def _restore(self, sql):
self.conn.executescript(sql) self.conn.executescript(sql)
...@@ -65,14 +65,14 @@ class BaseMasterHandler(BaseHandler): ...@@ -65,14 +65,14 @@ class BaseMasterHandler(BaseHandler):
# See comment in ClientOperationHandler.connectionClosed # See comment in ClientOperationHandler.connectionClosed
self.app.tm.abortFor(uuid, even_if_voted=True) self.app.tm.abortFor(uuid, even_if_voted=True)
def notifyPartitionChanges(self, conn, ptid, cell_list): def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
"""This is very similar to Send Partition Table, except that """This is very similar to Send Partition Table, except that
the information is only about changes from the previous.""" the information is only about changes from the previous."""
app = self.app app = self.app
if ptid != 1 + app.pt.getID(): if ptid != 1 + app.pt.getID():
raise ProtocolError('wrong partition table id') raise ProtocolError('wrong partition table id')
app.pt.update(ptid, cell_list, app.nm) app.pt.update(ptid, num_replicas, cell_list, app.nm)
app.dm.changePartitionTable(ptid, cell_list) app.dm.changePartitionTable(ptid, num_replicas, cell_list)
if app.operational: if app.operational:
app.replicator.notifyPartitionChanges(cell_list) app.replicator.notifyPartitionChanges(cell_list)
app.dm.commit() app.dm.commit()
......
...@@ -32,7 +32,7 @@ class IdentificationHandler(EventHandler): ...@@ -32,7 +32,7 @@ class IdentificationHandler(EventHandler):
return self.app.nm return self.app.nm
def requestIdentification(self, conn, node_type, uuid, address, name, def requestIdentification(self, conn, node_type, uuid, address, name,
devpath, id_timestamp): id_timestamp, devpath, new_nid):
self.checkClusterName(name) self.checkClusterName(name)
app = self.app app = self.app
# reject any incoming connections if not ready # reject any incoming connections if not ready
...@@ -65,6 +65,6 @@ class IdentificationHandler(EventHandler): ...@@ -65,6 +65,6 @@ class IdentificationHandler(EventHandler):
conn.setHandler(handler) conn.setHandler(handler)
node.setConnection(conn, force) node.setConnection(conn, force)
# accept the identification and trigger an event # accept the identification and trigger an event
conn.answer(Packets.AcceptIdentification(NodeTypes.STORAGE, uuid and conn.answer(Packets.AcceptIdentification(
app.uuid, app.pt.getPartitions(), app.pt.getReplicas(), uuid)) NodeTypes.STORAGE, uuid and app.uuid, uuid))
handler.connectionCompleted(conn) handler.connectionCompleted(conn)
...@@ -20,10 +20,10 @@ from neo.lib.protocol import Packets, ProtocolError, ZERO_TID ...@@ -20,10 +20,10 @@ from neo.lib.protocol import Packets, ProtocolError, ZERO_TID
class InitializationHandler(BaseMasterHandler): class InitializationHandler(BaseMasterHandler):
def sendPartitionTable(self, conn, ptid, row_list): def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
app = self.app app = self.app
pt = app.pt pt = app.pt
pt.load(ptid, row_list, app.nm) pt.load(ptid, num_replicas, row_list, app.nm)
if not pt.filled(): if not pt.filled():
raise ProtocolError('Partial partition table received') raise ProtocolError('Partial partition table received')
# Install the partition table into the database for persistence. # Install the partition table into the database for persistence.
...@@ -44,7 +44,7 @@ class InitializationHandler(BaseMasterHandler): ...@@ -44,7 +44,7 @@ class InitializationHandler(BaseMasterHandler):
logging.debug('drop data for partitions %r', unassigned) logging.debug('drop data for partitions %r', unassigned)
dm.dropPartitions(unassigned) dm.dropPartitions(unassigned)
dm.changePartitionTable(ptid, cell_list, reset=True) dm.changePartitionTable(ptid, num_replicas, cell_list, reset=True)
dm.commit() dm.commit()
def truncate(self, conn, tid): def truncate(self, conn, tid):
...@@ -68,7 +68,8 @@ class InitializationHandler(BaseMasterHandler): ...@@ -68,7 +68,8 @@ class InitializationHandler(BaseMasterHandler):
def askPartitionTable(self, conn): def askPartitionTable(self, conn):
pt = self.app.pt pt = self.app.pt
conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList())) conn.answer(Packets.AnswerPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
def askLockedTransactions(self, conn): def askLockedTransactions(self, conn):
conn.answer(Packets.AnswerLockedTransactions( conn.answer(Packets.AnswerLockedTransactions(
......
...@@ -350,7 +350,7 @@ class Replicator(object): ...@@ -350,7 +350,7 @@ class Replicator(object):
try: try:
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE, conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
None if name else app.uuid, app.server, name or app.name, None if name else app.uuid, app.server, name or app.name,
(), app.id_timestamp)) app.id_timestamp, (), ()))
except ConnectionClosed: except ConnectionClosed:
if previous_node is self.current_node: if previous_node is self.current_node:
return return
......
...@@ -98,9 +98,12 @@ class TransactionManager(EventQueue): ...@@ -98,9 +98,12 @@ class TransactionManager(EventQueue):
self._load_lock_dict = {} self._load_lock_dict = {}
self._replicated = {} self._replicated = {}
self._replicating = set() self._replicating = set()
def getPartition(self, oid):
from neo.lib.util import u64 from neo.lib.util import u64
np = app.pt.getPartitions() np = self._app.pt.getPartitions()
self.getPartition = lambda oid: u64(oid) % np self.getPartition = lambda oid: u64(oid) % np
return self.getPartition(oid)
def discarded(self, offset_list): def discarded(self, offset_list):
self._replicating.difference_update(offset_list) self._replicating.difference_update(offset_list)
......
...@@ -21,6 +21,7 @@ import gc ...@@ -21,6 +21,7 @@ import gc
import os import os
import random import random
import socket import socket
import subprocess
import sys import sys
import tempfile import tempfile
import unittest import unittest
...@@ -41,7 +42,7 @@ from .mock import Mock ...@@ -41,7 +42,7 @@ from .mock import Mock
from neo.lib import debug, logging, protocol from neo.lib import debug, logging, protocol
from neo.lib.protocol import NodeTypes, Packets, UUID_NAMESPACES from neo.lib.protocol import NodeTypes, Packets, UUID_NAMESPACES
from neo.lib.util import cached_property from neo.lib.util import cached_property
from time import time from time import time, sleep
from struct import pack, unpack from struct import pack, unpack
from unittest.case import _ExpectedFailure, _UnexpectedSuccess from unittest.case import _ExpectedFailure, _UnexpectedSuccess
try: try:
...@@ -72,6 +73,9 @@ DB_ADMIN = os.getenv('NEO_DB_ADMIN', 'root') ...@@ -72,6 +73,9 @@ DB_ADMIN = os.getenv('NEO_DB_ADMIN', 'root')
DB_PASSWD = os.getenv('NEO_DB_PASSWD', '') DB_PASSWD = os.getenv('NEO_DB_PASSWD', '')
DB_USER = os.getenv('NEO_DB_USER', 'test') DB_USER = os.getenv('NEO_DB_USER', 'test')
DB_SOCKET = os.getenv('NEO_DB_SOCKET', '') DB_SOCKET = os.getenv('NEO_DB_SOCKET', '')
DB_INSTALL = os.getenv('NEO_DB_INSTALL', 'mysql_install_db')
DB_MYSQLD = os.getenv('NEO_DB_MYSQLD', '/usr/sbin/mysqld')
DB_MYCNF = os.getenv('NEO_DB_MYCNF')
IP_VERSION_FORMAT_DICT = { IP_VERSION_FORMAT_DICT = {
socket.AF_INET: '127.0.0.1', socket.AF_INET: '127.0.0.1',
...@@ -134,8 +138,12 @@ def getTempDirectory(): ...@@ -134,8 +138,12 @@ def getTempDirectory():
print 'Using temp directory %r.' % temp_dir print 'Using temp directory %r.' % temp_dir
return temp_dir return temp_dir
def setupMySQLdb(db_list, user=DB_USER, password='', clear_databases=True): def setupMySQLdb(db_list, clear_databases=True):
if mysql_pool:
return mysql_pool.setup(db_list, clear_databases)
from MySQLdb.constants.ER import BAD_DB_ERROR from MySQLdb.constants.ER import BAD_DB_ERROR
user = DB_USER
password = ''
kw = {'unix_socket': os.path.expanduser(DB_SOCKET)} if DB_SOCKET else {} kw = {'unix_socket': os.path.expanduser(DB_SOCKET)} if DB_SOCKET else {}
conn = MySQLdb.connect(user=DB_ADMIN, passwd=DB_PASSWD, **kw) conn = MySQLdb.connect(user=DB_ADMIN, passwd=DB_PASSWD, **kw)
cursor = conn.cursor() cursor = conn.cursor()
...@@ -154,6 +162,88 @@ def setupMySQLdb(db_list, user=DB_USER, password='', clear_databases=True): ...@@ -154,6 +162,88 @@ def setupMySQLdb(db_list, user=DB_USER, password='', clear_databases=True):
cursor.close() cursor.close()
conn.commit() conn.commit()
conn.close() conn.close()
return '{}:{}@%s{}'.format(user, password, DB_SOCKET).__mod__
class MySQLPool(object):
def __init__(self, pool_dir=None):
self._args = {}
self._mysqld_dict = {}
if not pool_dir:
pool_dir = getTempDirectory()
self._base = pool_dir + os.sep
self._sock_template = os.path.join(pool_dir, '%s', 'mysql.sock')
def __del__(self):
self.kill(*self._mysqld_dict)
def setup(self, db_list, clear_databases):
start_list = set(db_list).difference(self._mysqld_dict)
if start_list:
start_list = sorted(start_list)
x = []
with open(os.devnull, 'wb') as f:
for db in start_list:
base = self._base + db
datadir = os.path.join(base, 'datadir')
sock = self._sock_template % db
tmpdir = os.path.join(base, 'tmp')
args = [DB_INSTALL,
'--defaults-file=' + DB_MYCNF,
'--datadir=' + datadir,
'--socket=' + sock,
'--tmpdir=' + tmpdir,
'--log_error=' + os.path.join(base, 'error.log')]
if os.path.exists(datadir):
try:
os.remove(sock)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
os.makedirs(tmpdir)
x.append(subprocess.Popen(args,
stdout=f, stderr=subprocess.STDOUT))
args[0] = DB_MYSQLD
self._args[db] = args
for x in x:
x = x.wait()
if x:
raise subprocess.CalledProcessError(x, DB_INSTALL)
self.start(*start_list)
for db in start_list:
sock = self._sock_template % db
p = self._mysqld_dict[db]
while not os.path.exists(sock):
sleep(1)
x = p.poll()
if x is not None:
raise subprocess.CalledProcessError(x, DB_MYSQLD)
for db in db_list:
db = MySQLdb.connect(unix_socket=self._sock_template % db,
user='root')
if clear_databases:
db.query('DROP DATABASE IF EXISTS neo')
db.query('CREATE DATABASE IF NOT EXISTS neo')
db.close()
return ('root@neo' + self._sock_template).__mod__
def start(self, *db, **kw):
assert set(db).isdisjoint(self._mysqld_dict)
for db in db:
self._mysqld_dict[db] = subprocess.Popen(self._args[db], **kw)
def kill(self, *db):
processes = []
for db in db:
p = self._mysqld_dict.pop(db)
processes.append(p)
p.kill()
for p in processes:
p.wait()
mysql_pool = MySQLPool() if DB_MYCNF else None
def ImporterConfigParser(adapter, zodb, **kw): def ImporterConfigParser(adapter, zodb, **kw):
cfg = SafeConfigParser() cfg = SafeConfigParser()
...@@ -244,13 +334,15 @@ class NeoUnitTestBase(NeoTestBase): ...@@ -244,13 +334,15 @@ class NeoUnitTestBase(NeoTestBase):
""" create empty databases """ """ create empty databases """
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL') adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
if adapter == 'MySQL': if adapter == 'MySQL':
setupMySQLdb([prefix + str(i) for i in xrange(number)]) db_template = setupMySQLdb(
[prefix + str(i) for i in xrange(number)])
self.db_template = lambda i: db_template(prefix + str(i))
elif adapter == 'SQLite': elif adapter == 'SQLite':
temp_dir = getTempDirectory() self.db_template = os.path.join(getTempDirectory(),
prefix + '%s.sqlite').__mod__
for i in xrange(number): for i in xrange(number):
try: try:
os.remove(os.path.join(temp_dir, os.remove(self.db_template(i))
'%s%s.sqlite' % (prefix, i)))
except OSError, e: except OSError, e:
if e.errno != errno.ENOENT: if e.errno != errno.ENOENT:
raise raise
...@@ -274,21 +366,14 @@ class NeoUnitTestBase(NeoTestBase): ...@@ -274,21 +366,14 @@ class NeoUnitTestBase(NeoTestBase):
def getStorageConfiguration(self, cluster='main', master_number=2, def getStorageConfiguration(self, cluster='main', master_number=2,
index=0, prefix=DB_PREFIX, uuid=None): index=0, prefix=DB_PREFIX, uuid=None):
assert master_number >= 1 and master_number <= 10 assert master_number >= 1 and master_number <= 10
assert index >= 0 and index <= 9
masters = [(buildUrlFromString(self.local_ip), masters = [(buildUrlFromString(self.local_ip),
10010 + i) for i in xrange(master_number)] 10010 + i) for i in xrange(master_number)]
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL') adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
if adapter == 'MySQL':
db = '%s@%s%s%s' % (DB_USER, prefix, index, DB_SOCKET)
elif adapter == 'SQLite':
db = os.path.join(getTempDirectory(), 'test_neo%s.sqlite' % index)
else:
assert False, adapter
return { return {
'cluster': cluster, 'cluster': cluster,
'bind': (masters[0], 10020 + index), 'bind': (masters[0], 10020 + index),
'masters': masters, 'masters': masters,
'database': db, 'database': self.db_template(index),
'uuid': uuid, 'uuid': uuid,
'adapter': adapter, 'adapter': adapter,
'wait': 0, 'wait': 0,
......
...@@ -36,7 +36,7 @@ from neo.lib import logging ...@@ -36,7 +36,7 @@ from neo.lib import logging
from neo.lib.protocol import ClusterStates, NodeTypes, CellStates, NodeStates, \ from neo.lib.protocol import ClusterStates, NodeTypes, CellStates, NodeStates, \
UUID_NAMESPACES UUID_NAMESPACES
from neo.lib.util import dump, setproctitle from neo.lib.util import dump, setproctitle
from .. import (ADDRESS_TYPE, DB_SOCKET, DB_USER, IP_VERSION_FORMAT_DICT, SSL, from .. import (ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, SSL,
buildUrlFromString, cluster, getTempDirectory, setupMySQLdb, buildUrlFromString, cluster, getTempDirectory, setupMySQLdb,
ImporterConfigParser, NeoTestBase, Patch) ImporterConfigParser, NeoTestBase, Patch)
from neo.client.Storage import Storage from neo.client.Storage import Storage
...@@ -282,7 +282,7 @@ class NEOProcess(Process): ...@@ -282,7 +282,7 @@ class NEOProcess(Process):
def _args(self): def _args(self):
args = super(NEOProcess, self)._args() args = super(NEOProcess, self)._args()
if self.uuid: if self.uuid:
args[:0] = '--uuid', str(self.uuid) args[:0] = '--nid', str(self.uuid)
return args return args
def run(self): def run(self):
...@@ -306,11 +306,11 @@ class NEOCluster(object): ...@@ -306,11 +306,11 @@ class NEOCluster(object):
SSL = None SSL = None
def __init__(self, db_list, master_count=1, partitions=1, replicas=0, def __init__(self, db_list, master_count=1, partitions=1, replicas=0,
db_user=DB_USER, db_password='', name=None, name=None,
cleanup_on_delete=False, temp_dir=None, clear_databases=True, cleanup_on_delete=False, temp_dir=None, clear_databases=True,
adapter=os.getenv('NEO_TESTS_ADAPTER'), adapter=os.getenv('NEO_TESTS_ADAPTER'),
address_type=ADDRESS_TYPE, bind_ip=None, logger=True, address_type=ADDRESS_TYPE, bind_ip=None, logger=True,
importer=None, upstream_masters=None, upstream_cluster=None): importer=None, upstream_masters=None, upstream_cluster=None, storage_kw={}):
if not adapter: if not adapter:
adapter = 'MySQL' adapter = 'MySQL'
self.adapter = adapter self.adapter = adapter
...@@ -322,20 +322,28 @@ class NEOCluster(object): ...@@ -322,20 +322,28 @@ class NEOCluster(object):
temp_dir = tempfile.mkdtemp(prefix='neo_') temp_dir = tempfile.mkdtemp(prefix='neo_')
print 'Using temp directory ' + temp_dir print 'Using temp directory ' + temp_dir
if adapter == 'MySQL': if adapter == 'MySQL':
self.db_user = db_user self.db_template = setupMySQLdb(db_list, clear_databases)
self.db_password = db_password
self.db_template = ('%s:%s@%%s%s' % (db_user, db_password,
DB_SOCKET)).__mod__
elif adapter == 'SQLite': elif adapter == 'SQLite':
self.db_template = (lambda t: lambda db: self.db_template = (lambda t: lambda db:
':memory:' if db is None else db if os.sep in db else t % db ':memory:' if db is None else db if os.sep in db else t % db
)(os.path.join(temp_dir, '%s.sqlite')) )(os.path.join(temp_dir, '%s.sqlite'))
if clear_databases:
for db in self.db_list:
if db is None:
continue
db = self.db_template(db)
try:
os.remove(db)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
logging.debug('%r deleted', db)
else: else:
assert False, adapter assert False, adapter
self.address_type = address_type self.address_type = address_type
self.local_ip = local_ip = bind_ip or \ self.local_ip = local_ip = bind_ip or \
IP_VERSION_FORMAT_DICT[self.address_type] IP_VERSION_FORMAT_DICT[self.address_type]
self.setupDB(clear_databases)
if importer: if importer:
cfg = ImporterConfigParser(adapter, **importer) cfg = ImporterConfigParser(adapter, **importer)
cfg.set("neo", "database", self.db_template(*db_list)) cfg.set("neo", "database", self.db_template(*db_list))
...@@ -372,7 +380,8 @@ class NEOCluster(object): ...@@ -372,7 +380,8 @@ class NEOCluster(object):
# create storage nodes # create storage nodes
for i, db in enumerate(db_list): for i, db in enumerate(db_list):
self._newProcess(NodeTypes.STORAGE, logger and 'storage_%u' % i, self._newProcess(NodeTypes.STORAGE, logger and 'storage_%u' % i,
0, adapter=adapter, database=self.db_template(db)) 0, adapter=adapter, database=self.db_template(db),
**storage_kw)
# create neoctl # create neoctl
self.neoctl = NeoCTL((self.local_ip, admin_port), ssl=self.SSL) self.neoctl = NeoCTL((self.local_ip, admin_port), ssl=self.SSL)
...@@ -390,23 +399,10 @@ class NEOCluster(object): ...@@ -390,23 +399,10 @@ class NEOCluster(object):
self.process_dict.setdefault(node_type, []).append( self.process_dict.setdefault(node_type, []).append(
NEOProcess(command_dict[node_type], uuid=uuid, **kw)) NEOProcess(command_dict[node_type], uuid=uuid, **kw))
def setupDB(self, clear_databases=True): def resetDB(self):
if self.adapter == 'MySQL':
setupMySQLdb(self.db_list, self.db_user, self.db_password,
clear_databases)
elif self.adapter == 'SQLite':
if clear_databases:
for db in self.db_list: for db in self.db_list:
if db is None: dm = buildDatabaseManager(self.adapter, (self.db_template(db),))
continue dm.setup(True)
db = self.db_template(db)
try:
os.remove(db)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
logging.debug('%r deleted', db)
def run(self, except_storages=()): def run(self, except_storages=()):
""" Start cluster processes except some storage nodes """ """ Start cluster processes except some storage nodes """
...@@ -445,7 +441,7 @@ class NEOCluster(object): ...@@ -445,7 +441,7 @@ class NEOCluster(object):
pending_count += 1 pending_count += 1
if pending_count == target[0]: if pending_count == target[0]:
neoctl.startCluster() neoctl.startCluster()
except (NotReadyException, RuntimeError): except (NotReadyException, SystemExit):
pass pass
if not pdb.wait(test, MAX_START_TIME): if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster') raise AssertionError('Timeout when starting cluster')
...@@ -457,7 +453,7 @@ class NEOCluster(object): ...@@ -457,7 +453,7 @@ class NEOCluster(object):
def start(last_try): def start(last_try):
try: try:
self.neoctl.startCluster() self.neoctl.startCluster()
except (NotReadyException, RuntimeError), e: except (NotReadyException, SystemExit), e:
return False, e return False, e
return True, None return True, None
self.expectCondition(start) self.expectCondition(start)
...@@ -661,10 +657,10 @@ class NEOCluster(object): ...@@ -661,10 +657,10 @@ class NEOCluster(object):
def expectOudatedCells(self, number, *args, **kw): def expectOudatedCells(self, number, *args, **kw):
def callback(last_try): def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[1] row_list = self.neoctl.getPartitionRowList()[2]
number_of_outdated = 0 number_of_outdated = 0
for row in row_list: for row in row_list:
for cell in row[1]: for cell in row:
if cell[1] == CellStates.OUT_OF_DATE: if cell[1] == CellStates.OUT_OF_DATE:
number_of_outdated += 1 number_of_outdated += 1
return number_of_outdated == number, number_of_outdated return number_of_outdated == number, number_of_outdated
...@@ -672,10 +668,10 @@ class NEOCluster(object): ...@@ -672,10 +668,10 @@ class NEOCluster(object):
def expectAssignedCells(self, process, number, *args, **kw): def expectAssignedCells(self, process, number, *args, **kw):
def callback(last_try): def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[1] row_list = self.neoctl.getPartitionRowList()[2]
assigned_cells_number = 0 assigned_cells_number = 0
for row in row_list: for row in row_list:
for cell in row[1]: for cell in row:
if cell[0] == process.getUUID(): if cell[0] == process.getUUID():
assigned_cells_number += 1 assigned_cells_number += 1
return assigned_cells_number == number, assigned_cells_number return assigned_cells_number == number, assigned_cells_number
......
...@@ -62,8 +62,6 @@ class ClientTests(NEOFunctionalTest): ...@@ -62,8 +62,6 @@ class ClientTests(NEOFunctionalTest):
NEOFunctionalTest._tearDown(self, success) NEOFunctionalTest._tearDown(self, success)
def __setup(self): def __setup(self):
# start cluster
self.neo.setupDB()
self.neo.start() self.neo.start()
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.db = ZODB.DB(self.neo.getZODBStorage()) self.db = ZODB.DB(self.neo.getZODBStorage())
......
...@@ -71,7 +71,6 @@ class ClusterTests(NEOFunctionalTest): ...@@ -71,7 +71,6 @@ class ClusterTests(NEOFunctionalTest):
def testClusterBreaks(self): def testClusterBreaks(self):
self.neo = NEOCluster(['test_neo1'], self.neo = NEOCluster(['test_neo1'],
master_count=1, temp_dir=self.getTempDirectory()) master_count=1, temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start() self.neo.start()
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0) self.neo.expectOudatedCells(number=0)
...@@ -82,7 +81,6 @@ class ClusterTests(NEOFunctionalTest): ...@@ -82,7 +81,6 @@ class ClusterTests(NEOFunctionalTest):
self.neo = NEOCluster(['test_neo1', 'test_neo2'], self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, master_count=1, replicas=0, partitions=2, master_count=1, replicas=0,
temp_dir=self.getTempDirectory()) temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start() self.neo.start()
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0) self.neo.expectOudatedCells(number=0)
...@@ -93,7 +91,6 @@ class ClusterTests(NEOFunctionalTest): ...@@ -93,7 +91,6 @@ class ClusterTests(NEOFunctionalTest):
self.neo = NEOCluster(['test_neo1', 'test_neo2'], self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, replicas=1, master_count=1, partitions=2, replicas=1, master_count=1,
temp_dir=self.getTempDirectory()) temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start() self.neo.start()
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0) self.neo.expectOudatedCells(number=0)
......
...@@ -47,7 +47,7 @@ class MasterTests(NEOFunctionalTest): ...@@ -47,7 +47,7 @@ class MasterTests(NEOFunctionalTest):
break break
neoctl.killNode(uuid) neoctl.killNode(uuid)
self.neo.expectDead(master) self.neo.expectDead(master)
self.assertRaises(RuntimeError, neoctl.killNode, primary_uuid) self.assertRaises(SystemExit, neoctl.killNode, primary_uuid)
def testStoppingPrimaryWithTwoSecondaries(self): def testStoppingPrimaryWithTwoSecondaries(self):
# Wait for masters to stabilize # Wait for masters to stabilize
......
...@@ -172,7 +172,7 @@ class StorageTests(NEOFunctionalTest): ...@@ -172,7 +172,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectOudatedCells(2) self.neo.expectOudatedCells(2)
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.assertRaises(RuntimeError, self.neo.neoctl.killNode, self.assertRaises(SystemExit, self.neo.neoctl.killNode,
started[1].getUUID()) started[1].getUUID())
started[1].stop() started[1].stop()
# Cluster not operational anymore. Only cells of second storage that # Cluster not operational anymore. Only cells of second storage that
...@@ -323,7 +323,7 @@ class StorageTests(NEOFunctionalTest): ...@@ -323,7 +323,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectStorageUnknown(started[0]) self.neo.expectStorageUnknown(started[0])
self.neo.expectAssignedCells(started[0], 0) self.neo.expectAssignedCells(started[0], 0)
self.neo.expectAssignedCells(started[1], 10) self.neo.expectAssignedCells(started[1], 10)
self.assertRaises(RuntimeError, self.neo.neoctl.dropNode, self.assertRaises(SystemExit, self.neo.neoctl.dropNode,
started[1].getUUID()) started[1].getUUID())
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
......
...@@ -30,8 +30,6 @@ class MasterClientHandlerTests(NeoUnitTestBase): ...@@ -30,8 +30,6 @@ class MasterClientHandlerTests(NeoUnitTestBase):
config = self.getMasterConfiguration(master_number=1, replicas=1) config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config) self.app = Application(config)
self.app.em.close() self.app.em.close()
self.app.pt.clear()
self.app.pt.setID(1)
self.app.em = Mock() self.app.em = Mock()
self.app.loid = '\0' * 8 self.app.loid = '\0' * 8
self.app.tm.setLastTID('\0' * 8) self.app.tm.setLastTID('\0' * 8)
......
...@@ -26,7 +26,6 @@ class MasterAppTests(NeoUnitTestBase): ...@@ -26,7 +26,6 @@ class MasterAppTests(NeoUnitTestBase):
# create an application object # create an application object
config = self.getMasterConfiguration() config = self.getMasterConfiguration()
self.app = Application(config) self.app = Application(config)
self.app.pt.clear()
def _tearDown(self, success): def _tearDown(self, success):
self.app.close() self.app.close()
......
...@@ -289,7 +289,9 @@ class MasterPartitionTableTests(NeoUnitTestBase): ...@@ -289,7 +289,9 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt.addNodeList(sn[1:3]) pt.addNodeList(sn[1:3])
self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..') self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..')
self.update(pt, self.tweak(pt, sn[:1])) self.update(pt, self.tweak(pt, sn[:1]))
self.assertPartitionTable(pt, '.U.|..U|.U.|..U|.U.|..U|.U.') # See note in PartitionTable.tweak() about drop_list.
#self.assertPartitionTable(pt,'.U.|..U|.U.|..U|.U.|..U|.U.')
self.assertPartitionTable(pt, 'UU.|U.U|UU.|U.U|UU.|U.U|UU.')
def test_18_tweakBigPT(self): def test_18_tweakBigPT(self):
seed = repr(time.time()) seed = repr(time.time())
......
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, NodeStates, CellStates
from neo.master.recovery import RecoveryManager
from neo.master.app import Application
class MasterRecoveryTests(NeoUnitTestBase):
    """Unit tests for RecoveryManager.answerPartitionTable.

    Each test runs against a master Application created from
    getMasterConfiguration(), with its partition table cleared.
    """

    def setUp(self):
        """Create the master Application and the RecoveryManager under test."""
        NeoUnitTestBase.setUp(self)
        # create an application object
        config = self.getMasterConfiguration()
        self.app = Application(config)
        self.app.pt.clear()
        self.recovery = RecoveryManager(self.app)
        self.app.unconnected_master_node_set = set()
        self.app.negotiating_master_node_set = set()
        # Record the address of every known master as unconnected and mark
        # the node itself as RUNNING.
        for node in self.app.nm.getMasterList():
            self.app.unconnected_master_node_set.add(node.getAddress())
            node.setState(NodeStates.RUNNING)
        # define some variables to simulate client and storage nodes
        self.storage_port = 10021
        self.master_port = 10011

    def _tearDown(self, success):
        """Close the Application, then delegate to the base class teardown."""
        self.app.close()
        NeoUnitTestBase._tearDown(self, success)

    # Common methods
    def identifyToMasterNode(self, node_type=NodeTypes.STORAGE, ip="127.0.0.1",
                             port=10021):
        """Do the first step of identification to the master node.

        Creates a node of type `node_type` at address (ip, port) in the
        application's node manager, in RUNNING state, with a UUID obtained
        from getNewUUID(node_type). Returns that UUID.
        """
        address = (ip, port)
        uuid = self.getNewUUID(node_type)
        self.app.nm.createFromNodeType(node_type, address=address, uuid=uuid,
            state=NodeStates.RUNNING)
        return uuid

    # Tests
    def test_10_answerPartitionTable(self):
        # XXX: This test does much less than it seems, because all 'for' loops
        #      iterate over empty lists. Currently, only testRecovery covers
        #      some paths in NodeManager._createNode: apart from that, we could
        #      delete it entirely.
        recovery = self.recovery
        # The master's UUID is overwritten just below; only the side effect of
        # registering the node is kept.
        uuid = self.identifyToMasterNode(NodeTypes.MASTER, port=self.master_port)
        # not from target node, ignore
        uuid = self.identifyToMasterNode(NodeTypes.STORAGE, port=self.storage_port)
        conn = self.getFakeConnection(uuid, self.storage_port)
        node = self.app.nm.getByUUID(conn.getUUID())
        offset = 1
        cell_list = [(offset, uuid, CellStates.UP_TO_DATE)]
        cells = self.app.pt.getRow(offset)
        for cell, state in cells:
            self.assertEqual(state, CellStates.OUT_OF_DATE)
        recovery.target_ptid = 2
        node.setPending()
        # Answer carries ptid 1 while target_ptid is 2.
        recovery.answerPartitionTable(conn, 1, cell_list)
        cells = self.app.pt.getRow(offset)
        for cell, state in cells:
            self.assertEqual(state, CellStates.OUT_OF_DATE)
        # from target node, taken into account
        conn = self.getFakeConnection(uuid, self.storage_port)
        offset = 1
        cell_list = [(offset, ((uuid, CellStates.UP_TO_DATE,),),)]
        cells = self.app.pt.getRow(offset)
        for cell, state in cells:
            self.assertEqual(state, CellStates.OUT_OF_DATE)
        node.setPending()
        recovery.answerPartitionTable(conn, None, cell_list)
        cells = self.app.pt.getRow(offset)
        for cell, state in cells:
            self.assertEqual(state, CellStates.UP_TO_DATE)
        # give a bad offset, must send error
        self.recovery.target_uuid = uuid
        conn = self.getFakeConnection(uuid, self.storage_port)
        offset = 1000000
        self.assertFalse(self.app.pt.hasOffset(offset))
        cell_list = [(offset, ((uuid, NodeStates.UNKNOWN,),),)]
        node.setPending()
        self.checkProtocolErrorRaised(recovery.answerPartitionTable, conn,
            2, cell_list)
if __name__ == '__main__':
unittest.main()
...@@ -18,8 +18,8 @@ import unittest ...@@ -18,8 +18,8 @@ import unittest
from ..mock import Mock from ..mock import Mock
from .. import NeoUnitTestBase from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, Packets from neo.lib.protocol import NodeTypes, Packets
from neo.master.handlers.storage import StorageServiceHandler
from neo.master.app import Application from neo.master.app import Application
from neo.master.handlers.storage import StorageServiceHandler
class MasterStorageHandlerTests(NeoUnitTestBase): class MasterStorageHandlerTests(NeoUnitTestBase):
...@@ -29,7 +29,6 @@ class MasterStorageHandlerTests(NeoUnitTestBase): ...@@ -29,7 +29,6 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
config = self.getMasterConfiguration(master_number=1, replicas=1) config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config) self.app = Application(config)
self.app.em.close() self.app.em.close()
self.app.pt.clear()
self.app.em = Mock() self.app.em = Mock()
self.service = StorageServiceHandler(self.app) self.service = StorageServiceHandler(self.app)
......
...@@ -56,7 +56,7 @@ class StorageMasterHandlerTests(NeoUnitTestBase): ...@@ -56,7 +56,7 @@ class StorageMasterHandlerTests(NeoUnitTestBase):
self.app.pt = Mock({'getID': 1}) self.app.pt = Mock({'getID': 1})
count = len(self.app.nm.getList()) count = len(self.app.nm.getList())
self.assertRaises(ProtocolError, self.operation.notifyPartitionChanges, self.assertRaises(ProtocolError, self.operation.notifyPartitionChanges,
conn, 0, ()) conn, 0, 0, ())
self.assertEqual(self.app.pt.getID(), 1) self.assertEqual(self.app.pt.getID(), 1)
self.assertEqual(len(self.app.nm.getList()), count) self.assertEqual(len(self.app.nm.getList()), count)
calls = self.app.replicator.mockGetNamedCalls('removePartition') calls = self.app.replicator.mockGetNamedCalls('removePartition')
...@@ -84,13 +84,13 @@ class StorageMasterHandlerTests(NeoUnitTestBase): ...@@ -84,13 +84,13 @@ class StorageMasterHandlerTests(NeoUnitTestBase):
ptid = 2 ptid = 2
app.dm = Mock({ }) app.dm = Mock({ })
app.replicator = Mock({}) app.replicator = Mock({})
self.operation.notifyPartitionChanges(conn, ptid, cells) self.operation.notifyPartitionChanges(conn, ptid, 1, cells)
# ptid set # ptid set
self.assertEqual(app.pt.getID(), ptid) self.assertEqual(app.pt.getID(), ptid)
# dm call # dm call
calls = self.app.dm.mockGetNamedCalls('changePartitionTable') calls = self.app.dm.mockGetNamedCalls('changePartitionTable')
self.assertEqual(len(calls), 1) self.assertEqual(len(calls), 1)
calls[0].checkArgs(ptid, cells) calls[0].checkArgs(ptid, 1, cells)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.storage.app import Application
from neo.lib.protocol import CellStates
from neo.lib.pt import PartitionTable
class StorageAppTests(NeoUnitTestBase):
    """Unit tests for the storage Application's loadPartitionTable().

    The database manager (app.dm) is replaced by Mock objects, so the rows
    fed to loadPartitionTable() are fully controlled by the test.
    """

    def setUp(self):
        """Prepare one database and create the storage Application."""
        NeoUnitTestBase.setUp(self)
        self.prepareDatabase(number=1)
        # create an application object
        config = self.getStorageConfiguration(master_number=1)
        self.app = Application(config)

    def _tearDown(self, success):
        """Close and drop the Application, then run the base class teardown."""
        self.app.close()
        del self.app
        super(StorageAppTests, self)._tearDown(success)

    def test_01_loadPartitionTable(self):
        # First phase: the mocked database manager is configured with an
        # empty list for getPartitionTable.
        self.app.dm = Mock({
            'getPartitionTable': [],
        })
        self.assertEqual(self.app.pt, None)
        num_partitions = 3
        num_replicas = 2
        self.app.pt = PartitionTable(num_partitions, num_replicas)
        # A freshly created table has no node and no assigned offset.
        self.assertFalse(self.app.pt.getNodeSet())
        self.assertFalse(self.app.pt.filled())
        for x in xrange(num_partitions):
            self.assertFalse(self.app.pt.hasOffset(x))
        # load an empty table
        self.app.loadPartitionTable()
        self.assertFalse(self.app.pt.getNodeSet())
        self.assertFalse(self.app.pt.filled())
        for x in xrange(num_partitions):
            self.assertFalse(self.app.pt.hasOffset(x))
        # add some nodes, which will be removed when loading the table
        master_uuid = self.getMasterUUID()
        master = self.app.nm.createMaster(uuid=master_uuid)
        storage_uuid = self.getStorageUUID()
        storage = self.app.nm.createStorage(uuid=storage_uuid)
        client_uuid = self.getClientUUID()
        self.app.pt._setCell(0, master, CellStates.UP_TO_DATE)
        self.app.pt._setCell(0, storage, CellStates.UP_TO_DATE)
        self.assertEqual(len(self.app.pt.getNodeSet()), 2)
        self.assertFalse(self.app.pt.filled())
        # Only partition 0 has cells at this point.
        for x in xrange(num_partitions):
            if x == 0:
                self.assertTrue(self.app.pt.hasOffset(x))
            else:
                self.assertFalse(self.app.pt.hasOffset(x))
        # load an empty table, everything removed
        self.app.loadPartitionTable()
        self.assertFalse(self.app.pt.getNodeSet())
        self.assertFalse(self.app.pt.filled())
        for x in xrange(num_partitions):
            self.assertFalse(self.app.pt.hasOffset(x))
        # add some node
        self.app.pt._setCell(0, master, CellStates.UP_TO_DATE)
        self.app.pt._setCell(0, storage, CellStates.UP_TO_DATE)
        self.assertEqual(len(self.app.pt.getNodeSet()), 2)
        self.assertFalse(self.app.pt.filled())
        for x in xrange(num_partitions):
            if x == 0:
                self.assertTrue(self.app.pt.hasOffset(x))
            else:
                self.assertFalse(self.app.pt.hasOffset(x))
        # fill partition table
        # Second phase: the mock now lists (offset, uuid, state) rows covering
        # all three partitions; client, storage and master UUIDs are all used
        # as cell owners here.
        self.app.dm = Mock({
            'getPartitionTable': [
                (0, client_uuid, CellStates.UP_TO_DATE),
                (1, client_uuid, CellStates.UP_TO_DATE),
                (1, storage_uuid, CellStates.UP_TO_DATE),
                (2, storage_uuid, CellStates.UP_TO_DATE),
                (2, master_uuid, CellStates.UP_TO_DATE),
            ],
            'getPTID': 1,
        })
        self.app.pt.clear()
        self.app.loadPartitionTable()
        self.assertTrue(self.app.pt.filled())
        for x in xrange(num_partitions):
            self.assertTrue(self.app.pt.hasOffset(x))
        # check each row
        cell_list = self.app.pt.getCellList(0)
        self.assertEqual(len(cell_list), 1)
        self.assertEqual(cell_list[0].getUUID(), client_uuid)
        # Rows 1 and 2 hold two cells each; their order is not asserted.
        cell_list = self.app.pt.getCellList(1)
        self.assertEqual(len(cell_list), 2)
        self.assertTrue(cell_list[0].getUUID() in (client_uuid, storage_uuid))
        self.assertTrue(cell_list[1].getUUID() in (client_uuid, storage_uuid))
        cell_list = self.app.pt.getCellList(2)
        self.assertEqual(len(cell_list), 2)
        self.assertTrue(cell_list[0].getUUID() in (master_uuid, storage_uuid))
        self.assertTrue(cell_list[1].getUUID() in (master_uuid, storage_uuid))
if __name__ == '__main__':
unittest.main()
...@@ -48,30 +48,15 @@ class StorageDBTests(NeoUnitTestBase): ...@@ -48,30 +48,15 @@ class StorageDBTests(NeoUnitTestBase):
raise NotImplementedError raise NotImplementedError
def setNumPartitions(self, num_partitions, reset=0): def setNumPartitions(self, num_partitions, reset=0):
try: assert not hasattr(self, '_db')
db = self._db
except AttributeError:
self._db = db = self.getDB(reset) self._db = db = self.getDB(reset)
else:
if reset:
db.setup(reset)
else:
try:
n = db.getNumPartitions()
except KeyError:
n = 0
if num_partitions == n:
return
if num_partitions < n:
db.dropPartitions(n)
db.setNumPartitions(num_partitions)
self.assertEqual(num_partitions, db.getNumPartitions())
uuid = self.getStorageUUID() uuid = self.getStorageUUID()
db.setUUID(uuid) db.setUUID(uuid)
self.assertEqual(uuid, db.getUUID()) self.assertEqual(uuid, db.getUUID())
db.changePartitionTable(1, db.changePartitionTable(1, 0,
[(i, uuid, CellStates.UP_TO_DATE) for i in xrange(num_partitions)], [(i, uuid, CellStates.UP_TO_DATE) for i in xrange(num_partitions)],
reset=True) reset=True)
self.assertEqual(num_partitions, 1 + db._getMaxPartition())
db.commit() db.commit()
def checkConfigEntry(self, get_call, set_call, value): def checkConfigEntry(self, get_call, set_call, value):
...@@ -102,16 +87,6 @@ class StorageDBTests(NeoUnitTestBase): ...@@ -102,16 +87,6 @@ class StorageDBTests(NeoUnitTestBase):
db = self.getDB() db = self.getDB()
self.checkConfigEntry(db.getName, db.setName, 'TEST_NAME') self.checkConfigEntry(db.getName, db.setName, 'TEST_NAME')
def test_getPartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
uuid1, uuid2 = self.getStorageUUID(), self.getStorageUUID()
cell1 = (0, uuid1, CellStates.OUT_OF_DATE)
cell2 = (1, uuid1, CellStates.UP_TO_DATE)
db.changePartitionTable(1, [cell1, cell2], 1)
result = db.getPartitionTable()
self.assertEqual(set(result), {cell1, cell2})
def getOIDs(self, count): def getOIDs(self, count):
return map(p64, xrange(count)) return map(p64, xrange(count))
...@@ -202,52 +177,6 @@ class StorageDBTests(NeoUnitTestBase): ...@@ -202,52 +177,6 @@ class StorageDBTests(NeoUnitTestBase):
self.assertEqual(self.db.getObject(oid1, before_tid=tid2), self.assertEqual(self.db.getObject(oid1, before_tid=tid2),
OBJECT_T1_NEXT) OBJECT_T1_NEXT)
def test_setPartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
ptid = 1
uuid = self.getStorageUUID()
cell1 = 0, uuid, CellStates.OUT_OF_DATE
cell2 = 1, uuid, CellStates.UP_TO_DATE
cell3 = 1, uuid, CellStates.DISCARDED
# no partition table
self.assertEqual(list(db.getPartitionTable()), [])
# set one
db.changePartitionTable(ptid, [cell1], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
# then another
db.changePartitionTable(ptid, [cell2], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [cell2])
# drop discarded cells
db.changePartitionTable(ptid, [cell2, cell3], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [])
def test_changePartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
ptid = 1
uuid = self.getStorageUUID()
cell1 = 0, uuid, CellStates.OUT_OF_DATE
cell2 = 1, uuid, CellStates.UP_TO_DATE
cell3 = 1, uuid, CellStates.DISCARDED
# no partition table
self.assertEqual(list(db.getPartitionTable()), [])
# set one
db.changePartitionTable(ptid, [cell1])
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
# add more entries
db.changePartitionTable(ptid, [cell2])
result = db.getPartitionTable()
self.assertEqual(set(result), {cell1, cell2})
# drop discarded cells
db.changePartitionTable(ptid, [cell2, cell3])
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
def test_commitTransaction(self): def test_commitTransaction(self):
oid1, oid2 = self.getOIDs(2) oid1, oid2 = self.getOIDs(2)
tid1, tid2 = self.getTIDs(2) tid1, tid2 = self.getTIDs(2)
......
...@@ -22,7 +22,7 @@ from MySQLdb.constants.ER import UNKNOWN_STORAGE_ENGINE ...@@ -22,7 +22,7 @@ from MySQLdb.constants.ER import UNKNOWN_STORAGE_ENGINE
from ..mock import Mock from ..mock import Mock
from neo.lib.protocol import ZERO_OID from neo.lib.protocol import ZERO_OID
from neo.lib.util import p64 from neo.lib.util import p64
from .. import DB_PREFIX, DB_SOCKET, DB_USER, Patch from .. import DB_PREFIX, DB_USER, Patch, setupMySQLdb
from .testStorageDBTests import StorageDBTests from .testStorageDBTests import StorageDBTests
from neo.storage.database import DatabaseFailure from neo.storage.database import DatabaseFailure
from neo.storage.database.mysqldb import MySQLDatabaseManager from neo.storage.database.mysqldb import MySQLDatabaseManager
...@@ -46,8 +46,8 @@ class StorageMySQLdbTests(StorageDBTests): ...@@ -46,8 +46,8 @@ class StorageMySQLdbTests(StorageDBTests):
engine = None engine = None
def _test_lockDatabase_open(self): def _test_lockDatabase_open(self):
self.prepareDatabase(number=1, prefix=DB_PREFIX) self.prepareDatabase(1)
database = '%s@%s0%s' % (DB_USER, DB_PREFIX, DB_SOCKET) database = self.db_template(0)
return MySQLDatabaseManager(database, self.engine) return MySQLDatabaseManager(database, self.engine)
def getDB(self, reset=0): def getDB(self, reset=0):
......
...@@ -19,12 +19,9 @@ class Handler(MasterEventHandler): ...@@ -19,12 +19,9 @@ class Handler(MasterEventHandler):
super(Handler, self).answerClusterState(conn, state) super(Handler, self).answerClusterState(conn, state)
self.app.refresh('state') self.app.refresh('state')
def answerPartitionTable(self, *args):
super(Handler, self).answerPartitionTable(*args)
self.app.refresh('pt')
def sendPartitionTable(self, *args): def sendPartitionTable(self, *args):
raise AssertionError super(Handler, self).sendPartitionTable(*args)
self.app.refresh('pt')
def notifyPartitionChanges(self, *args): def notifyPartitionChanges(self, *args):
super(Handler, self).notifyPartitionChanges(*args) super(Handler, self).notifyPartitionChanges(*args)
...@@ -50,6 +47,7 @@ class StressApplication(AdminApplication): ...@@ -50,6 +47,7 @@ class StressApplication(AdminApplication):
cluster_state = server = uuid = None cluster_state = server = uuid = None
listening_conn = True listening_conn = True
fault_probability = 1
restart_ratio = float('inf') # no firewall support restart_ratio = float('inf') # no firewall support
_stress = False _stress = False
...@@ -191,7 +189,7 @@ class StressApplication(AdminApplication): ...@@ -191,7 +189,7 @@ class StressApplication(AdminApplication):
self.loid = loid self.loid = loid
self.ltid = ltid self.ltid = ltid
self.em.setTimeout(int(time.time() + 1), self.askLastIDs) self.em.setTimeout(int(time.time() + 1), self.askLastIDs)
if self._stress: if self._stress and random.random() < self.fault_probability:
node_list = self.nm.getStorageList() node_list = self.nm.getStorageList()
random.shuffle(node_list) random.shuffle(node_list)
fw = [] fw = []
......
...@@ -40,7 +40,7 @@ from neo.lib.util import cached_property, parseMasterList, p64 ...@@ -40,7 +40,7 @@ from neo.lib.util import cached_property, parseMasterList, p64
from neo.master.recovery import RecoveryManager from neo.master.recovery import RecoveryManager
from .. import (getTempDirectory, setupMySQLdb, from .. import (getTempDirectory, setupMySQLdb,
ImporterConfigParser, NeoTestBase, Patch, ImporterConfigParser, NeoTestBase, Patch,
ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, DB_PREFIX, DB_SOCKET, DB_USER) ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, DB_PREFIX)
BIND = IP_VERSION_FORMAT_DICT[ADDRESS_TYPE], 0 BIND = IP_VERSION_FORMAT_DICT[ADDRESS_TYPE], 0
LOCAL_IP = socket.inet_pton(ADDRESS_TYPE, IP_VERSION_FORMAT_DICT[ADDRESS_TYPE]) LOCAL_IP = socket.inet_pton(ADDRESS_TYPE, IP_VERSION_FORMAT_DICT[ADDRESS_TYPE])
...@@ -321,7 +321,13 @@ class TestSerialized(Serialized): # NOTE used only in .NeoCTL ...@@ -321,7 +321,13 @@ class TestSerialized(Serialized): # NOTE used only in .NeoCTL
class Node(object): class Node(object):
def getConnectionList(self, *peers): def getConnectionList(self, *peers):
addr = lambda c: c and (c.addr if c.is_server else c.getAddress()) def addr(c):
# Do not identify only by source address because 2 TCP connections
# can have same source host:port to different destinations.
if c:
a = c.addr
b = c.getAddress()
return (b, a) if c.is_server else (ServerNode.resolv(a), b)
addr_set = {addr(c.connector) for peer in peers addr_set = {addr(c.connector) for peer in peers
for c in peer.em.connection_dict.itervalues() for c in peer.em.connection_dict.itervalues()
if isinstance(c, Connection)} if isinstance(c, Connection)}
...@@ -395,7 +401,10 @@ class ServerNode(Node): ...@@ -395,7 +401,10 @@ class ServerNode(Node):
assert not self.is_alive() assert not self.is_alive()
init_args = self._init_args init_args = self._init_args
init_args['reset'] = False init_args['reset'] = False
assert set(kw).issubset(init_args), (kw, init_args) if __debug__:
x = set(kw).difference(init_args)
assert not x or x.issubset(self.option_parser.getOptionDict()), (
kw, init_args)
init_args.update(kw) init_args.update(kw)
self.close() self.close()
self.__init__(**init_args) self.__init__(**init_args)
...@@ -730,7 +739,7 @@ class NEOCluster(object): ...@@ -730,7 +739,7 @@ class NEOCluster(object):
def __init__(self, master_count=1, partitions=1, replicas=0, upstream=None, def __init__(self, master_count=1, partitions=1, replicas=0, upstream=None,
adapter=os.getenv('NEO_TESTS_ADAPTER', 'SQLite'), adapter=os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
storage_count=None, db_list=None, clear_databases=True, storage_count=None, db_list=None, clear_databases=True,
db_user=DB_USER, db_password='', compress=True, compress=True,
importer=None, autostart=None, dedup=False, name=None): importer=None, autostart=None, dedup=False, name=None):
self.name = name or 'neo_%s' % self._allocate('name', self.name = name or 'neo_%s' % self._allocate('name',
lambda: random.randint(0, 100)) lambda: random.randint(0, 100))
...@@ -757,21 +766,20 @@ class NEOCluster(object): ...@@ -757,21 +766,20 @@ class NEOCluster(object):
db_list = ['%s%u' % (DB_PREFIX, self._allocate('db', index)) db_list = ['%s%u' % (DB_PREFIX, self._allocate('db', index))
for _ in xrange(storage_count)] for _ in xrange(storage_count)]
if adapter == 'MySQL': if adapter == 'MySQL':
setupMySQLdb(db_list, db_user, db_password, clear_databases) db = setupMySQLdb(db_list, clear_databases)
db = '%s:%s@%%s%s' % (db_user, db_password, DB_SOCKET)
elif adapter == 'SQLite': elif adapter == 'SQLite':
db = os.path.join(getTempDirectory(), '%s.sqlite') db = os.path.join(getTempDirectory(), '%s.sqlite').__mod__
else: else:
assert False, adapter assert False, adapter
if importer: if importer:
cfg = ImporterConfigParser(adapter, **importer) cfg = ImporterConfigParser(adapter, **importer)
cfg.set("neo", "database", db % tuple(db_list)) cfg.set("neo", "database", db(*db_list))
db = os.path.join(getTempDirectory(), '%s.conf') db = os.path.join(getTempDirectory(), '%s.conf').__mod__
with open(db % tuple(db_list), "w") as f: with open(db(*db_list), "w") as f:
cfg.write(f) cfg.write(f)
kw["adapter"] = "Importer" kw["adapter"] = "Importer"
kw['wait'] = 0 kw['wait'] = 0
self.storage_list = [StorageApplication(database=db % x, **kw) self.storage_list = [StorageApplication(database=db(x), **kw)
for x in db_list] for x in db_list]
self.admin_list = [AdminApplication(**kw)] self.admin_list = [AdminApplication(**kw)]
...@@ -841,7 +849,7 @@ class NEOCluster(object): ...@@ -841,7 +849,7 @@ class NEOCluster(object):
master_list = self.master_list master_list = self.master_list
if storage_list is None: if storage_list is None:
storage_list = self.storage_list storage_list = self.storage_list
def answerPartitionTable(release, orig, *args): def sendPartitionTable(release, orig, *args):
orig(*args) orig(*args)
release() release()
def dispatch(release, orig, handler, *args): def dispatch(release, orig, handler, *args):
...@@ -857,7 +865,7 @@ class NEOCluster(object): ...@@ -857,7 +865,7 @@ class NEOCluster(object):
if state in expected_state: if state in expected_state:
release() release()
with Serialized.until(MasterEventHandler, with Serialized.until(MasterEventHandler,
answerPartitionTable=answerPartitionTable) as tic1, \ sendPartitionTable=sendPartitionTable) as tic1, \
Serialized.until(RecoveryManager, dispatch=dispatch) as tic2, \ Serialized.until(RecoveryManager, dispatch=dispatch) as tic2, \
Serialized.until(MasterEventHandler, Serialized.until(MasterEventHandler,
notifyClusterInformation=notifyClusterInformation) as tic3: notifyClusterInformation=notifyClusterInformation) as tic3:
...@@ -882,9 +890,13 @@ class NEOCluster(object): ...@@ -882,9 +890,13 @@ class NEOCluster(object):
expected_state = (NodeStates.PENDING expected_state = (NodeStates.PENDING
if state == ClusterStates.RECOVERING if state == ClusterStates.RECOVERING
else NodeStates.RUNNING) else NodeStates.RUNNING)
for node in self.storage_list if storage_list is None else storage_list: for node, expected_state in (
storage_list if isinstance(storage_list, dict) else
dict.fromkeys(self.storage_list if storage_list is None else
storage_list, expected_state)
).iteritems():
state = self.getNodeState(node) state = self.getNodeState(node)
assert state == expected_state, (repr(node), state) assert state == expected_state, (repr(node), state, expected_state)
def stop(self, clear_database=False, __print_exc=traceback.print_exc, **kw): def stop(self, clear_database=False, __print_exc=traceback.print_exc, **kw):
if self.started: if self.started:
...@@ -958,7 +970,7 @@ class NEOCluster(object): ...@@ -958,7 +970,7 @@ class NEOCluster(object):
def startCluster(self): def startCluster(self):
try: try:
self.neoctl.startCluster() self.neoctl.startCluster()
except RuntimeError: except SystemExit:
Serialized.tic() Serialized.tic()
if self.neoctl.getClusterState() not in ( if self.neoctl.getClusterState() not in (
ClusterStates.BACKINGUP, ClusterStates.BACKINGUP,
...@@ -1037,18 +1049,18 @@ class NEOCluster(object): ...@@ -1037,18 +1049,18 @@ class NEOCluster(object):
"""Sort storages so that storage_list[i] has partition i for all i""" """Sort storages so that storage_list[i] has partition i for all i"""
pt = [{x.getUUID() for x in x} pt = [{x.getUUID() for x in x}
for x in self.primary_master.pt.partition_list] for x in self.primary_master.pt.partition_list]
n = len(self.storage_list)
r = [] r = []
x = [iter(pt[0])] x = [iter(pt[0])]
try:
while 1: while 1:
try: try:
r.append(next(x[-1])) r.append(next(x[-1]))
except StopIteration: except StopIteration:
del r[-1], x[-1] del r[-1], x[-1]
else: else:
if len(r) == n:
break
x.append(iter(pt[len(r)].difference(r))) x.append(iter(pt[len(r)].difference(r)))
except IndexError:
assert len(r) == len(self.storage_list)
x = {x.uuid: x for x in self.storage_list} x = {x.uuid: x for x in self.storage_list}
self.storage_list[:] = (x[r] for r in r) self.storage_list[:] = (x[r] for r in r)
return self.storage_list return self.storage_list
......
...@@ -42,6 +42,7 @@ from neo.lib.util import add64, makeChecksum, p64, u64 ...@@ -42,6 +42,7 @@ from neo.lib.util import add64, makeChecksum, p64, u64
from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
from neo.client.transactions import Transaction from neo.client.transactions import Transaction
from neo.master.handlers.client import ClientServiceHandler from neo.master.handlers.client import ClientServiceHandler
from neo.master.pt import PartitionTable
from neo.storage.database import DatabaseFailure from neo.storage.database import DatabaseFailure
from neo.storage.handlers.client import ClientOperationHandler from neo.storage.handlers.client import ClientOperationHandler
from neo.storage.handlers.identification import IdentificationHandler from neo.storage.handlers.identification import IdentificationHandler
...@@ -471,6 +472,7 @@ class Test(NEOThreadedTest): ...@@ -471,6 +472,7 @@ class Test(NEOThreadedTest):
self.assertFalse(conn.isClosed()) self.assertFalse(conn.isClosed())
getCellSortKey = cluster.client.getCellSortKey getCellSortKey = cluster.client.getCellSortKey
self.assertEqual(getCellSortKey(s0, good), 0) self.assertEqual(getCellSortKey(s0, good), 0)
cluster.neoctl.killNode(s0.getUUID())
cluster.neoctl.dropNode(s0.getUUID()) cluster.neoctl.dropNode(s0.getUUID())
self.assertEqual([s1], cluster.client.nm.getStorageList()) self.assertEqual([s1], cluster.client.nm.getStorageList())
self.assertTrue(conn.isClosed()) self.assertTrue(conn.isClosed())
...@@ -776,6 +778,7 @@ class Test(NEOThreadedTest): ...@@ -776,6 +778,7 @@ class Test(NEOThreadedTest):
checkNodeState(NodeStates.RUNNING) checkNodeState(NodeStates.RUNNING)
self.assertEqual([], cluster.getOutdatedCells()) self.assertEqual([], cluster.getOutdatedCells())
# drop one # drop one
cluster.neoctl.killNode(s1.uuid)
cluster.neoctl.dropNode(s1.uuid) cluster.neoctl.dropNode(s1.uuid)
checkNodeState(None) checkNodeState(None)
self.tic() # Let node state update reach remaining storage self.tic() # Let node state update reach remaining storage
...@@ -1055,6 +1058,10 @@ class Test(NEOThreadedTest): ...@@ -1055,6 +1058,10 @@ class Test(NEOThreadedTest):
# Check that the storage hasn't answered to the store, # Check that the storage hasn't answered to the store,
# which means that a lock is still taken for r['x'] by t2. # which means that a lock is still taken for r['x'] by t2.
self.tic() self.tic()
try:
txn = txn.data(c1)
except (AttributeError, KeyError): # BBB: ZODB < 5
pass
txn_context = cluster.client._txn_container.get(txn) txn_context = cluster.client._txn_container.get(txn)
empty = txn_context.queue.empty() empty = txn_context.queue.empty()
ll() ll()
...@@ -1303,7 +1310,7 @@ class Test(NEOThreadedTest): ...@@ -1303,7 +1310,7 @@ class Test(NEOThreadedTest):
del conn._queue[:] # XXX del conn._queue[:] # XXX
conn.close() conn.close()
if 1: if 1:
with Patch(cluster.master.pt, make=make), \ with Patch(PartitionTable, make=make), \
Patch(InitializationHandler, Patch(InitializationHandler,
askPartitionTable=askPartitionTable) as p: askPartitionTable=askPartitionTable) as p:
cluster.start() cluster.start()
...@@ -1834,18 +1841,7 @@ class Test(NEOThreadedTest): ...@@ -1834,18 +1841,7 @@ class Test(NEOThreadedTest):
x.value += 1 x.value += 1
c2.root()['x'].value += 2 c2.root()['x'].value += 2
TransactionalResource(t1, 1, tpc_begin=begin1) TransactionalResource(t1, 1, tpc_begin=begin1)
# BUG: Very rarely, getConnectionList returns more that 1 s1m, = s1.getConnectionList(cluster.master)
# connection ("too many values to unpack"), which is
# a mystery and impossible to reproduce:
# - 1st time: v1.8.1 on a test machine (no SSL)
# - last: current revision on my laptop (SSL),
# at the first iteration of this loop
_sm = list(s1.getConnectionList(cluster.master))
try:
s1m, = _sm
except ValueError:
self.fail((_sm, list(
s1.getConnectionList(cluster.master))))
try: try:
s1.em.removeReader(s1m) s1.em.removeReader(s1m)
with ConnectionFilter() as f, \ with ConnectionFilter() as f, \
...@@ -2293,6 +2289,10 @@ class Test(NEOThreadedTest): ...@@ -2293,6 +2289,10 @@ class Test(NEOThreadedTest):
# Check that the storage hasn't answered to the store, # Check that the storage hasn't answered to the store,
# which means that a lock is still taken for r[''] by t1. # which means that a lock is still taken for r[''] by t1.
self.tic() self.tic()
try:
txn = txn.data(c3)
except (AttributeError, KeyError): # BBB: ZODB < 5
pass
txn_context = db.storage.app._txn_container.get(txn) txn_context = db.storage.app._txn_container.get(txn)
raise Abort(txn_context.queue.empty()) raise Abort(txn_context.queue.empty())
TransactionalResource(t3, 1, commit=t3_commit) TransactionalResource(t3, 1, commit=t3_commit)
...@@ -2339,8 +2339,8 @@ class Test(NEOThreadedTest): ...@@ -2339,8 +2339,8 @@ class Test(NEOThreadedTest):
for x in 'ab': for x in 'ab':
r[x] = PCounterWithResolution() r[x] = PCounterWithResolution()
t1.commit() t1.commit()
cluster.stop(replicas=1) cluster.neoctl.setNumReplicas(1)
cluster.start() self.tic()
s0, s1 = cluster.sortStorageList() s0, s1 = cluster.sortStorageList()
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
r = c1.root() r = c1.root()
...@@ -2524,8 +2524,8 @@ class Test(NEOThreadedTest): ...@@ -2524,8 +2524,8 @@ class Test(NEOThreadedTest):
for x in 'ab': for x in 'ab':
r[x] = PCounterWithResolution() r[x] = PCounterWithResolution()
t1.commit() t1.commit()
cluster.stop(replicas=1) cluster.neoctl.setNumReplicas(1)
cluster.start() self.tic()
s0, s1 = cluster.sortStorageList() s0, s1 = cluster.sortStorageList()
t1, c1 = cluster.getTransaction() t1, c1 = cluster.getTransaction()
r = c1.root() r = c1.root()
...@@ -2826,9 +2826,9 @@ class Test(NEOThreadedTest): ...@@ -2826,9 +2826,9 @@ class Test(NEOThreadedTest):
dm = s.dm dm = s.dm
dm.commit() dm.commit()
dump_dict[s.uuid] = dm.dump() dump_dict[s.uuid] = dm.dump()
dm.erase()
with open(path % (s.getAdapter(), s.uuid)) as f: with open(path % (s.getAdapter(), s.uuid)) as f:
dm.restore(f.read()) dm.restore(f.read())
dm.setConfiguration('partitions', None) # XXX: see dm._migrate4
with NEOCluster(storage_count=3, partitions=3, replicas=1, with NEOCluster(storage_count=3, partitions=3, replicas=1,
name=self._testMethodName) as cluster: name=self._testMethodName) as cluster:
s1, s2, s3 = cluster.storage_list s1, s2, s3 = cluster.storage_list
......
This diff is collapsed.
...@@ -29,7 +29,7 @@ from neo.storage.database.manager import DatabaseManager ...@@ -29,7 +29,7 @@ from neo.storage.database.manager import DatabaseManager
from neo.storage import replicator from neo.storage import replicator
from neo.lib.connector import SocketConnector from neo.lib.connector import SocketConnector
from neo.lib.connection import ClientConnection from neo.lib.connection import ClientConnection
from neo.lib.protocol import CellStates, ClusterStates, Packets, \ from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
ZERO_OID, ZERO_TID, MAX_TID, uuid_str ZERO_OID, ZERO_TID, MAX_TID, uuid_str
from neo.lib.util import add64, p64, u64 from neo.lib.util import add64, p64, u64
from .. import Patch, TransactionalResource from .. import Patch, TransactionalResource
...@@ -81,6 +81,8 @@ class ReplicationTests(NEOThreadedTest): ...@@ -81,6 +81,8 @@ class ReplicationTests(NEOThreadedTest):
source_dict = {x.uuid: x for x in cluster.upstream.storage_list} source_dict = {x.uuid: x for x in cluster.upstream.storage_list}
for storage in cluster.storage_list: for storage in cluster.storage_list:
self.assertFalse(storage.dm._uncommitted_data) self.assertFalse(storage.dm._uncommitted_data)
if storage.pt is None:
storage.loadPartitionTable()
self.assertEqual(np, storage.pt.getPartitions()) self.assertEqual(np, storage.pt.getPartitions())
for partition in pt.getAssignedPartitionList(storage.uuid): for partition in pt.getAssignedPartitionList(storage.uuid):
cell_list = upstream_pt.getCellList(partition, readable=True) cell_list = upstream_pt.getCellList(partition, readable=True)
...@@ -96,6 +98,7 @@ class ReplicationTests(NEOThreadedTest): ...@@ -96,6 +98,7 @@ class ReplicationTests(NEOThreadedTest):
checksum_list = [ checksum_list = [
self.checksumPartition(storage_dict[x.getUUID()], offset) self.checksumPartition(storage_dict[x.getUUID()], offset)
for x in pt.getCellList(offset)] for x in pt.getCellList(offset)]
self.assertLess(1, len(checksum_list))
self.assertEqual(1, len(set(checksum_list)), self.assertEqual(1, len(set(checksum_list)),
(offset, checksum_list)) (offset, checksum_list))
...@@ -496,13 +499,13 @@ class ReplicationTests(NEOThreadedTest): ...@@ -496,13 +499,13 @@ class ReplicationTests(NEOThreadedTest):
return isinstance(packet, delayed) and \ return isinstance(packet, delayed) and \
packet.decode()[0] == offset and \ packet.decode()[0] == offset and \
conn in s1.getConnectionList(s0) conn in s1.getConnectionList(s0)
def changePartitionTable(orig, ptid, cell_list): def changePartitionTable(orig, ptid, num_replicas, cell_list):
if (offset, s0.uuid, CellStates.DISCARDED) in cell_list: if (offset, s0.uuid, CellStates.DISCARDED) in cell_list:
connection_filter.remove(delayAskFetch) connection_filter.remove(delayAskFetch)
# XXX: this is currently not done by # XXX: this is currently not done by
# default for performance reason # default for performance reason
orig.im_self.dropPartitions((offset,)) orig.im_self.dropPartitions((offset,))
return orig(ptid, cell_list) return orig(ptid, num_replicas, cell_list)
np = cluster.num_partitions np = cluster.num_partitions
s0, s1, s2 = cluster.storage_list s0, s1, s2 = cluster.storage_list
for delayed in Packets.AskFetchTransactions, Packets.AskFetchObjects: for delayed in Packets.AskFetchTransactions, Packets.AskFetchObjects:
...@@ -562,7 +565,9 @@ class ReplicationTests(NEOThreadedTest): ...@@ -562,7 +565,9 @@ class ReplicationTests(NEOThreadedTest):
for x in 'ab': for x in 'ab':
r[x] = PCounter() r[x] = PCounter()
t.commit() t.commit()
cluster.stop(replicas=1) cluster.neoctl.setNumReplicas(1)
self.tic()
cluster.stop()
cluster.start((s1, s2)) cluster.start((s1, s2))
with ConnectionFilter() as f: with ConnectionFilter() as f:
f.delayAddObject() f.delayAddObject()
...@@ -647,8 +652,9 @@ class ReplicationTests(NEOThreadedTest): ...@@ -647,8 +652,9 @@ class ReplicationTests(NEOThreadedTest):
tweak() tweak()
t.commit() t.commit()
t2.join() t2.join()
cluster.neoctl.dropNode(S[2].uuid) for s in S[2:]:
cluster.neoctl.dropNode(S[3].uuid) cluster.neoctl.killNode(s.uuid)
cluster.neoctl.dropNode(s.uuid)
cluster.neoctl.tweakPartitionTable() cluster.neoctl.tweakPartitionTable()
if done: if done:
f.remove(delay) f.remove(delay)
...@@ -979,6 +985,74 @@ class ReplicationTests(NEOThreadedTest): ...@@ -979,6 +985,74 @@ class ReplicationTests(NEOThreadedTest):
def testReplicationBlockedByUnfinished2(self): def testReplicationBlockedByUnfinished2(self):
self.testReplicationBlockedByUnfinished1(True) self.testReplicationBlockedByUnfinished1(True)
@with_cluster(partitions=6, storage_count=5, start_cluster=0)
def testSplitAndMakeResilientUsingClone(self, cluster):
"""
Test cloning of storage nodes using --new-nid instead NEO replication.
"""
s0 = cluster.storage_list[0]
s12 = cluster.storage_list[1:3]
s34 = cluster.storage_list[3:]
cluster.start(storage_list=(s0,))
cluster.importZODB()(6)
for s in s12:
s.start()
self.tic()
drop_list = [s0.uuid]
self.assertRaises(SystemExit, cluster.neoctl.tweakPartitionTable,
drop_list)
cluster.enableStorageList(s12)
def expected(changed):
s0 = 1, CellStates.UP_TO_DATE
s = CellStates.OUT_OF_DATE if changed else CellStates.UP_TO_DATE
return changed, 3 * [[s0, (2, s)], [s0, (3, s)]]
for dry_run in True, False:
self.assertEqual(expected(True),
cluster.neoctl.tweakPartitionTable(drop_list, dry_run))
self.tic()
self.assertEqual(expected(False),
cluster.neoctl.tweakPartitionTable(drop_list))
for s, d in zip(s12, s34):
s.stop()
cluster.join((s,))
s.resetNode()
d.dm.restore(s.dm.dump())
d.resetNode(new_nid=True)
s.start()
d.start()
self.tic()
self.assertEqual(cluster.getNodeState(s), NodeStates.RUNNING)
self.assertEqual(cluster.getNodeState(d), NodeStates.DOWN)
cluster.join((d,))
d.resetNode(new_nid=False)
d.start()
self.tic()
self.checkReplicas(cluster)
expected = '|'.join(['UU.U.|U.U.U'] * 3)
self.assertPartitionTable(cluster, expected)
cluster.neoctl.setNumReplicas(1)
cluster.neoctl.tweakPartitionTable(drop_list)
self.tic()
self.assertPartitionTable(cluster, expected)
s0.stop()
cluster.join((s0,))
cluster.neoctl.dropNode(s0.uuid)
expected = '|'.join(['U.U.|.U.U'] * 3)
self.assertPartitionTable(cluster, expected)
@with_cluster(partitions=3, replicas=1, storage_count=3)
def testAdminOnerousOperationCondition(self, cluster):
s = cluster.storage_list[2]
cluster.neoctl.killNode(s.uuid)
tweak = cluster.neoctl.tweakPartitionTable
self.assertRaises(SystemExit, tweak)
self.assertRaises(SystemExit, tweak, dry_run=True)
self.assertTrue(tweak((s.uuid,))[0])
self.tic()
cluster.neoctl.dropNode(s.uuid)
s = cluster.storage_list[1]
self.assertRaises(SystemExit, cluster.neoctl.dropNode, s.uuid)
@with_cluster(partitions=5, replicas=2, storage_count=3) @with_cluster(partitions=5, replicas=2, storage_count=3)
def testCheckReplicas(self, cluster): def testCheckReplicas(self, cluster):
from neo.storage import checker from neo.storage import checker
...@@ -991,8 +1065,8 @@ class ReplicationTests(NEOThreadedTest): ...@@ -991,8 +1065,8 @@ class ReplicationTests(NEOThreadedTest):
return s0.uuid return s0.uuid
def check(expected_state, expected_count): def check(expected_state, expected_count):
self.assertEqual(expected_count, len([None self.assertEqual(expected_count, len([None
for row in cluster.neoctl.getPartitionRowList()[1] for row in cluster.neoctl.getPartitionRowList()[2]
for cell in row[1] for cell in row
if cell[1] == CellStates.CORRUPTED])) if cell[1] == CellStates.CORRUPTED]))
self.assertEqual(expected_state, cluster.neoctl.getClusterState()) self.assertEqual(expected_state, cluster.neoctl.getClusterState())
np = cluster.num_partitions np = cluster.num_partitions
......
...@@ -33,8 +33,6 @@ class RecoveryTests(ZODBTestCase, StorageTestBase, RecoveryStorage): ...@@ -33,8 +33,6 @@ class RecoveryTests(ZODBTestCase, StorageTestBase, RecoveryStorage):
os.makedirs(dst_temp_dir) os.makedirs(dst_temp_dir)
self.neo_dst = NEOCluster(['test_neo1-dst'], partitions=1, replicas=0, self.neo_dst = NEOCluster(['test_neo1-dst'], partitions=1, replicas=0,
master_count=1, temp_dir=dst_temp_dir) master_count=1, temp_dir=dst_temp_dir)
self.neo_dst.stop()
self.neo_dst.setupDB()
self.neo_dst.start() self.neo_dst.start()
self._dst = self.neo.getZODBStorage() self._dst = self.neo.getZODBStorage()
self._dst_db = ZODB.DB(self._dst) self._dst_db = ZODB.DB(self._dst)
......
...@@ -78,7 +78,7 @@ else: ...@@ -78,7 +78,7 @@ else:
setup( setup(
name = 'neoppod', name = 'neoppod',
version = '1.11', version = '1.12.0',
description = __doc__.strip(), description = __doc__.strip(),
author = 'Nexedi SA', author = 'Nexedi SA',
author_email = 'neo-dev@erp5.org', author_email = 'neo-dev@erp5.org',
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment