Commit 2c823e2e authored by Julien Muchembled's avatar Julien Muchembled

Merge v1.12

parents a33c624c 6332112c
Change History
==============
1.12 (2019-04-28)
-----------------
Most changes in this version focus on the ability to migrate efficiently
and reliably a big ZODB to NEO, which required changes in the protocol.
See testSplitAndMakeResilientUsingClone for an example of scenario.
Better cluster management:
- New --new-nid storage option for fast cloning.
- The number of wanted replicas is now a property of the database, which is
modifiable when the cluster is running, and reported by `neoctl print pt`.
- Better error reporting from the master to neoctl for denied requests.
- tweak: do not touch cells of nodes that are intended to be dropped.
- tweak: do not crash when trying to remove all nodes.
- tweak: new neoctl option to ask the master to simulate.
- neoctl: better display of full partition tables.
- master: reject drop/tweak commands that could lead to unwanted status.
Importer:
- Fix possible data loss on writeback.
- v1.9 broke replication (as source) once the import is finished.
- Speed up startup when the import is already finished.
- Fix closure of ZODB, and also do it when the import is finished.
- Fix hidden "maximum recursion depth exceeded" at startup.
- Fix resumption when using SQLite.
- v1.10 broke resumption when there are new transactions since the import
started.
MySQL:
- Better support of RocksDB by specifying column families.
- Fix handling of connection strings (--database) without credentials.
1.11 (2019-03-11)
-----------------
......
......@@ -21,7 +21,6 @@ from neo.lib.exception import PrimaryFailure
from .handler import AdminEventHandler, MasterEventHandler, \
MasterRequestEventHandler
from neo.lib.bootstrap import BootstrapManager
from neo.lib.pt import PartitionTable
from neo.lib.protocol import ClusterStates, Errors, NodeTypes, Packets
from neo.lib.debug import register as registerLiveDebugger
......@@ -36,8 +35,8 @@ class Application(BaseApplication):
cls.addCommonServerOptions('admin', '127.0.0.1:9999')
_ = _.group('admin')
_.int('u', 'uuid',
help="specify an UUID to use for this process (testing purpose)")
_.int('i', 'nid',
help="specify an NID to use for this process (testing purpose)")
def __init__(self, config):
super(Application, self).__init__(
......@@ -53,7 +52,7 @@ class Application(BaseApplication):
# The partition table is initialized after getting the number of
# partitions.
self.pt = None
self.uuid = config.get('uuid')
self.uuid = config.get('nid')
logging.node(self.name, self.uuid)
self.request_handler = MasterRequestEventHandler(self)
self.master_event_handler = MasterEventHandler(self)
......@@ -66,7 +65,6 @@ class Application(BaseApplication):
super(Application, self).close()
def reset(self):
self.bootstrapped = False
self.master_conn = None
self.master_node = None
......@@ -117,40 +115,20 @@ class Application(BaseApplication):
self.cluster_state = None
# search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.ADMIN, self.server)
self.master_node, self.master_conn, num_partitions, num_replicas = \
bootstrap.getPrimaryConnection()
if self.pt is None:
self.pt = PartitionTable(num_partitions, num_replicas)
elif self.pt.getPartitions() != num_partitions:
# XXX: shouldn't we recover instead of raising ?
raise RuntimeError('the number of partitions is inconsistent')
elif self.pt.getReplicas() != num_replicas:
# XXX: shouldn't we recover instead of raising ?
raise RuntimeError('the number of replicas is inconsistent')
self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
# passive handler
self.master_conn.setHandler(self.master_event_handler)
self.master_conn.ask(Packets.AskClusterState())
self.master_conn.ask(Packets.AskPartitionTable())
def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
# we have a pt
self.pt.log()
row_list = []
pt = self.pt
if max_offset == 0:
max_offset = self.pt.getPartitions()
try:
for offset in xrange(min_offset, max_offset):
row = []
max_offset = pt.getPartitions()
try:
for cell in self.pt.getCellList(offset):
if uuid is None or cell.getUUID() == uuid:
row.append((cell.getUUID(), cell.getState()))
except TypeError:
pass
row_list.append((offset, row))
row_list = map(pt.getRow, xrange(min_offset, max_offset))
except IndexError:
conn.send(Errors.ProtocolError('invalid partition table offset'))
else:
conn.answer(Packets.AnswerPartitionList(self.pt.getID(), row_list))
conn.answer(Packets.AnswerPartitionList(
pt.getID(), pt.getReplicas(), row_list))
......@@ -17,11 +17,12 @@
from neo.lib import logging, protocol
from neo.lib.handler import EventHandler
from neo.lib.protocol import uuid_str, Packets
from neo.lib.pt import PartitionTable
from neo.lib.exception import PrimaryFailure
def check_primary_master(func):
def wrapper(self, *args, **kw):
if self.app.bootstrapped:
if self.app.master_conn is not None:
return func(self, *args, **kw)
raise protocol.NotReadyError('Not connected to a primary master.')
return wrapper
......@@ -74,6 +75,7 @@ class AdminEventHandler(EventHandler):
tweakPartitionTable = forward_ask(Packets.TweakPartitionTable)
setClusterState = forward_ask(Packets.SetClusterState)
setNodeState = forward_ask(Packets.SetNodeState)
setNumReplicas = forward_ask(Packets.SetNumReplicas)
checkReplicas = forward_ask(Packets.CheckReplicas)
truncate = forward_ask(Packets.Truncate)
repair = forward_ask(Packets.Repair)
......@@ -112,16 +114,12 @@ class MasterEventHandler(EventHandler):
def answerClusterState(self, conn, state):
self.app.cluster_state = state
def notifyPartitionChanges(self, conn, ptid, cell_list):
self.app.pt.update(ptid, cell_list, self.app.nm)
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
pt = self.app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
def answerPartitionTable(self, conn, ptid, row_list):
self.app.pt.load(ptid, row_list, self.app.nm)
self.app.bootstrapped = True
def sendPartitionTable(self, conn, ptid, row_list):
if self.app.bootstrapped:
self.app.pt.load(ptid, row_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def notifyClusterInformation(self, conn, cluster_state):
self.app.cluster_state = cluster_state
......
......@@ -76,7 +76,7 @@ class Application(ThreadedApplication):
self.primary_master_node = None
self.trying_master_node = None
# no self-assigned UUID, primary master will supply us one
# no self-assigned NID, primary master will supply us one
self._cache = ClientCache() if cache_size is None else \
ClientCache(max_size=cache_size)
self._loading = defaultdict(lambda: (Lock(), []))
......@@ -220,8 +220,8 @@ class Application(ThreadedApplication):
self.notifications_handler,
node=node,
dispatcher=self.dispatcher)
p = Packets.RequestIdentification(
NodeTypes.CLIENT, self.uuid, None, self.name, (), None)
p = Packets.RequestIdentification(NodeTypes.CLIENT,
self.uuid, None, self.name, None, (), ())
try:
ask(conn, p, handler=handler)
except ConnectionClosed:
......@@ -238,7 +238,6 @@ class Application(ThreadedApplication):
# operational. Might raise ConnectionClosed so that the new
# primary can be looked-up again.
logging.info('Initializing from master')
ask(conn, Packets.AskPartitionTable(), handler=handler)
ask(conn, Packets.AskLastTransaction(), handler=handler)
if self.pt.operational():
break
......@@ -264,7 +263,7 @@ class Application(ThreadedApplication):
conn = MTClientConnection(self, self.storage_event_handler, node,
dispatcher=self.dispatcher)
p = Packets.RequestIdentification(NodeTypes.CLIENT,
self.uuid, None, self.name, (), self.id_timestamp)
self.uuid, None, self.name, self.id_timestamp, (), ())
try:
self._ask(conn, p, handler=self.storage_bootstrap_handler)
except ConnectionClosed:
......
......@@ -26,10 +26,6 @@ from ..exception import NEOStorageError
class PrimaryBootstrapHandler(AnswerBaseHandler):
""" Bootstrap handler used when looking for the primary master """
def answerPartitionTable(self, conn, ptid, row_list):
assert row_list
self.app.pt.load(ptid, row_list, self.app.nm)
def answerLastTransaction(*args):
pass
......@@ -42,9 +38,6 @@ class PrimaryNotificationsHandler(MTEventHandler):
except PrimaryElected, e:
self.app.primary_master_node, = e.args
def _acceptIdentification(self, node, num_partitions, num_replicas):
self.app.pt = PartitionTable(num_partitions, num_replicas)
def answerLastTransaction(self, conn, ltid):
app = self.app
app_last_tid = app.__dict__.get('last_tid', '')
......@@ -131,9 +124,12 @@ class PrimaryNotificationsHandler(MTEventHandler):
if db is not None:
db.invalidate(tid, oid_list)
def notifyPartitionChanges(self, conn, ptid, cell_list):
if self.app.pt.filled():
self.app.pt.update(ptid, cell_list, self.app.nm)
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
pt = self.app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def notifyNodeInformation(self, conn, timestamp, node_list):
super(PrimaryNotificationsHandler, self).notifyNodeInformation(
......
......@@ -26,7 +26,7 @@ class BootstrapManager(EventHandler):
Manage the bootstrap stage, lookup for the primary master then connect to it
"""
def __init__(self, app, node_type, server=None, devpath=()):
def __init__(self, app, node_type, server=None, devpath=(), new_nid=()):
"""
Manage the bootstrap stage of a non-master node, it lookup for the
primary master node, connect to it then returns when the master node
......@@ -34,9 +34,8 @@ class BootstrapManager(EventHandler):
"""
self.server = server
self.devpath = devpath
self.new_nid = new_nid
self.node_type = node_type
self.num_replicas = None
self.num_partitions = None
app.nm.reset()
uuid = property(lambda self: self.app.uuid)
......@@ -44,7 +43,7 @@ class BootstrapManager(EventHandler):
def connectionCompleted(self, conn):
EventHandler.connectionCompleted(self, conn)
conn.ask(Packets.RequestIdentification(self.node_type, self.uuid,
self.server, self.app.name, self.devpath, None))
self.server, self.app.name, None, self.devpath, self.new_nid))
def connectionFailed(self, conn):
EventHandler.connectionFailed(self, conn)
......@@ -53,10 +52,8 @@ class BootstrapManager(EventHandler):
def connectionLost(self, conn, new_state):
self.current = None
def _acceptIdentification(self, node, num_partitions, num_replicas):
def _acceptIdentification(self, node):
assert self.current is node, (self.current, node)
self.num_partitions = num_partitions
self.num_replicas = num_replicas
def getPrimaryConnection(self):
"""
......@@ -73,8 +70,7 @@ class BootstrapManager(EventHandler):
try:
while self.current:
if self.current.isIdentified():
return (self.current, self.current.getConnection(),
self.num_partitions, self.num_replicas)
return self.current, self.current.getConnection()
poll(1)
except PrimaryElected, e:
if self.current:
......
......@@ -209,7 +209,7 @@ class BaseConnection(object):
def _getReprInfo(self):
r = [
('uuid', uuid_str(self.getUUID())),
('nid', uuid_str(self.getUUID())),
('address', ('[%s]:%s' if ':' in self.addr[0] else '%s:%s')
% self.addr if self.addr else '?'),
('handler', self.getHandler()),
......
......@@ -26,6 +26,9 @@ from .protocol import (NodeStates, NodeTypes, Packets, uuid_str,
from .util import cached_property
class AnswerDenied(Exception):
"""Helper exception to stop packet processing and answer a Denied error"""
class DelayEvent(Exception):
pass
......@@ -98,6 +101,8 @@ class EventHandler(object):
% (m.im_class.__module__, m.im_class.__name__, m.__name__)))
except NonReadableCell, e:
conn.answer(Errors.NonReadableCell())
except AnswerDenied, e:
conn.answer(Errors.Denied(str(e)))
except AssertionError:
e = sys.exc_info()
try:
......@@ -160,8 +165,7 @@ class EventHandler(object):
def _acceptIdentification(*args):
pass
def acceptIdentification(self, conn, node_type, uuid,
num_partitions, num_replicas, your_uuid):
def acceptIdentification(self, conn, node_type, uuid, your_uuid):
app = self.app
node = app.nm.getByAddress(conn.getAddress())
assert node.getConnection() is conn, (node.getConnection(), conn)
......@@ -180,7 +184,7 @@ class EventHandler(object):
elif node.getUUID() != uuid or app.uuid != your_uuid != None:
raise ProtocolError('invalid uuids')
node.setIdentified()
self._acceptIdentification(node, num_partitions, num_replicas)
self._acceptIdentification(node)
return
conn.close()
......
......@@ -486,7 +486,7 @@ class NodeManager(EventQueue):
# For the first notification, we receive a full list of nodes from
# the master. Remove all unknown nodes from a previous connection.
for node in self._node_set.difference(added_list):
if app.pt.dropNode(node):
if not node.isStorage() or app.pt.dropNode(node):
self.remove(node)
self.log()
self.executeQueuedEvents()
......
......@@ -22,7 +22,7 @@ from struct import Struct
# The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes. It is encoded as a 4-bytes big-endian integer and
# the high order byte 0 is different from TLS Handshake (0x16).
PROTOCOL_VERSION = 5
PROTOCOL_VERSION = 6
ENCODED_VERSION = Struct('!L').pack(PROTOCOL_VERSION)
# Avoid memory errors on corrupted data.
......@@ -62,6 +62,7 @@ class Enum(tuple):
@Enum
def ErrorCodes():
ACK
DENIED
NOT_READY
OID_NOT_FOUND
TID_NOT_FOUND
......@@ -616,10 +617,7 @@ PFCellList = PList('cell_list',
)
PFRowList = PList('row_list',
PStruct('row',
PNumber('offset'),
PFCellList,
),
)
PFHistoryList = PList('history_list',
......@@ -685,15 +683,15 @@ class RequestIdentification(Packet):
PUUID('uuid'),
PAddress('address'),
PString('name'),
PList('devpath', PString('devid')),
PFloat('id_timestamp'),
# storage:
PList('devpath', PString('devid')),
PList('new_nid', PNumber('offset')),
)
_answer = PStruct('accept_identification',
PFNodeType,
PUUID('my_uuid'),
PNumber('num_partitions'),
PNumber('num_replicas'),
PUUID('your_uuid'),
)
......@@ -749,23 +747,24 @@ class LastIDs(Packet):
class PartitionTable(Packet):
"""
Ask storage node the remaining data needed by master to recover.
This is also how the clients get the full partition table on connection.
:nodes: M -> S; C -> M
:nodes: M -> S
"""
_answer = PStruct('answer_partition_table',
PPTID('ptid'),
PNumber('num_replicas'),
PFRowList,
)
class NotifyPartitionTable(Packet):
"""
Send the full partition table to admin/storage nodes on connection.
Send the full partition table to admin/client/storage nodes on connection.
:nodes: M -> A, S
:nodes: M -> A, C, S
"""
_fmt = PStruct('send_partition_table',
PPTID('ptid'),
PNumber('num_replicas'),
PFRowList,
)
......@@ -777,6 +776,7 @@ class PartitionChanges(Packet):
"""
_fmt = PStruct('notify_partition_changes',
PPTID('ptid'),
PNumber('num_replicas'),
PList('cell_list',
PStruct('cell',
PNumber('offset'),
......@@ -1202,6 +1202,7 @@ class PartitionList(Packet):
_answer = PStruct('answer_partition_list',
PPTID('ptid'),
PNumber('num_replicas'),
PFRowList,
)
......@@ -1253,10 +1254,14 @@ class TweakPartitionTable(Packet):
:nodes: ctl -> A -> M
"""
_fmt = PStruct('tweak_partition_table',
PBoolean('dry_run'),
PFUUIDList,
)
_answer = Error
_answer = PStruct('answer_tweak_partition_table',
PBoolean('changed'),
PFRowList,
)
class NotifyNodeInformation(Packet):
"""
......@@ -1269,6 +1274,18 @@ class NotifyNodeInformation(Packet):
PFNodeList,
)
class SetNumReplicas(Packet):
"""
Set the number of replicas.
:nodes: ctl -> A -> M
"""
_fmt = PStruct('set_num_replicas',
PNumber('num_replicas'),
)
_answer = Error
class SetClusterState(Packet):
"""
Set the cluster state.
......@@ -1762,8 +1779,10 @@ class Packets(dict):
SetNodeState, ignore_when_closed=False)
AddPendingNodes = register(
AddPendingNodes, ignore_when_closed=False)
TweakPartitionTable = register(
TweakPartitionTable, ignore_when_closed=False)
TweakPartitionTable, AnswerTweakPartitionTable = register(
TweakPartitionTable)
SetNumReplicas = register(
SetNumReplicas, ignore_when_closed=False)
SetClusterState = register(
SetClusterState, ignore_when_closed=False)
Repair = register(
......
......@@ -86,15 +86,9 @@ class PartitionTable(object):
'a cell became non-readable whereas all cells were readable'
def __init__(self, num_partitions, num_replicas):
self._id = None
self.np = num_partitions
self.nr = num_replicas
self.num_filled_rows = 0
# Note: don't use [[]] * num_partition construct, as it duplicates
# instance *references*, so the outer list contains really just one
# inner list instance.
self.partition_list = [[] for _ in xrange(num_partitions)]
self.count_dict = {}
self.clear()
def getID(self):
return self._id
......@@ -113,7 +107,16 @@ class PartitionTable(object):
# instance *references*, so the outer list contains really just one
# inner list instance.
self.partition_list = [[] for _ in xrange(self.np)]
self.count_dict.clear()
self.count_dict = {}
def addNodeList(self, node_list):
"""Add nodes"""
added_list = []
for node in node_list:
if node not in self.count_dict:
self.count_dict[node] = 0
added_list.append(node)
return added_list
def getAssignedPartitionList(self, uuid):
""" Return the partition assigned to the specified UUID """
......@@ -203,31 +206,31 @@ class PartitionTable(object):
del self.count_dict[node]
return not count
def load(self, ptid, row_list, nm):
def _load(self, ptid, num_replicas, row_list, getByUUID):
self.__init__(len(row_list), num_replicas)
self._id = ptid
for offset, row in enumerate(row_list):
for uuid, state in row:
node = getByUUID(uuid)
self._setCell(offset, node, state)
def load(self, ptid, num_replicas, row_list, nm):
"""
Load the partition table with the specified PTID, discard all previous
content.
"""
self.clear()
self._id = ptid
for offset, row in row_list:
if offset >= self.getPartitions():
raise IndexError
for uuid, state in row:
node = nm.getByUUID(uuid)
# the node must be known by the node manager
assert node is not None
self._setCell(offset, node, state)
self._load(ptid, num_replicas, row_list, nm.getByUUID)
logging.debug('partition table loaded (ptid=%s)', ptid)
self.log()
def update(self, ptid, cell_list, nm):
def update(self, ptid, num_replicas, cell_list, nm):
"""
Update the partition with the cell list supplied. If a node
is not known, it is created in the node manager and set as unavailable
"""
assert self._id < ptid, (self._id, ptid)
self._id = ptid
self.nr = num_replicas
readable_list = []
for row in self.partition_list:
if not all(cell.isReadable() for cell in row):
......@@ -310,14 +313,11 @@ class PartitionTable(object):
return True
def getRow(self, offset):
row = self.partition_list[offset]
if row is None:
return []
return [(cell.getUUID(), cell.getState()) for cell in row]
return [(cell.getUUID(), cell.getState())
for cell in self.partition_list[offset]]
def getRowList(self):
getRow = self.getRow
return [(x, getRow(x)) for x in xrange(self.np)]
return map(self.getRow, xrange(self.np))
class MTPartitionTable(PartitionTable):
""" Thread-safe aware version of the partition table, override only methods
......
......@@ -16,6 +16,7 @@
import sys
from collections import defaultdict
from functools import partial
from time import time
from neo.lib import logging, util
......@@ -76,13 +77,11 @@ class Application(BaseApplication):
@classmethod
def _buildOptionParser(cls):
_ = cls.option_parser
_.description = "NEO Master node"
parser = cls.option_parser
parser.description = "NEO Master node"
cls.addCommonServerOptions('master', '127.0.0.1:10000', '')
_ = _.group('master')
_.int('r', 'replicas', default=0, help="replicas number")
_.int('p', 'partitions', default=100, help="partitions number")
_ = parser.group('master')
_.int('A', 'autostart',
help="minimum number of pending storage nodes to automatically"
" start new cluster (to avoid unwanted recreation of the"
......@@ -91,8 +90,12 @@ class Application(BaseApplication):
help='the name of cluster to backup')
_('M', 'upstream-masters', parse=util.parseMasterList,
help='list of master nodes in the cluster to backup')
_.int('u', 'uuid',
help="specify an UUID to use for this process (testing purpose)")
_.int('i', 'nid',
help="specify an NID to use for this process (testing purpose)")
_ = parser.group('database creation')
_.int('r', 'replicas', default=0, help="replicas number")
_.int('p', 'partitions', default=100, help="partitions number")
def __init__(self, config):
super(Application, self).__init__(
......@@ -108,7 +111,7 @@ class Application(BaseApplication):
for master_address in config['masters']:
self.nm.createMaster(address=master_address)
self._node = self.nm.createMaster(address=self.server,
uuid=config.get('uuid'))
uuid=config.get('nid'))
logging.node(self.name, self.uuid)
logging.debug('IP address is %s, port is %d', *self.server)
......@@ -117,14 +120,14 @@ class Application(BaseApplication):
replicas = config['replicas']
partitions = config['partitions']
if replicas < 0:
raise RuntimeError, 'replicas must be a positive integer'
sys.exit('replicas must be a positive integer')
if partitions <= 0:
raise RuntimeError, 'partitions must be more than zero'
self.pt = PartitionTable(partitions, replicas)
sys.exit('partitions must be more than zero')
logging.info('Configuration:')
logging.info('Partitions: %d', partitions)
logging.info('Replicas : %d', replicas)
logging.info('Name : %s', self.name)
self.newPartitionTable = partial(PartitionTable, partitions, replicas)
self.listening_conn = None
self.cluster_state = None
......@@ -196,7 +199,7 @@ class Application(BaseApplication):
node_dict[NodeTypes.MASTER].append(node_info)
return node_dict
def broadcastNodesInformation(self, node_list, exclude=None):
def broadcastNodesInformation(self, node_list):
"""
Broadcast changes for a set a nodes
Send only one packet per connection to reduce bandwidth
......@@ -209,15 +212,21 @@ class Application(BaseApplication):
# We don't skip pending storage nodes because we don't send them
# the full list of nodes when they're added, and it's also quite
# useful to notify them about new masters.
if node_list and node is not exclude:
if node_list:
node.send(Packets.NotifyNodeInformation(now, node_list))
def broadcastPartitionChanges(self, cell_list):
def broadcastPartitionChanges(self, cell_list, num_replicas=None):
"""Broadcast a Notify Partition Changes packet."""
if cell_list:
ptid = self.pt.setNextID()
self.pt.logUpdated()
packet = Packets.NotifyPartitionChanges(ptid, cell_list)
pt = self.pt
if num_replicas is not None:
pt.setReplicas(num_replicas)
elif cell_list:
num_replicas = pt.getReplicas()
else:
return
packet = Packets.NotifyPartitionChanges(
pt.setNextID(), num_replicas, cell_list)
pt.logUpdated()
for node in self.nm.getIdentifiedList():
# As for broadcastNodesInformation, we don't send the full PT
# when pending storage nodes are added, so keep them notified.
......@@ -437,16 +446,7 @@ class Application(BaseApplication):
conn.send(notification_packet)
elif conn.isServer():
continue
if node.isClient():
if state == ClusterStates.RUNNING:
handler = self.client_service_handler
elif state == ClusterStates.BACKINGUP:
handler = self.client_ro_service_handler
else:
if state != ClusterStates.STOPPING:
conn.abort()
continue
elif node.isMaster():
if node.isMaster():
if state == ClusterStates.RECOVERING:
handler = self.election_handler
else:
......@@ -454,10 +454,16 @@ class Application(BaseApplication):
elif node.isStorage() and storage_handler:
handler = storage_handler
else:
# There's a single handler type for admins.
# Client can't change handler without being first disconnected.
assert state in (
ClusterStates.STOPPING,
ClusterStates.STOPPING_BACKUP,
) or not node.isClient(), (state, node)
continue # keep handler
if type(handler) is not type(conn.getLastHandler()):
conn.setHandler(handler)
handler.connectionCompleted(conn, new=False)
handler.handlerSwitched(conn, new=False)
self.cluster_state = state
def getNewUUID(self, uuid, address, node_type):
......
......@@ -111,17 +111,12 @@ class BackupApplication(object):
else:
break
poll(1)
node, conn, num_partitions, num_replicas = \
bootstrap.getPrimaryConnection()
node, conn = bootstrap.getPrimaryConnection()
try:
app.changeClusterState(ClusterStates.BACKINGUP)
del bootstrap, node
if num_partitions != pt.getPartitions():
raise RuntimeError("inconsistent number of partitions")
self.ignore_invalidations = True
self.pt = PartitionTable(num_partitions, num_replicas)
conn.setHandler(BackupHandler(self))
conn.ask(Packets.AskPartitionTable())
conn.ask(Packets.AskLastTransaction())
# debug variable to log how big 'tid_list' can be.
self.debug_tid_count = 0
......
......@@ -23,10 +23,6 @@ from neo.lib.protocol import Packets
class MasterHandler(EventHandler):
"""This class implements a generic part of the event handlers."""
def connectionCompleted(self, conn, new=None):
if new is None:
super(MasterHandler, self).connectionCompleted(conn)
def connectionLost(self, conn, new_state=None):
if self.app.listening_conn: # if running
self._connectionLost(conn)
......@@ -59,17 +55,20 @@ class MasterHandler(EventHandler):
+ app.getNodeInformationDict(node_list)[node.getType()])
conn.send(Packets.NotifyNodeInformation(monotonic_time(), node_list))
def askPartitionTable(self, conn):
def handlerSwitched(self, conn, new):
pt = self.app.pt
conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList()))
# Except storages during recovery and secondary masters, all nodes
# receives the full partition table as soon as they're identified.
# It is also sent in 2 other cases:
# - to admins during recovery, whenever a newer PT is loaded;
# - to storage when switching from recovery to verification.
# After that, non-master nodes only receive incremental updates.
conn.send(Packets.SendPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
class BaseServiceHandler(MasterHandler):
"""This class deals with events for a service phase."""
def connectionCompleted(self, conn, new):
pt = self.app.pt
conn.send(Packets.SendPartitionTable(pt.getID(), pt.getRowList()))
"""Common handler class for storage nodes."""
def connectionLost(self, conn, new_state):
app = self.app
......
......@@ -15,14 +15,16 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import random
from functools import wraps
from . import MasterHandler
from ..app import monotonic_time, StateChangedException
from neo.lib import logging
from neo.lib.exception import StoppedOperation
from neo.lib.handler import AnswerDenied
from neo.lib.pt import PartitionTableException
from neo.lib.protocol import ClusterStates, Errors, \
NodeStates, NodeTypes, Packets, ProtocolError, uuid_str
NodeStates, NodeTypes, Packets, uuid_str
from neo.lib.util import dump
CLUSTER_STATE_WORKFLOW = {
......@@ -38,9 +40,25 @@ NODE_STATE_WORKFLOW = {
NodeTypes.STORAGE: (NodeStates.DOWN, NodeStates.UNKNOWN),
}
def check_state(*states):
def decorator(wrapped):
def wrapper(self, *args):
state = self.app.getClusterState()
if state not in states:
raise AnswerDenied('%s RPC can not be used in %s state'
% (wrapped.__name__, state))
wrapped(self, *args)
return wraps(wrapped)(wrapper)
return decorator
class AdministrationHandler(MasterHandler):
"""This class deals with messages from the admin node only"""
def handlerSwitched(self, conn, new):
assert new
super(AdministrationHandler, self).handlerSwitched(conn, new)
def connectionLost(self, conn, new_state):
node = self.app.nm.getByUUID(conn.getUUID())
if node is not None:
......@@ -58,30 +76,28 @@ class AdministrationHandler(MasterHandler):
# check request
try:
if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]:
raise ProtocolError('Can not switch to this state')
raise AnswerDenied('Can not switch to this state')
except KeyError:
if state != ClusterStates.STOPPING:
raise ProtocolError('Invalid state requested')
raise AnswerDenied('Invalid state requested')
# change state
if state == ClusterStates.VERIFYING:
storage_list = app.nm.getStorageList(only_identified=True)
if not storage_list:
raise ProtocolError('Cannot exit recovery without any '
'storage node')
raise AnswerDenied(
'Cannot exit recovery without any storage node')
for node in storage_list:
assert node.isPending(), node
if node.getConnection().isPending():
# XXX: It's wrong to use ProtocolError here. We must reply
# less aggressively because the admin has no way to
# know that there's still pending activity.
raise ProtocolError('Cannot exit recovery now: node %r is '
'entering cluster' % (node, ))
raise AnswerDenied(
'Cannot exit recovery now: node %r is entering cluster'
% node,)
app._startup_allowed = True
state = app.cluster_state
elif state == ClusterStates.STARTING_BACKUP:
if app.tm.hasPending() or app.nm.getClientList(True):
raise ProtocolError("Can not switch to %s state with pending"
raise AnswerDenied("Can not switch to %s state with pending"
" transactions or connected clients" % state)
conn.answer(Errors.Ack('Cluster state changed'))
......@@ -93,21 +109,24 @@ class AdministrationHandler(MasterHandler):
app = self.app
node = app.nm.getByUUID(uuid)
if node is None:
raise ProtocolError('unknown node')
raise AnswerDenied('unknown node')
if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
raise ProtocolError('can not switch node to this state')
raise AnswerDenied('can not switch node to %s state' % state)
if uuid == app.uuid:
raise ProtocolError('can not kill primary master node')
raise AnswerDenied('can not kill primary master node')
state_changed = state != node.getState()
message = ('state changed' if state_changed else
'node already in %s state' % state)
if node.isStorage():
keep = state == NodeStates.DOWN
if node.isRunning() and not keep:
raise AnswerDenied(
"a running node must be stopped before removal")
try:
cell_list = app.pt.dropNodeList([node], keep)
except PartitionTableException, e:
raise ProtocolError(str(e))
raise AnswerDenied(str(e))
node.setState(state)
if node.isConnected():
# notify itself so it can shutdown
......@@ -134,16 +153,17 @@ class AdministrationHandler(MasterHandler):
monotonic_time(), [node.asTuple()]))
app.broadcastNodesInformation([node])
# XXX: Would it be safe to allow more states ?
__change_pt_rpc = check_state(
ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP)
@__change_pt_rpc
def addPendingNodes(self, conn, uuid_list):
uuids = ', '.join(map(uuid_str, uuid_list))
logging.debug('Add nodes %s', uuids)
app = self.app
state = app.getClusterState()
# XXX: Would it be safe to allow more states ?
if state not in (ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP):
raise ProtocolError('Can not add nodes in %s state' % state)
# take all pending nodes
node_list = list(app.pt.addNodeList(node
for node in app.nm.getStorageList()
......@@ -165,31 +185,50 @@ class AdministrationHandler(MasterHandler):
for uuid in uuid_list:
node = getByUUID(uuid)
if node is None or not (node.isStorage() and node.isIdentified()):
raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
raise AnswerDenied("invalid storage node %s" % uuid_str(uuid))
node_list.append(node)
repair = Packets.NotifyRepair(*args)
for node in node_list:
node.send(repair)
conn.answer(Errors.Ack(''))
def tweakPartitionTable(self, conn, uuid_list):
app = self.app
state = app.getClusterState()
# XXX: Would it be safe to allow more states ?
if state not in (ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP):
raise ProtocolError('Can not tweak partition table in %s state'
% state)
app.broadcastPartitionChanges(app.pt.tweak([node
for node in app.nm.getStorageList()
if node.getUUID() in uuid_list or not node.isRunning()]))
@__change_pt_rpc
def setNumReplicas(self, conn, num_replicas):
self.app.broadcastPartitionChanges((), num_replicas)
conn.answer(Errors.Ack(''))
def truncate(self, conn, tid):
@__change_pt_rpc
def tweakPartitionTable(self, conn, dry_run, uuid_list):
app = self.app
if app.cluster_state != ClusterStates.RUNNING:
raise ProtocolError('Can not truncate in this state')
drop_list = []
for node in app.nm.getStorageList():
if node.getUUID() in uuid_list or node.isPending():
drop_list.append(node)
elif not node.isRunning():
drop_list.append(node)
raise AnswerDenied(
'tweak: down nodes must be listed explicitly')
if dry_run:
pt = object.__new__(app.pt.__class__)
new_nodes = pt.load(app.pt.getID(), app.pt.getReplicas(),
app.pt.getRowList(), app.nm)
assert not new_nodes
pt.addNodeList(node
for node, count in app.pt.count_dict.iteritems()
if not count)
else:
pt = app.pt
try:
changed_list = pt.tweak(drop_list)
except PartitionTableException, e:
raise AnswerDenied(str(e))
if not dry_run:
app.broadcastPartitionChanges(changed_list)
conn.answer(Packets.AnswerTweakPartitionTable(
bool(changed_list), pt.getRowList()))
@check_state(ClusterStates.RUNNING)
def truncate(self, conn, tid):
conn.answer(Errors.Ack(''))
raise StoppedOperation(tid)
......@@ -237,3 +276,5 @@ class AdministrationHandler(MasterHandler):
node.send(Packets.CheckPartition(
offset, source, min_tid, max_tid))
conn.answer(Errors.Ack(''))
del __change_pt_rpc
......@@ -17,6 +17,7 @@
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.protocol import ZERO_TID
from neo.lib.pt import PartitionTable
class BackupHandler(EventHandler):
"""Handler dedicated to upstream master during BACKINGUP state"""
......@@ -25,12 +26,15 @@ class BackupHandler(EventHandler):
if self.app.app.listening_conn: # if running
raise PrimaryFailure('connection lost')
def answerPartitionTable(self, conn, ptid, row_list):
self.app.pt.load(ptid, row_list, self.app.nm)
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
app = self.app
pt = app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
if pt.getPartitions() != app.app.pt.getPartitions():
raise RuntimeError("inconsistent number of partitions")
def notifyPartitionChanges(self, conn, ptid, cell_list):
if self.app.pt.filled():
self.app.pt.update(ptid, cell_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def answerLastTransaction(self, conn, tid):
app = self.app
......
......@@ -22,6 +22,10 @@ from . import MasterHandler
class ClientServiceHandler(MasterHandler):
""" Handler dedicated to client during service state """
def handlerSwitched(self, conn, new):
assert new
super(ClientServiceHandler, self).handlerSwitched(conn, new)
def _connectionLost(self, conn):
# cancel its transactions and forgot the node
app = self.app
......
......@@ -17,14 +17,14 @@
from neo.lib import logging
from neo.lib.exception import PrimaryElected
from neo.lib.handler import EventHandler
from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, \
NotReadyError, Packets, ProtocolError, uuid_str
from neo.lib.protocol import CellStates, ClusterStates, NodeStates, \
NodeTypes, NotReadyError, Packets, ProtocolError, uuid_str
from ..app import monotonic_time
class IdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid,
address, name, devpath, id_timestamp):
address, name, id_timestamp, devpath, new_nid):
app = self.app
self.checkClusterName(name)
if address == app.server:
......@@ -77,6 +77,16 @@ class IdentificationHandler(EventHandler):
manager = app
state, handler = manager.identifyStorageNode(
uuid is not None and node is not None)
if not address:
if app.cluster_state == ClusterStates.RECOVERING:
raise NotReadyError
if uuid or not new_nid:
raise ProtocolError
state = NodeStates.DOWN
# We'll let the storage node close the connection. If we
# aborted it at the end of the method, BootstrapManager
# (which is used by storage nodes) could see the closure
# and try to reconnect to a master.
human_readable_node_type = ' storage (%s) ' % (state, )
elif node_type == NodeTypes.MASTER:
if app.election:
......@@ -105,24 +115,27 @@ class IdentificationHandler(EventHandler):
node.devpath = tuple(devpath)
node.id_timestamp = monotonic_time()
node.setState(state)
app.broadcastNodesInformation([node])
if new_nid:
changed_list = []
for offset in new_nid:
changed_list.append((offset, uuid, CellStates.OUT_OF_DATE))
app.pt._setCell(offset, node, CellStates.OUT_OF_DATE)
app.broadcastPartitionChanges(changed_list)
conn.setHandler(handler)
node.setConnection(conn, not node.isIdentified())
app.broadcastNodesInformation([node], node)
conn.answer(Packets.AcceptIdentification(
NodeTypes.MASTER,
app.uuid,
app.pt.getPartitions(),
app.pt.getReplicas(),
uuid))
handler._notifyNodeInformation(conn)
handler.connectionCompleted(conn, True)
handler.handlerSwitched(conn, True)
class SecondaryIdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid,
address, name, devpath, id_timestamp):
address, name, id_timestamp, devpath, new_nid):
app = self.app
self.checkClusterName(name)
if address == app.server:
......
......@@ -23,6 +23,9 @@ from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, Packets
class SecondaryHandler(MasterHandler):
"""Handler used by primary to handle secondary masters"""
def handlerSwitched(self, conn, new):
pass
def _connectionLost(self, conn):
app = self.app
node = app.nm.getByUUID(conn.getUUID())
......@@ -30,21 +33,20 @@ class SecondaryHandler(MasterHandler):
app.broadcastNodesInformation([node])
class ElectionHandler(MasterHandler):
class ElectionHandler(SecondaryHandler):
"""Handler used by primary to handle secondary masters during election"""
def connectionCompleted(self, conn, new=None):
if new is None:
def connectionCompleted(self, conn):
super(ElectionHandler, self).connectionCompleted(conn)
app = self.app
conn.ask(Packets.RequestIdentification(NodeTypes.MASTER,
app.uuid, app.server, app.name, (), app.election))
app.uuid, app.server, app.name, app.election, (), ()))
def connectionFailed(self, conn):
super(ElectionHandler, self).connectionFailed(conn)
self.connectionLost(conn)
def _acceptIdentification(self, node, *args):
def _acceptIdentification(self, node):
raise PrimaryElected(node)
def _connectionLost(self, *args):
......@@ -66,7 +68,7 @@ class ElectionHandler(MasterHandler):
class PrimaryHandler(ElectionHandler):
"""Handler used by secondaries to handle primary master"""
def _acceptIdentification(self, node, num_partitions, num_replicas):
def _acceptIdentification(self, node):
assert self.app.primary_master is node, (self.app.primary_master, node)
def _connectionLost(self, conn):
......
......@@ -26,10 +26,10 @@ from . import BaseServiceHandler
class StorageServiceHandler(BaseServiceHandler):
""" Handler dedicated to storages during service state """
def connectionCompleted(self, conn, new):
def handlerSwitched(self, conn, new):
app = self.app
if new:
super(StorageServiceHandler, self).connectionCompleted(conn, new)
super(StorageServiceHandler, self).handlerSwitched(conn, new)
node = app.nm.getByUUID(conn.getUUID())
if node.isRunning(): # node may be PENDING
app.startStorage(node)
......
......@@ -56,6 +56,10 @@ class PartitionTable(neo.lib.pt.PartitionTable):
self._id += 1
return self._id
def setReplicas(self, num_replicas):
assert num_replicas >= 0, num_replicas
self.nr = num_replicas
def make(self, node_list):
"""Make a new partition table from scratch."""
assert self._id is None and node_list, (self._id, node_list)
......@@ -108,26 +112,19 @@ class PartitionTable(neo.lib.pt.PartitionTable):
self.num_filled_rows = len(filter(None, self.partition_list))
return change_list
def load(self, ptid, row_list, nm):
def load(self, ptid, num_replicas, row_list, nm):
"""
Load a partition table from a storage node during the recovery.
Return the new storage nodes registered
"""
# check offsets
for offset, _row in row_list:
if offset >= self.getPartitions():
raise IndexError, offset
# store the partition table
self.clear()
self._id = ptid
new_nodes = []
for offset, row in row_list:
for uuid, state in row:
node = nm.getByUUID(uuid)
def getByUUID(nid):
node = nm.getByUUID(nid)
if node is None:
node = nm.createStorage(uuid=uuid)
node = nm.createStorage(uuid=nid)
new_nodes.append(node.asTuple())
self._setCell(offset, node, state)
return node
self._load(ptid, num_replicas, row_list, getByUUID)
return new_nodes
def setUpToDate(self, node, offset):
......@@ -166,15 +163,6 @@ class PartitionTable(neo.lib.pt.PartitionTable):
return cell_list
def addNodeList(self, node_list):
"""Add nodes"""
added_list = []
for node in node_list:
if node not in self.count_dict:
self.count_dict[node] = 0
added_list.append(node)
return added_list
def tweak(self, drop_list=()):
"""Optimize partition table
......@@ -183,7 +171,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
few readable cells, some cells are instead marked as FEEDING. This is
a preliminary step to drop these nodes, otherwise the partition table
could become non-operational.
- Other nodes must have the same number of cells, off by 1.
In fact, the code touching these cells is disabled (see NOTE below).
- Other nodes must have the same number of non-feeding cells, off by 1.
- When a transaction creates new objects (oids are roughly allocated
sequentially), we expect better performance by maximizing the number
of involved nodes (i.e. parallelizing writes).
......@@ -232,6 +221,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
# Collect some data in a usable form for the rest of the method.
node_list = {node: {} for node in self.count_dict
if node not in drop_list}
if not node_list:
raise neo.lib.pt.PartitionTableException("Can't remove all nodes.")
drop_list = defaultdict(list)
for offset, row in enumerate(self.partition_list):
for cell in row:
......@@ -420,6 +411,22 @@ class PartitionTable(neo.lib.pt.PartitionTable):
outdated_list[offset] -= 1
for offset, cell in cell_dict.iteritems():
discard_list[offset].append(cell)
# NOTE: The following line disables the next 2 lines, which actually
# causes cells in drop_list to be discarded, now or later;
# drop_list could be renamed into ignore_list.
# 1. Deleting data partition per partition is a lot of work, so
# why ask nodes in drop_list to do that when the goal is
# simply to trash the whole underlying database?
# 2. By excluding nodes from a tweak, it becomes possible to have
# parts of the partition table that are tweaked differently.
# This may require to temporarily change the number of
# replicas for the part being tweaked. In the future, this
# number may be specified in the 'tweak' command, to avoid
# race conditions with setUpToDate().
# Overall, a common use case is when importing a ZODB to NEO,
# to keep the initial importing node up until the database is
# split and replicated to the final nodes.
drop_list = {}
for offset, drop_list in drop_list.iteritems():
discard_list[offset] += drop_list
# We have sorted cells to discard in order to first deallocate nodes
......
......@@ -28,7 +28,7 @@ class RecoveryManager(MasterHandler):
def __init__(self, app):
# The target node's uuid to request next.
self.target_ptid = None
self.target_ptid = 0
self.ask_pt = []
self.backup_tid_dict = {}
self.truncate_dict = {}
......@@ -52,9 +52,8 @@ class RecoveryManager(MasterHandler):
"""
logging.info('begin the recovery of the status')
app = self.app
pt = app.pt
pt = app.pt = app.newPartitionTable()
app.changeClusterState(ClusterStates.RECOVERING)
pt.clear()
self.try_secondary = True
......@@ -113,7 +112,7 @@ class RecoveryManager(MasterHandler):
for node in node_list:
conn = node.getConnection()
conn.send(truncate)
self.connectionCompleted(conn, False)
self.handlerSwitched(conn, False)
continue
node_list = pt.getConnectedNodeList()
break
......@@ -140,12 +139,12 @@ class RecoveryManager(MasterHandler):
logging.info('creating a new partition table')
pt.make(node_list)
self._notifyAdmins(Packets.SendPartitionTable(
pt.getID(), pt.getRowList()))
pt.getID(), pt.getReplicas(), pt.getRowList()))
else:
cell_list = pt.outdate()
if cell_list:
self._notifyAdmins(Packets.NotifyPartitionChanges(
pt.setNextID(), cell_list))
pt.setNextID(), pt.getReplicas(), cell_list))
if app.backup_tid:
pt.setBackupTidDict(self.backup_tid_dict)
app.backup_tid = pt.getBackupTid()
......@@ -175,16 +174,16 @@ class RecoveryManager(MasterHandler):
if node is None or node.getState() == new_state:
return
node.setState(new_state)
# broadcast to all so that admin nodes gets informed
self.app.broadcastNodesInformation([node])
def connectionCompleted(self, conn, new):
def handlerSwitched(self, conn, new):
# ask the last IDs to perform the recovery
conn.ask(Packets.AskRecovery())
def answerRecovery(self, conn, ptid, backup_tid, truncate_tid):
uuid = conn.getUUID()
if self.target_ptid <= ptid:
# ptid is None if the node has an empty partition table.
if ptid and self.target_ptid <= ptid:
# Maybe a newer partition table.
if self.target_ptid == ptid and self.ask_pt:
# Another node is already asked.
......@@ -197,17 +196,14 @@ class RecoveryManager(MasterHandler):
self.backup_tid_dict[uuid] = backup_tid
self.truncate_dict[uuid] = truncate_tid
def answerPartitionTable(self, conn, ptid, row_list):
def answerPartitionTable(self, conn, ptid, num_replicas, row_list):
# If this is not from a target node, ignore it.
if ptid == self.target_ptid:
app = self.app
try:
new_nodes = app.pt.load(ptid, row_list, app.nm)
except IndexError:
raise ProtocolError('Invalid offset')
new_nodes = app.pt.load(ptid, num_replicas, row_list, app.nm)
self._notifyAdmins(
Packets.NotifyNodeInformation(monotonic_time(), new_nodes),
Packets.SendPartitionTable(ptid, row_list))
Packets.SendPartitionTable(ptid, num_replicas, row_list))
self.ask_pt = ()
uuid = conn.getUUID()
app.backup_tid = self.backup_tid_dict[uuid]
......
......@@ -16,9 +16,11 @@
import sys
from .neoctl import NeoCTL, NotReadyException
from neo.lib.node import NodeManager
from neo.lib.pt import PartitionTable
from neo.lib.util import p64, u64, tidFromTime, timeStringFromTID
from neo.lib.protocol import uuid_str, formatNodeList, \
ClusterStates, NodeTypes, UUID_NAMESPACES, ZERO_TID
ClusterStates, NodeStates, NodeTypes, UUID_NAMESPACES, ZERO_TID
action_dict = {
'print': {
......@@ -30,6 +32,7 @@ action_dict = {
},
'set': {
'cluster': 'setClusterState',
'replicas': 'setNumReplicas',
},
'check': 'checkReplicas',
'start': 'startCluster',
......@@ -46,6 +49,11 @@ uuid_int = (lambda ns: lambda uuid:
(ns[uuid[0]] << 24) + int(uuid[1:])
)({str(k)[0]: v for k, v in UUID_NAMESPACES.iteritems()})
class dummy_app:
id_timestamp = uuid = 0
class TerminalNeoCTL(object):
def __init__(self, *args, **kw):
self.neoctl = NeoCTL(*args, **kw)
......@@ -67,6 +75,15 @@ class TerminalNeoCTL(object):
asNode = staticmethod(uuid_int)
def formatPartitionTable(self, row_list):
nm = NodeManager()
nm.update(dummy_app, 1,
self.neoctl.getNodeList(node_type=NodeTypes.STORAGE))
pt = object.__new__(PartitionTable)
pt._load(None, None, row_list, nm.getByUUID)
pt.addNodeList(nm.getByStateList(NodeStates.RUNNING))
return '\n'.join(line[4:] for line in pt._format())
def formatRowList(self, row_list):
return '\n'.join('%03d |%s' % (offset,
''.join(' %s - %s |' % (uuid_str(uuid), state)
......@@ -105,10 +122,12 @@ class TerminalNeoCTL(object):
max_offset = int(max_offset)
if node is not None:
node = self.asNode(node)
ptid, row_list = self.neoctl.getPartitionRowList(
ptid, num_replicas, row_list = self.neoctl.getPartitionRowList(
min_offset=min_offset, max_offset=max_offset, node=node)
# TODO: return ptid
return self.formatRowList(row_list)
return '# ptid: %s, replicas: %s\n%s' % (ptid, num_replicas,
self.formatRowList(enumerate(row_list, min_offset))
if min_offset or max_offset else
self.formatPartitionTable(row_list))
def getNodeList(self, params):
"""
......@@ -140,6 +159,18 @@ class TerminalNeoCTL(object):
assert len(params) == 1
return self.neoctl.setClusterState(self.asClusterState(params[0]))
def setNumReplicas(self, params):
"""
Set number of replicas.
Parameters: nr
nr: positive number (0 means no redundancy)
"""
assert len(params) == 1
nr = int(params[0])
if nr < 0:
sys.exit('invalid number of replicas')
return self.neoctl.setNumReplicas(nr)
def startCluster(self, params):
"""
Starts cluster operation after a startup.
......@@ -167,10 +198,18 @@ class TerminalNeoCTL(object):
def tweakPartitionTable(self, params):
"""
Optimize partition table.
No partition will be assigned to specified storage nodes.
Parameters: [node [...]]
No change is done to the specified/down storage nodes and they don't
count as replicas. The purpose of listing nodes is usually to drop
them once the data is replicated to other nodes.
Parameters: [-n] [node [...]]
-n: dry run
"""
return self.neoctl.tweakPartitionTable(map(self.asNode, params))
dry_run = params[0] == '-n'
changed, row_list = self.neoctl.tweakPartitionTable(
map(self.asNode, params[dry_run:]), dry_run)
if changed:
return self.formatPartitionTable(row_list)
return 'No change done.'
def killNode(self, params):
"""
......
......@@ -14,6 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
from neo.lib.handler import EventHandler
from neo.lib.protocol import ErrorCodes, Packets
......@@ -44,8 +45,8 @@ class CommandEventHandler(EventHandler):
def ack(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.ACK, msg))
def protocolError(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.PROTOCOL_ERROR, msg))
def denied(self, conn, msg):
sys.exit(msg)
def notReady(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.NOT_READY, msg))
......@@ -62,3 +63,4 @@ class CommandEventHandler(EventHandler):
answerLastIDs = __answer(Packets.AnswerLastIDs)
answerLastTransaction = __answer(Packets.AnswerLastTransaction)
answerRecovery = __answer(Packets.AnswerRecovery)
answerTweakPartitionTable = __answer(Packets.AnswerTweakPartitionTable)
......@@ -91,8 +91,14 @@ class NeoCTL(BaseApplication):
raise RuntimeError(response)
return response[2]
def tweakPartitionTable(self, uuid_list=()):
response = self.__ask(Packets.TweakPartitionTable(uuid_list))
def tweakPartitionTable(self, uuid_list=(), dry_run=False):
response = self.__ask(Packets.TweakPartitionTable(dry_run, uuid_list))
if response[0] != Packets.AnswerTweakPartitionTable:
raise RuntimeError(response)
return response[1:]
def setNumReplicas(self, nr):
response = self.__ask(Packets.SetNumReplicas(nr))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
......@@ -163,7 +169,7 @@ class NeoCTL(BaseApplication):
response = self.__ask(packet)
if response[0] != Packets.AnswerPartitionList:
raise RuntimeError(response)
return response[1:3] # ptid, row_list
return response[1:]
def startCluster(self):
"""
......
......@@ -51,13 +51,11 @@ UNIT_TEST_MODULES = [
'neo.tests.master.testClientHandler',
'neo.tests.master.testMasterApp',
'neo.tests.master.testMasterPT',
'neo.tests.master.testRecovery',
'neo.tests.master.testStorageHandler',
'neo.tests.master.testTransactions',
# storage application
'neo.tests.storage.testClientHandler',
'neo.tests.storage.testMasterHandler',
'neo.tests.storage.testStorageApp',
'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
'neo.tests.storage.testTransactions',
# client application
......
......@@ -63,11 +63,16 @@ class Application(BaseApplication):
help="do not delete data of discarded cells, which is useful for"
" big databases because the current implementation is"
" inefficient (this option should disappear in the future)")
_.bool('new-nid',
help="request a new NID from a cluster that is already"
" operational, update the database with the new NID and exit,"
" which makes easier to quickly set up a replica by copying"
" the database of another node while it was stopped")
_ = parser.group('database creation')
_.int('u', 'uuid',
help="specify an UUID to use for this process. Previously"
" assigned UUID takes precedence (i.e. you should"
_.int('i', 'nid',
help="specify an NID to use for this process. Previously"
" assigned NID takes precedence (i.e. you should"
" always use reset with this switch)")
_('e', 'engine', help="database engine (MySQL only)")
_.bool('dedup',
......@@ -118,9 +123,15 @@ class Application(BaseApplication):
self.loadConfiguration()
self.devpath = self.dm.getTopologyPath()
# force node uuid from command line argument, for testing purpose only
if 'uuid' in config:
self.uuid = config['uuid']
if config.get('new_nid'):
self.new_nid = [x[0] for x in self.dm.iterAssignedCells()]
if not self.new_nid:
sys.exit('database is empty')
self.uuid = None
else:
self.new_nid = ()
if 'nid' in config: # for testing purpose only
self.uuid = config['nid']
logging.node(self.name, self.uuid)
registerLiveDebugger(on_log=self.log)
......@@ -158,36 +169,27 @@ class Application(BaseApplication):
# load configuration
self.uuid = dm.getUUID()
logging.node(self.name, self.uuid)
num_partitions = dm.getNumPartitions()
num_replicas = dm.getNumReplicas()
ptid = dm.getPTID()
# check partition table configuration
if num_partitions is not None and num_replicas is not None:
if num_partitions <= 0:
raise RuntimeError, 'partitions must be more than zero'
# create a partition table
self.pt = PartitionTable(num_partitions, num_replicas)
logging.info('Configuration loaded:')
logging.info('PTID : %s', dump(ptid))
logging.info('PTID : %s', dump(dm.getPTID()))
logging.info('Name : %s', self.name)
logging.info('Partitions: %s', num_partitions)
logging.info('Replicas : %s', num_replicas)
def loadPartitionTable(self):
"""Load a partition table from the database."""
self.pt.clear()
ptid = self.dm.getPTID()
if ptid is None:
self.pt = PartitionTable(0, 0)
return
cell_list = []
row_list = []
for offset, uuid, state in self.dm.getPartitionTable():
while len(row_list) <= offset:
row_list.append([])
# register unknown nodes
if self.nm.getByUUID(uuid) is None:
self.nm.createStorage(uuid=uuid)
cell_list.append((offset, uuid, CellStates[state]))
self.pt.update(ptid, cell_list, self.nm)
row_list[offset].append((uuid, CellStates[state]))
self.pt = object.__new__(PartitionTable)
self.pt.load(ptid, self.dm.getNumReplicas(), row_list, self.nm)
def run(self):
try:
......@@ -247,28 +249,15 @@ class Application(BaseApplication):
Note that I do not accept any connection from non-master nodes
at this stage."""
pt = self.pt
# search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.STORAGE, self.server,
self.devpath)
self.master_node, self.master_conn, num_partitions, num_replicas = \
bootstrap.getPrimaryConnection()
bootstrap = BootstrapManager(self, NodeTypes.STORAGE,
None if self.new_nid else self.server,
self.devpath, self.new_nid)
self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
self.dm.setUUID(self.uuid)
# Reload a partition table from the database. This is necessary
# when a previous primary master died while sending a partition
# table, because the table might be incomplete.
if pt is not None:
self.loadPartitionTable()
if num_partitions != pt.getPartitions():
raise RuntimeError('the number of partitions is inconsistent')
if pt is None or pt.getReplicas() != num_replicas:
# changing number of replicas is not an issue
self.dm.setNumPartitions(num_partitions)
self.dm.setNumReplicas(num_replicas)
self.pt = PartitionTable(num_partitions, num_replicas)
# Reload a partition table from the database,
# in case that we're in RECOVERING phase.
self.loadPartitionTable()
def initialize(self):
......
......@@ -51,7 +51,7 @@ class Checker(object):
else:
conn = ClientConnection(app, StorageOperationHandler(app), node)
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
uuid, app.server, name, (), app.id_timestamp))
uuid, app.server, name, app.id_timestamp, (), ()))
self.conn_dict[conn] = node.isIdentified()
conn_set = set(self.conn_dict)
conn_set.discard(None)
......
......@@ -216,7 +216,7 @@ class ZODB(object):
self._connect = _connect
config = section.config
if 'read_only' in config.getSectionAttributes():
has_next_oid = config.read_only = hasattr(self, 'next_oid')
has_next_oid = config.read_only = 'next_oid' in self.__dict__
if not has_next_oid:
import gc
# This will reopen read-only as soon as we know the last oid.
......@@ -378,8 +378,8 @@ class ImporterDatabaseManager(DatabaseManager):
conf = self._conf
db = self.db = buildDatabaseManager(conf['adapter'],
(conf['database'], conf.get('engine'), conf['wait']))
for x in """getConfiguration _setConfiguration setNumPartitions
query erase getPartitionTable _iterAssignedCells
for x in """getConfiguration _setConfiguration _getMaxPartition
query erase getPartitionTable iterAssignedCells
updateCellTID getUnfinishedTIDDict dropUnfinishedData
abortTransaction storeTransaction lockTransaction
loadData storeData getOrphanList _pruneData deferCommit
......@@ -396,9 +396,16 @@ class ImporterDatabaseManager(DatabaseManager):
self._writeback.committed()
self.commit = db.commit = commit
def _updateReadable(self):
def _updateReadable(*_):
raise AssertionError
def setUUID(self, nid):
old_nid = self.getUUID()
if old_nid:
assert old_nid == nid, (old_nid, nid)
else:
self.setConfiguration('nid', str(nid))
def changePartitionTable(self, *args, **kw):
self.db.changePartitionTable(*args, **kw)
if self._writeback:
......@@ -413,7 +420,7 @@ class ImporterDatabaseManager(DatabaseManager):
if self._writeback:
self._writeback.close()
self.db.close()
if isinstance(self.zodb, list): # _setup called
if isinstance(self.zodb, tuple): # _setup called
for zodb in self.zodb:
zodb.close()
......@@ -436,8 +443,12 @@ class ImporterDatabaseManager(DatabaseManager):
self.zodb_ltid = max(x.ltid for x in self.zodb)
zodb = self.zodb[-1]
self.zodb_loid = zodb.shift_oid + zodb.next_oid - 1
self.zodb_tid = self.db.getLastTID(self.zodb_ltid) or 0
if callable(self._import):
self.zodb_tid = self._getMaxPartition() is not None and \
self.db.getLastTID(self.zodb_ltid) or 0
if callable(self._import): # XXX: why ?
if self.zodb_tid == self.zodb_ltid:
self._finished()
else:
self._import = self._import()
def doOperation(self, app):
......@@ -498,12 +509,19 @@ class ImporterDatabaseManager(DatabaseManager):
if process:
process.join()
self.commit()
self._finished()
def _finished(self):
logging.warning("All data are imported. You should change"
" your configuration to use the native backend and restart.")
self._import = None
for x in """getObject getReplicationTIDList getReplicationObjectList
_fetchObject
""".split():
setattr(self, x, getattr(self.db, x))
for zodb in self.zodb:
zodb.close()
self.zodb = None
def _iter_zodb(self, zodb_list):
util.setproctitle('neostorage: import')
......@@ -667,6 +685,9 @@ class ImporterDatabaseManager(DatabaseManager):
length, partition)
return r
def _fetchObject(*_):
raise AssertionError
def getObjectHistory(self, *args, **kw):
raise BackendNotImplemented(self.getObjectHistory)
......@@ -678,6 +699,7 @@ class WriteBack(object):
_changed = False
_process = None
chunk_size = 100
def __init__(self, db, storage):
self._db = db
......@@ -705,7 +727,7 @@ class WriteBack(object):
self._event = Event()
self._idle = Event()
self._stop = Event()
self._np = self._db.getNumPartitions()
self._np = 1 + self._db._getMaxPartition()
self._db = cPickle.dumps(self._db, 2)
self._process = Process(target=self._run)
self._process.daemon = True
......@@ -737,7 +759,6 @@ class WriteBack(object):
def iterator(self):
db = self._db
np = self._np
chunk_size = max(2, 1000 // np)
offset_list = xrange(np)
while 1:
with db:
......@@ -748,23 +769,26 @@ class WriteBack(object):
if np == len(db._readable_set):
while 1:
tid_list = []
loop = False
max_tid = MAX_TID
for offset in offset_list:
x = db.getReplicationTIDList(
self.min_tid, MAX_TID, chunk_size, offset)
self.min_tid, max_tid, self.chunk_size, offset)
tid_list += x
if len(x) == chunk_size:
loop = True
if tid_list:
if len(x) == self.chunk_size:
max_tid = x[-1]
if not tid_list:
break
tid_list.sort()
for tid in tid_list:
if self._stop.is_set():
return
yield TransactionRecord(db, tid)
if tid == max_tid:
break
else:
self.min_tid = util.add64(tid, 1)
if loop:
continue
break
self.min_tid = util.add64(tid, 1)
if not self._event.is_set():
self._idle.set()
self._event.wait()
......
......@@ -102,25 +102,24 @@ class DatabaseManager(object):
finally:
db.close()
_cached_attr_list = (
'_readable_set', '_getPartition', '_getReadablePartition')
def __getattr__(self, attr):
if attr in ('_readable_set', '_getPartition', '_getReadablePartition'):
if attr in self._cached_attr_list:
self._updateReadable()
return self.__getattribute__(attr)
def _partitionTableChanged(self):
try:
del (self._readable_set,
self._getPartition,
self._getReadablePartition)
except AttributeError:
pass
def __enter__(self):
assert not self.LOCK, "not a secondary connection"
# XXX: All config caching should be done in this class,
# rather than in backend classes.
self._config.clear()
self._partitionTableChanged()
try:
for attr in self._cached_attr_list:
delattr(self, attr)
except AttributeError:
pass
def __exit__(self, t, v, tb):
if v is None:
......@@ -180,6 +179,10 @@ class DatabaseManager(object):
def erase(self):
""""""
def restore(self, dump): # for tests
self.erase()
self._restore(dump)
def _setup(self, dedup=False):
"""To be overridden by the backend to set up a database
......@@ -271,6 +274,18 @@ class DatabaseManager(object):
def _setConfiguration(self, key, value):
""""""
def _changePartitionTable(self, cell_list, reset=False):
"""Change a part of a partition table. The list of cells is
a tuple of tuples, each of which consists of an offset (row ID),
the NID of a storage node, and a cell state. If reset is True,
existing data is first thrown away.
"""
def _getPartitionTable(self):
"""Return a whole partition table as a sequence of rows. Each row
is again a tuple of an offset (row ID), the NID of a storage
node, and a cell state."""
def getUUID(self):
"""
Load a NID from a database.
......@@ -279,27 +294,20 @@ class DatabaseManager(object):
if nid is not None:
return int(nid)
@requires(_changePartitionTable, _getPartitionTable)
def setUUID(self, nid):
"""
Store a NID into a database.
"""
old_nid = self.getUUID()
if nid != old_nid:
if old_nid:
self._changePartitionTable((offset, x, tid)
for offset, x, tid in self._getPartitionTable()
if x == old_nid
for x, tid in ((x, None), (nid, tid)))
self.setConfiguration('nid', str(nid))
def getNumPartitions(self):
"""
Load the number of partitions from a database.
"""
n = self.getConfiguration('partitions')
if n is not None:
return int(n)
def setNumPartitions(self, num_partitions):
"""
Store the number of partitions into a database.
"""
self.setConfiguration('partitions', num_partitions)
self._partitionTableChanged()
def getNumReplicas(self):
"""
Load the number of replicas from a database.
......@@ -308,12 +316,6 @@ class DatabaseManager(object):
if n is not None:
return int(n)
def setNumReplicas(self, num_replicas):
"""
Store the number of replicas into a database.
"""
self.setConfiguration('replicas', num_replicas)
def getName(self):
"""
Load a name from a database.
......@@ -374,8 +376,9 @@ class DatabaseManager(object):
tids are in unpacked format.
"""
if self.getNumPartitions():
return max(map(self._getLastTID, self._readable_set))
x = self._readable_set
if x:
return max(self._getLastTID(x, max_tid) for x in x)
def _getLastIDs(self, partition):
"""Return max(tid) & max(oid) for objects of given partition
......@@ -395,7 +398,7 @@ class DatabaseManager(object):
x = self._readable_set
if x:
tid, oid = zip(*map(self._getLastIDs, x))
tid = max(self.getLastTID(None), max(tid))
tid = max(self.getLastTID(), max(tid))
oid = max(oid)
return (None if tid is None else util.p64(tid),
None if oid is None else util.p64(oid))
......@@ -511,13 +514,8 @@ class DatabaseManager(object):
return (util.p64(serial), compression, checksum, data,
None if data_serial is None else util.p64(data_serial))
def _getPartitionTable(self):
"""Return a whole partition table as a sequence of rows. Each row
is again a tuple of an offset (row ID), the NID of a storage
node, and a cell state."""
@requires(_getPartitionTable)
def _iterAssignedCells(self):
def iterAssignedCells(self):
my_nid = self.getUUID()
return ((offset, tid) for offset, nid, tid in self._getPartitionTable()
if my_nid == nid)
......@@ -537,24 +535,19 @@ class DatabaseManager(object):
finally:
readable_set.remove(offset)
def _changePartitionTable(self, cell_list, reset=False):
"""Change a part of a partition table. The list of cells is
a tuple of tuples, each of which consists of an offset (row ID),
the NID of a storage node, and a cell state. If reset is True,
existing data is first thrown away.
def _getDataLastId(self, partition):
"""
"""
def _getDataLastId(self, partition):
def _getMaxPartition(self):
"""
"""
@requires(_getDataLastId)
def _updateReadable(self):
try:
readable_set = self.__dict__['_readable_set']
except KeyError:
@requires(_getDataLastId, _getMaxPartition)
def _updateReadable(self, reset=True):
if reset:
readable_set = self._readable_set = set()
np = self.getNumPartitions()
np = 1 + self._getMaxPartition()
def _getPartition(x, np=np):
return x % np
def _getReadablePartition(x, np=np, r=readable_set):
......@@ -569,14 +562,15 @@ class DatabaseManager(object):
i = self._getDataLastId(p)
d.append(p << 48 if i is None else i + 1)
else:
readable_set = self._readable_set
readable_set.clear()
readable_set.update(x[0] for x in self._iterAssignedCells()
readable_set.update(x[0] for x in self.iterAssignedCells()
if -x[1] in READABLE)
@requires(_changePartitionTable, _getLastIDs, _getLastTID)
def changePartitionTable(self, ptid, cell_list, reset=False):
def changePartitionTable(self, ptid, num_replicas, cell_list, reset=False):
my_nid = self.getUUID()
pt = dict(self._iterAssignedCells())
pt = dict(self.iterAssignedCells())
# In backup mode, the last transactions of a readable cell may be
# incomplete.
backup_tid = self.getBackupTID()
......@@ -595,13 +589,14 @@ class DatabaseManager(object):
outofdate_tid(offset)))
for offset, nid, state in cell_list]
self._changePartitionTable(cell_list, reset)
self._updateReadable()
self._updateReadable(reset)
assert isinstance(ptid, (int, long)), ptid
self._setConfiguration('ptid', str(ptid))
self._setConfiguration('replicas', str(num_replicas))
@requires(_changePartitionTable)
def updateCellTID(self, partition, tid):
t, = (t for p, t in self._iterAssignedCells() if p == partition)
t, = (t for p, t in self.iterAssignedCells() if p == partition)
if t < 0:
return
tid = util.u64(tid)
......@@ -623,7 +618,7 @@ class DatabaseManager(object):
next_tid = util.u64(backup_tid)
if next_tid:
next_tid += 1
for offset, tid in self._iterAssignedCells():
for offset, tid in self.iterAssignedCells():
if tid >= 0: # OUT_OF_DATE
yield offset, p64(tid and tid + 1)
elif -tid in READABLE:
......@@ -865,7 +860,7 @@ class DatabaseManager(object):
assert tid, tid
cell_list = []
my_nid = self.getUUID()
for partition, state in self._iterAssignedCells():
for partition, state in self.iterAssignedCells():
if state > tid:
cell_list.append((partition, my_nid, tid))
self._deleteRange(partition, tid)
......
......@@ -117,7 +117,9 @@ class MySQLDatabaseManager(DatabaseManager):
return super(MySQLDatabaseManager, self).__getattr__(attr)
def _tryConnect(self):
kwd = {'db' : self.db, 'user' : self.user}
kwd = {'db' : self.db}
if self.user:
kwd['user'] = self.user
if self.passwd is not None:
kwd['passwd'] = self.passwd
if self.socket:
......@@ -198,6 +200,7 @@ class MySQLDatabaseManager(DatabaseManager):
self._connect()
def _commit(self):
# XXX: Should we translate OperationalError into MysqlError ?
self.conn.commit()
self._active = 0
......@@ -270,6 +273,12 @@ class MySQLDatabaseManager(DatabaseManager):
" ELSE 1-state"
" END as tid")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False):
self._config.clear()
q = self.query
......@@ -295,6 +304,12 @@ class MySQLDatabaseManager(DatabaseManager):
p += """ PARTITION BY LIST (`partition`) (
PARTITION dummy VALUES IN (NULL))"""
if engine == "RocksDB":
cf = lambda name, rev=False: " COMMENT '%scf_neo_%s'" % (
'rev:' if rev else '', name)
else:
cf = lambda *_: ''
# The table "trans" stores information on committed transactions.
schema_dict['trans'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
......@@ -305,8 +320,8 @@ class MySQLDatabaseManager(DatabaseManager):
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`partition`, tid)
) ENGINE=""" + p
PRIMARY KEY (`partition`, tid){}
) ENGINE={}""".format(cf('append_meta'), p)
# The table "obj" stores committed object metadata.
schema_dict['obj'] = """CREATE TABLE %s (
......@@ -315,10 +330,11 @@ class MySQLDatabaseManager(DatabaseManager):
tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (`partition`, oid, tid),
KEY tid (`partition`, tid, oid),
KEY (data_id)
) ENGINE=""" + p
PRIMARY KEY (`partition`, oid, tid){},
KEY tid (`partition`, tid, oid){},
KEY (data_id){}
) ENGINE={}""".format(cf('obj_pk', True),
cf('append_meta'), cf('append_meta'), p)
if engine == "TokuDB":
engine += " compression='tokudb_uncompressed'"
......@@ -326,18 +342,21 @@ class MySQLDatabaseManager(DatabaseManager):
# The table "data" stores object data.
# We'd like to have partial index on 'hash' column (e.g. hash(4))
# but 'UNIQUE' constraint would not work as expected.
schema_dict['data'] = """CREATE TABLE %%s (
id BIGINT UNSIGNED NOT NULL PRIMARY KEY,
schema_dict['data'] = """CREATE TABLE %s (
id BIGINT UNSIGNED NOT NULL,
hash BINARY(20) NOT NULL,
compression TINYINT UNSIGNED NULL,
value MEDIUMBLOB NOT NULL%s
) ENGINE=%s""" % (""",
UNIQUE (hash, compression)""" if dedup else "", engine)
value MEDIUMBLOB NOT NULL,
PRIMARY KEY (id){}{}
) ENGINE={}""".format(cf('append'), """,
UNIQUE (hash, compression)""" + cf('no_comp') if dedup else "",
engine)
schema_dict['bigdata'] = """CREATE TABLE %s (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
value MEDIUMBLOB NOT NULL
) ENGINE=""" + engine
id INT UNSIGNED NOT NULL AUTO_INCREMENT,
value MEDIUMBLOB NOT NULL,
PRIMARY KEY (id){}
) ENGINE={}""".format(cf('append'), p)
# The table "ttrans" stores information on uncommitted transactions.
schema_dict['ttrans'] = """CREATE TABLE %s (
......@@ -348,8 +367,9 @@ class MySQLDatabaseManager(DatabaseManager):
user BLOB NOT NULL,
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL
) ENGINE=""" + engine
ttid BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (ttid){}
) ENGINE={}""".format(cf('no_comp'), p)
# The table "tobj" stores uncommitted object metadata.
schema_dict['tobj'] = """CREATE TABLE %s (
......@@ -358,8 +378,8 @@ class MySQLDatabaseManager(DatabaseManager):
tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (tid, oid)
) ENGINE=""" + engine
PRIMARY KEY (tid, oid){}
) ENGINE={}""".format(cf('no_comp'), p)
if self.nonempty('config') is None:
q(schema_dict.pop('config') % 'config')
......@@ -407,6 +427,9 @@ class MySQLDatabaseManager(DatabaseManager):
q("ALTER TABLE config MODIFY value VARBINARY(%s) NULL" % len(value))
q(sql)
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt")[0][0]
def _getPartitionTable(self):
return self.query("SELECT * FROM pt")
......@@ -965,7 +988,7 @@ class MySQLDatabaseManager(DatabaseManager):
cmd += self._cmdline()
return subprocess.check_output(cmd)
def restore(self, sql):
def _restore(self, sql):
import subprocess
cmd = ['mysql']
cmd += self._cmdline()
......
......@@ -79,6 +79,7 @@ class SQLiteDatabaseManager(DatabaseManager):
def _connect(self):
logging.info('connecting to SQLite database %r', self.db)
self.conn = sqlite3.connect(self.db, check_same_thread=False)
self.conn.text_factory = str
self.lock(self.db)
if self.UNSAFE:
q = self.query
......@@ -144,6 +145,12 @@ class SQLiteDatabaseManager(DatabaseManager):
" WHEN 2 THEN -2" # FEEDING
" ELSE 1-state END")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict, index_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False):
# BBB: SQLite has transactional DDL but before Python 3.6,
# the binding automatically commits between such statements.
......@@ -265,6 +272,9 @@ class SQLiteDatabaseManager(DatabaseManager):
else:
q("REPLACE INTO config VALUES (?,?)", (key, str(value)))
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt").next()[0]
def _getPartitionTable(self):
return self.query("SELECT * FROM pt")
......@@ -712,5 +722,5 @@ class SQLiteDatabaseManager(DatabaseManager):
main[-1:-1] = data
return '\n'.join(main) + '\n'
def restore(self, sql):
def _restore(self, sql):
self.conn.executescript(sql)
......@@ -65,14 +65,14 @@ class BaseMasterHandler(BaseHandler):
# See comment in ClientOperationHandler.connectionClosed
self.app.tm.abortFor(uuid, even_if_voted=True)
def notifyPartitionChanges(self, conn, ptid, cell_list):
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
"""This is very similar to Send Partition Table, except that
the information is only about changes from the previous."""
app = self.app
if ptid != 1 + app.pt.getID():
raise ProtocolError('wrong partition table id')
app.pt.update(ptid, cell_list, app.nm)
app.dm.changePartitionTable(ptid, cell_list)
app.pt.update(ptid, num_replicas, cell_list, app.nm)
app.dm.changePartitionTable(ptid, num_replicas, cell_list)
if app.operational:
app.replicator.notifyPartitionChanges(cell_list)
app.dm.commit()
......
......@@ -32,7 +32,7 @@ class IdentificationHandler(EventHandler):
return self.app.nm
def requestIdentification(self, conn, node_type, uuid, address, name,
devpath, id_timestamp):
id_timestamp, devpath, new_nid):
self.checkClusterName(name)
app = self.app
# reject any incoming connections if not ready
......@@ -65,6 +65,6 @@ class IdentificationHandler(EventHandler):
conn.setHandler(handler)
node.setConnection(conn, force)
# accept the identification and trigger an event
conn.answer(Packets.AcceptIdentification(NodeTypes.STORAGE, uuid and
app.uuid, app.pt.getPartitions(), app.pt.getReplicas(), uuid))
conn.answer(Packets.AcceptIdentification(
NodeTypes.STORAGE, uuid and app.uuid, uuid))
handler.connectionCompleted(conn)
......@@ -20,10 +20,10 @@ from neo.lib.protocol import Packets, ProtocolError, ZERO_TID
class InitializationHandler(BaseMasterHandler):
def sendPartitionTable(self, conn, ptid, row_list):
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
app = self.app
pt = app.pt
pt.load(ptid, row_list, app.nm)
pt.load(ptid, num_replicas, row_list, app.nm)
if not pt.filled():
raise ProtocolError('Partial partition table received')
# Install the partition table into the database for persistence.
......@@ -44,7 +44,7 @@ class InitializationHandler(BaseMasterHandler):
logging.debug('drop data for partitions %r', unassigned)
dm.dropPartitions(unassigned)
dm.changePartitionTable(ptid, cell_list, reset=True)
dm.changePartitionTable(ptid, num_replicas, cell_list, reset=True)
dm.commit()
def truncate(self, conn, tid):
......@@ -68,7 +68,8 @@ class InitializationHandler(BaseMasterHandler):
def askPartitionTable(self, conn):
pt = self.app.pt
conn.answer(Packets.AnswerPartitionTable(pt.getID(), pt.getRowList()))
conn.answer(Packets.AnswerPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
def askLockedTransactions(self, conn):
conn.answer(Packets.AnswerLockedTransactions(
......
......@@ -350,7 +350,7 @@ class Replicator(object):
try:
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
None if name else app.uuid, app.server, name or app.name,
(), app.id_timestamp))
app.id_timestamp, (), ()))
except ConnectionClosed:
if previous_node is self.current_node:
return
......
......@@ -98,9 +98,12 @@ class TransactionManager(EventQueue):
self._load_lock_dict = {}
self._replicated = {}
self._replicating = set()
def getPartition(self, oid):
from neo.lib.util import u64
np = app.pt.getPartitions()
np = self._app.pt.getPartitions()
self.getPartition = lambda oid: u64(oid) % np
return self.getPartition(oid)
def discarded(self, offset_list):
self._replicating.difference_update(offset_list)
......
......@@ -21,6 +21,7 @@ import gc
import os
import random
import socket
import subprocess
import sys
import tempfile
import unittest
......@@ -41,7 +42,7 @@ from .mock import Mock
from neo.lib import debug, logging, protocol
from neo.lib.protocol import NodeTypes, Packets, UUID_NAMESPACES
from neo.lib.util import cached_property
from time import time
from time import time, sleep
from struct import pack, unpack
from unittest.case import _ExpectedFailure, _UnexpectedSuccess
try:
......@@ -72,6 +73,9 @@ DB_ADMIN = os.getenv('NEO_DB_ADMIN', 'root')
DB_PASSWD = os.getenv('NEO_DB_PASSWD', '')
DB_USER = os.getenv('NEO_DB_USER', 'test')
DB_SOCKET = os.getenv('NEO_DB_SOCKET', '')
DB_INSTALL = os.getenv('NEO_DB_INSTALL', 'mysql_install_db')
DB_MYSQLD = os.getenv('NEO_DB_MYSQLD', '/usr/sbin/mysqld')
DB_MYCNF = os.getenv('NEO_DB_MYCNF')
IP_VERSION_FORMAT_DICT = {
socket.AF_INET: '127.0.0.1',
......@@ -134,8 +138,12 @@ def getTempDirectory():
print 'Using temp directory %r.' % temp_dir
return temp_dir
def setupMySQLdb(db_list, user=DB_USER, password='', clear_databases=True):
def setupMySQLdb(db_list, clear_databases=True):
if mysql_pool:
return mysql_pool.setup(db_list, clear_databases)
from MySQLdb.constants.ER import BAD_DB_ERROR
user = DB_USER
password = ''
kw = {'unix_socket': os.path.expanduser(DB_SOCKET)} if DB_SOCKET else {}
conn = MySQLdb.connect(user=DB_ADMIN, passwd=DB_PASSWD, **kw)
cursor = conn.cursor()
......@@ -154,6 +162,88 @@ def setupMySQLdb(db_list, user=DB_USER, password='', clear_databases=True):
cursor.close()
conn.commit()
conn.close()
return '{}:{}@%s{}'.format(user, password, DB_SOCKET).__mod__
class MySQLPool(object):
def __init__(self, pool_dir=None):
self._args = {}
self._mysqld_dict = {}
if not pool_dir:
pool_dir = getTempDirectory()
self._base = pool_dir + os.sep
self._sock_template = os.path.join(pool_dir, '%s', 'mysql.sock')
def __del__(self):
self.kill(*self._mysqld_dict)
def setup(self, db_list, clear_databases):
start_list = set(db_list).difference(self._mysqld_dict)
if start_list:
start_list = sorted(start_list)
x = []
with open(os.devnull, 'wb') as f:
for db in start_list:
base = self._base + db
datadir = os.path.join(base, 'datadir')
sock = self._sock_template % db
tmpdir = os.path.join(base, 'tmp')
args = [DB_INSTALL,
'--defaults-file=' + DB_MYCNF,
'--datadir=' + datadir,
'--socket=' + sock,
'--tmpdir=' + tmpdir,
'--log_error=' + os.path.join(base, 'error.log')]
if os.path.exists(datadir):
try:
os.remove(sock)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
os.makedirs(tmpdir)
x.append(subprocess.Popen(args,
stdout=f, stderr=subprocess.STDOUT))
args[0] = DB_MYSQLD
self._args[db] = args
for x in x:
x = x.wait()
if x:
raise subprocess.CalledProcessError(x, DB_INSTALL)
self.start(*start_list)
for db in start_list:
sock = self._sock_template % db
p = self._mysqld_dict[db]
while not os.path.exists(sock):
sleep(1)
x = p.poll()
if x is not None:
raise subprocess.CalledProcessError(x, DB_MYSQLD)
for db in db_list:
db = MySQLdb.connect(unix_socket=self._sock_template % db,
user='root')
if clear_databases:
db.query('DROP DATABASE IF EXISTS neo')
db.query('CREATE DATABASE IF NOT EXISTS neo')
db.close()
return ('root@neo' + self._sock_template).__mod__
def start(self, *db, **kw):
assert set(db).isdisjoint(self._mysqld_dict)
for db in db:
self._mysqld_dict[db] = subprocess.Popen(self._args[db], **kw)
def kill(self, *db):
processes = []
for db in db:
p = self._mysqld_dict.pop(db)
processes.append(p)
p.kill()
for p in processes:
p.wait()
mysql_pool = MySQLPool() if DB_MYCNF else None
def ImporterConfigParser(adapter, zodb, **kw):
cfg = SafeConfigParser()
......@@ -244,13 +334,15 @@ class NeoUnitTestBase(NeoTestBase):
""" create empty databases """
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
if adapter == 'MySQL':
setupMySQLdb([prefix + str(i) for i in xrange(number)])
db_template = setupMySQLdb(
[prefix + str(i) for i in xrange(number)])
self.db_template = lambda i: db_template(prefix + str(i))
elif adapter == 'SQLite':
temp_dir = getTempDirectory()
self.db_template = os.path.join(getTempDirectory(),
prefix + '%s.sqlite').__mod__
for i in xrange(number):
try:
os.remove(os.path.join(temp_dir,
'%s%s.sqlite' % (prefix, i)))
os.remove(self.db_template(i))
except OSError, e:
if e.errno != errno.ENOENT:
raise
......@@ -274,21 +366,14 @@ class NeoUnitTestBase(NeoTestBase):
def getStorageConfiguration(self, cluster='main', master_number=2,
index=0, prefix=DB_PREFIX, uuid=None):
assert master_number >= 1 and master_number <= 10
assert index >= 0 and index <= 9
masters = [(buildUrlFromString(self.local_ip),
10010 + i) for i in xrange(master_number)]
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
if adapter == 'MySQL':
db = '%s@%s%s%s' % (DB_USER, prefix, index, DB_SOCKET)
elif adapter == 'SQLite':
db = os.path.join(getTempDirectory(), 'test_neo%s.sqlite' % index)
else:
assert False, adapter
return {
'cluster': cluster,
'bind': (masters[0], 10020 + index),
'masters': masters,
'database': db,
'database': self.db_template(index),
'uuid': uuid,
'adapter': adapter,
'wait': 0,
......
......@@ -36,7 +36,7 @@ from neo.lib import logging
from neo.lib.protocol import ClusterStates, NodeTypes, CellStates, NodeStates, \
UUID_NAMESPACES
from neo.lib.util import dump, setproctitle
from .. import (ADDRESS_TYPE, DB_SOCKET, DB_USER, IP_VERSION_FORMAT_DICT, SSL,
from .. import (ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, SSL,
buildUrlFromString, cluster, getTempDirectory, setupMySQLdb,
ImporterConfigParser, NeoTestBase, Patch)
from neo.client.Storage import Storage
......@@ -282,7 +282,7 @@ class NEOProcess(Process):
def _args(self):
args = super(NEOProcess, self)._args()
if self.uuid:
args[:0] = '--uuid', str(self.uuid)
args[:0] = '--nid', str(self.uuid)
return args
def run(self):
......@@ -306,11 +306,11 @@ class NEOCluster(object):
SSL = None
def __init__(self, db_list, master_count=1, partitions=1, replicas=0,
db_user=DB_USER, db_password='', name=None,
name=None,
cleanup_on_delete=False, temp_dir=None, clear_databases=True,
adapter=os.getenv('NEO_TESTS_ADAPTER'),
address_type=ADDRESS_TYPE, bind_ip=None, logger=True,
importer=None):
importer=None, storage_kw={}):
if not adapter:
adapter = 'MySQL'
self.adapter = adapter
......@@ -322,20 +322,28 @@ class NEOCluster(object):
temp_dir = tempfile.mkdtemp(prefix='neo_')
print 'Using temp directory ' + temp_dir
if adapter == 'MySQL':
self.db_user = db_user
self.db_password = db_password
self.db_template = ('%s:%s@%%s%s' % (db_user, db_password,
DB_SOCKET)).__mod__
self.db_template = setupMySQLdb(db_list, clear_databases)
elif adapter == 'SQLite':
self.db_template = (lambda t: lambda db:
':memory:' if db is None else db if os.sep in db else t % db
)(os.path.join(temp_dir, '%s.sqlite'))
if clear_databases:
for db in self.db_list:
if db is None:
continue
db = self.db_template(db)
try:
os.remove(db)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
logging.debug('%r deleted', db)
else:
assert False, adapter
self.address_type = address_type
self.local_ip = local_ip = bind_ip or \
IP_VERSION_FORMAT_DICT[self.address_type]
self.setupDB(clear_databases)
if importer:
cfg = ImporterConfigParser(adapter, **importer)
cfg.set("neo", "database", self.db_template(*db_list))
......@@ -364,7 +372,8 @@ class NEOCluster(object):
# create storage nodes
for i, db in enumerate(db_list):
self._newProcess(NodeTypes.STORAGE, logger and 'storage_%u' % i,
0, adapter=adapter, database=self.db_template(db))
0, adapter=adapter, database=self.db_template(db),
**storage_kw)
# create neoctl
self.neoctl = NeoCTL((self.local_ip, admin_port), ssl=self.SSL)
......@@ -382,23 +391,10 @@ class NEOCluster(object):
self.process_dict.setdefault(node_type, []).append(
NEOProcess(command_dict[node_type], uuid=uuid, **kw))
def setupDB(self, clear_databases=True):
if self.adapter == 'MySQL':
setupMySQLdb(self.db_list, self.db_user, self.db_password,
clear_databases)
elif self.adapter == 'SQLite':
if clear_databases:
def resetDB(self):
for db in self.db_list:
if db is None:
continue
db = self.db_template(db)
try:
os.remove(db)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
logging.debug('%r deleted', db)
dm = buildDatabaseManager(self.adapter, (self.db_template(db),))
dm.setup(True)
def run(self, except_storages=()):
""" Start cluster processes except some storage nodes """
......@@ -437,7 +433,7 @@ class NEOCluster(object):
pending_count += 1
if pending_count == target[0]:
neoctl.startCluster()
except (NotReadyException, RuntimeError):
except (NotReadyException, SystemExit):
pass
if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster')
......@@ -449,7 +445,7 @@ class NEOCluster(object):
def start(last_try):
try:
self.neoctl.startCluster()
except (NotReadyException, RuntimeError), e:
except (NotReadyException, SystemExit), e:
return False, e
return True, None
self.expectCondition(start)
......@@ -653,10 +649,10 @@ class NEOCluster(object):
def expectOudatedCells(self, number, *args, **kw):
def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[1]
row_list = self.neoctl.getPartitionRowList()[2]
number_of_outdated = 0
for row in row_list:
for cell in row[1]:
for cell in row:
if cell[1] == CellStates.OUT_OF_DATE:
number_of_outdated += 1
return number_of_outdated == number, number_of_outdated
......@@ -664,10 +660,10 @@ class NEOCluster(object):
def expectAssignedCells(self, process, number, *args, **kw):
def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[1]
row_list = self.neoctl.getPartitionRowList()[2]
assigned_cells_number = 0
for row in row_list:
for cell in row[1]:
for cell in row:
if cell[0] == process.getUUID():
assigned_cells_number += 1
return assigned_cells_number == number, assigned_cells_number
......
......@@ -62,8 +62,6 @@ class ClientTests(NEOFunctionalTest):
NEOFunctionalTest._tearDown(self, success)
def __setup(self):
# start cluster
self.neo.setupDB()
self.neo.start()
self.neo.expectClusterRunning()
self.db = ZODB.DB(self.neo.getZODBStorage())
......
......@@ -71,7 +71,6 @@ class ClusterTests(NEOFunctionalTest):
def testClusterBreaks(self):
self.neo = NEOCluster(['test_neo1'],
master_count=1, temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
......@@ -82,7 +81,6 @@ class ClusterTests(NEOFunctionalTest):
self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, master_count=1, replicas=0,
temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
......@@ -93,7 +91,6 @@ class ClusterTests(NEOFunctionalTest):
self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, replicas=1, master_count=1,
temp_dir=self.getTempDirectory())
self.neo.setupDB()
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
......
......@@ -47,7 +47,7 @@ class MasterTests(NEOFunctionalTest):
break
neoctl.killNode(uuid)
self.neo.expectDead(master)
self.assertRaises(RuntimeError, neoctl.killNode, primary_uuid)
self.assertRaises(SystemExit, neoctl.killNode, primary_uuid)
def testStoppingPrimaryWithTwoSecondaries(self):
# Wait for masters to stabilize
......
......@@ -172,7 +172,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectOudatedCells(2)
self.neo.expectClusterRunning()
self.assertRaises(RuntimeError, self.neo.neoctl.killNode,
self.assertRaises(SystemExit, self.neo.neoctl.killNode,
started[1].getUUID())
started[1].stop()
# Cluster not operational anymore. Only cells of second storage that
......@@ -323,7 +323,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectStorageUnknown(started[0])
self.neo.expectAssignedCells(started[0], 0)
self.neo.expectAssignedCells(started[1], 10)
self.assertRaises(RuntimeError, self.neo.neoctl.dropNode,
self.assertRaises(SystemExit, self.neo.neoctl.dropNode,
started[1].getUUID())
self.neo.expectClusterRunning()
......
......@@ -30,8 +30,6 @@ class MasterClientHandlerTests(NeoUnitTestBase):
config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config)
self.app.em.close()
self.app.pt.clear()
self.app.pt.setID(1)
self.app.em = Mock()
self.app.loid = '\0' * 8
self.app.tm.setLastTID('\0' * 8)
......
......@@ -26,7 +26,6 @@ class MasterAppTests(NeoUnitTestBase):
# create an application object
config = self.getMasterConfiguration()
self.app = Application(config)
self.app.pt.clear()
def _tearDown(self, success):
self.app.close()
......
......@@ -289,7 +289,9 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt.addNodeList(sn[1:3])
self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..')
self.update(pt, self.tweak(pt, sn[:1]))
self.assertPartitionTable(pt, '.U.|..U|.U.|..U|.U.|..U|.U.')
# See note in PartitionTable.tweak() about drop_list.
#self.assertPartitionTable(pt,'.U.|..U|.U.|..U|.U.|..U|.U.')
self.assertPartitionTable(pt, 'UU.|U.U|UU.|U.U|UU.|U.U|UU.')
def test_18_tweakBigPT(self):
seed = repr(time.time())
......
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, NodeStates, CellStates
from neo.master.recovery import RecoveryManager
from neo.master.app import Application
class MasterRecoveryTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
# create an application object
config = self.getMasterConfiguration()
self.app = Application(config)
self.app.pt.clear()
self.recovery = RecoveryManager(self.app)
self.app.unconnected_master_node_set = set()
self.app.negotiating_master_node_set = set()
for node in self.app.nm.getMasterList():
self.app.unconnected_master_node_set.add(node.getAddress())
node.setState(NodeStates.RUNNING)
# define some variable to simulate client and storage node
self.storage_port = 10021
self.master_port = 10011
def _tearDown(self, success):
self.app.close()
NeoUnitTestBase._tearDown(self, success)
# Common methods
def identifyToMasterNode(self, node_type=NodeTypes.STORAGE, ip="127.0.0.1",
port=10021):
"""Do first step of identification to MN
"""
address = (ip, port)
uuid = self.getNewUUID(node_type)
self.app.nm.createFromNodeType(node_type, address=address, uuid=uuid,
state=NodeStates.RUNNING)
return uuid
# Tests
def test_10_answerPartitionTable(self):
# XXX: This test does much less that it seems, because all 'for' loops
# iterate over empty lists. Currently, only testRecovery covers
# some paths in NodeManager._createNode: apart from that, we could
# delete it entirely.
recovery = self.recovery
uuid = self.identifyToMasterNode(NodeTypes.MASTER, port=self.master_port)
# not from target node, ignore
uuid = self.identifyToMasterNode(NodeTypes.STORAGE, port=self.storage_port)
conn = self.getFakeConnection(uuid, self.storage_port)
node = self.app.nm.getByUUID(conn.getUUID())
offset = 1
cell_list = [(offset, uuid, CellStates.UP_TO_DATE)]
cells = self.app.pt.getRow(offset)
for cell, state in cells:
self.assertEqual(state, CellStates.OUT_OF_DATE)
recovery.target_ptid = 2
node.setPending()
recovery.answerPartitionTable(conn, 1, cell_list)
cells = self.app.pt.getRow(offset)
for cell, state in cells:
self.assertEqual(state, CellStates.OUT_OF_DATE)
# from target node, taken into account
conn = self.getFakeConnection(uuid, self.storage_port)
offset = 1
cell_list = [(offset, ((uuid, CellStates.UP_TO_DATE,),),)]
cells = self.app.pt.getRow(offset)
for cell, state in cells:
self.assertEqual(state, CellStates.OUT_OF_DATE)
node.setPending()
recovery.answerPartitionTable(conn, None, cell_list)
cells = self.app.pt.getRow(offset)
for cell, state in cells:
self.assertEqual(state, CellStates.UP_TO_DATE)
# give a bad offset, must send error
self.recovery.target_uuid = uuid
conn = self.getFakeConnection(uuid, self.storage_port)
offset = 1000000
self.assertFalse(self.app.pt.hasOffset(offset))
cell_list = [(offset, ((uuid, NodeStates.UNKNOWN,),),)]
node.setPending()
self.checkProtocolErrorRaised(recovery.answerPartitionTable, conn,
2, cell_list)
if __name__ == '__main__':
unittest.main()
......@@ -18,8 +18,8 @@ import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, Packets
from neo.master.handlers.storage import StorageServiceHandler
from neo.master.app import Application
from neo.master.handlers.storage import StorageServiceHandler
class MasterStorageHandlerTests(NeoUnitTestBase):
......@@ -29,7 +29,6 @@ class MasterStorageHandlerTests(NeoUnitTestBase):
config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config)
self.app.em.close()
self.app.pt.clear()
self.app.em = Mock()
self.service = StorageServiceHandler(self.app)
......
......@@ -56,7 +56,7 @@ class StorageMasterHandlerTests(NeoUnitTestBase):
self.app.pt = Mock({'getID': 1})
count = len(self.app.nm.getList())
self.assertRaises(ProtocolError, self.operation.notifyPartitionChanges,
conn, 0, ())
conn, 0, 0, ())
self.assertEqual(self.app.pt.getID(), 1)
self.assertEqual(len(self.app.nm.getList()), count)
calls = self.app.replicator.mockGetNamedCalls('removePartition')
......@@ -84,13 +84,13 @@ class StorageMasterHandlerTests(NeoUnitTestBase):
ptid = 2
app.dm = Mock({ })
app.replicator = Mock({})
self.operation.notifyPartitionChanges(conn, ptid, cells)
self.operation.notifyPartitionChanges(conn, ptid, 1, cells)
# ptid set
self.assertEqual(app.pt.getID(), ptid)
# dm call
calls = self.app.dm.mockGetNamedCalls('changePartitionTable')
self.assertEqual(len(calls), 1)
calls[0].checkArgs(ptid, cells)
calls[0].checkArgs(ptid, 1, cells)
if __name__ == "__main__":
unittest.main()
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.storage.app import Application
from neo.lib.protocol import CellStates
from neo.lib.pt import PartitionTable
class StorageAppTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
self.prepareDatabase(number=1)
# create an application object
config = self.getStorageConfiguration(master_number=1)
self.app = Application(config)
def _tearDown(self, success):
self.app.close()
del self.app
super(StorageAppTests, self)._tearDown(success)
def test_01_loadPartitionTable(self):
self.app.dm = Mock({
'getPartitionTable': [],
})
self.assertEqual(self.app.pt, None)
num_partitions = 3
num_replicas = 2
self.app.pt = PartitionTable(num_partitions, num_replicas)
self.assertFalse(self.app.pt.getNodeSet())
self.assertFalse(self.app.pt.filled())
for x in xrange(num_partitions):
self.assertFalse(self.app.pt.hasOffset(x))
# load an empty table
self.app.loadPartitionTable()
self.assertFalse(self.app.pt.getNodeSet())
self.assertFalse(self.app.pt.filled())
for x in xrange(num_partitions):
self.assertFalse(self.app.pt.hasOffset(x))
# add some node, will be remove when loading table
master_uuid = self.getMasterUUID()
master = self.app.nm.createMaster(uuid=master_uuid)
storage_uuid = self.getStorageUUID()
storage = self.app.nm.createStorage(uuid=storage_uuid)
client_uuid = self.getClientUUID()
self.app.pt._setCell(0, master, CellStates.UP_TO_DATE)
self.app.pt._setCell(0, storage, CellStates.UP_TO_DATE)
self.assertEqual(len(self.app.pt.getNodeSet()), 2)
self.assertFalse(self.app.pt.filled())
for x in xrange(num_partitions):
if x == 0:
self.assertTrue(self.app.pt.hasOffset(x))
else:
self.assertFalse(self.app.pt.hasOffset(x))
# load an empty table, everything removed
self.app.loadPartitionTable()
self.assertFalse(self.app.pt.getNodeSet())
self.assertFalse(self.app.pt.filled())
for x in xrange(num_partitions):
self.assertFalse(self.app.pt.hasOffset(x))
# add some node
self.app.pt._setCell(0, master, CellStates.UP_TO_DATE)
self.app.pt._setCell(0, storage, CellStates.UP_TO_DATE)
self.assertEqual(len(self.app.pt.getNodeSet()), 2)
self.assertFalse(self.app.pt.filled())
for x in xrange(num_partitions):
if x == 0:
self.assertTrue(self.app.pt.hasOffset(x))
else:
self.assertFalse(self.app.pt.hasOffset(x))
# fill partition table
self.app.dm = Mock({
'getPartitionTable': [
(0, client_uuid, CellStates.UP_TO_DATE),
(1, client_uuid, CellStates.UP_TO_DATE),
(1, storage_uuid, CellStates.UP_TO_DATE),
(2, storage_uuid, CellStates.UP_TO_DATE),
(2, master_uuid, CellStates.UP_TO_DATE),
],
'getPTID': 1,
})
self.app.pt.clear()
self.app.loadPartitionTable()
self.assertTrue(self.app.pt.filled())
for x in xrange(num_partitions):
self.assertTrue(self.app.pt.hasOffset(x))
# check each row
cell_list = self.app.pt.getCellList(0)
self.assertEqual(len(cell_list), 1)
self.assertEqual(cell_list[0].getUUID(), client_uuid)
cell_list = self.app.pt.getCellList(1)
self.assertEqual(len(cell_list), 2)
self.assertTrue(cell_list[0].getUUID() in (client_uuid, storage_uuid))
self.assertTrue(cell_list[1].getUUID() in (client_uuid, storage_uuid))
cell_list = self.app.pt.getCellList(2)
self.assertEqual(len(cell_list), 2)
self.assertTrue(cell_list[0].getUUID() in (master_uuid, storage_uuid))
self.assertTrue(cell_list[1].getUUID() in (master_uuid, storage_uuid))
if __name__ == '__main__':
unittest.main()
......@@ -48,30 +48,15 @@ class StorageDBTests(NeoUnitTestBase):
raise NotImplementedError
def setNumPartitions(self, num_partitions, reset=0):
try:
db = self._db
except AttributeError:
assert not hasattr(self, '_db')
self._db = db = self.getDB(reset)
else:
if reset:
db.setup(reset)
else:
try:
n = db.getNumPartitions()
except KeyError:
n = 0
if num_partitions == n:
return
if num_partitions < n:
db.dropPartitions(n)
db.setNumPartitions(num_partitions)
self.assertEqual(num_partitions, db.getNumPartitions())
uuid = self.getStorageUUID()
db.setUUID(uuid)
self.assertEqual(uuid, db.getUUID())
db.changePartitionTable(1,
db.changePartitionTable(1, 0,
[(i, uuid, CellStates.UP_TO_DATE) for i in xrange(num_partitions)],
reset=True)
self.assertEqual(num_partitions, 1 + db._getMaxPartition())
db.commit()
def checkConfigEntry(self, get_call, set_call, value):
......@@ -102,16 +87,6 @@ class StorageDBTests(NeoUnitTestBase):
db = self.getDB()
self.checkConfigEntry(db.getName, db.setName, 'TEST_NAME')
def test_getPartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
uuid1, uuid2 = self.getStorageUUID(), self.getStorageUUID()
cell1 = (0, uuid1, CellStates.OUT_OF_DATE)
cell2 = (1, uuid1, CellStates.UP_TO_DATE)
db.changePartitionTable(1, [cell1, cell2], 1)
result = db.getPartitionTable()
self.assertEqual(set(result), {cell1, cell2})
def getOIDs(self, count):
return map(p64, xrange(count))
......@@ -202,52 +177,6 @@ class StorageDBTests(NeoUnitTestBase):
self.assertEqual(self.db.getObject(oid1, before_tid=tid2),
OBJECT_T1_NEXT)
def test_setPartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
ptid = 1
uuid = self.getStorageUUID()
cell1 = 0, uuid, CellStates.OUT_OF_DATE
cell2 = 1, uuid, CellStates.UP_TO_DATE
cell3 = 1, uuid, CellStates.DISCARDED
# no partition table
self.assertEqual(list(db.getPartitionTable()), [])
# set one
db.changePartitionTable(ptid, [cell1], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
# then another
db.changePartitionTable(ptid, [cell2], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [cell2])
# drop discarded cells
db.changePartitionTable(ptid, [cell2, cell3], 1)
result = db.getPartitionTable()
self.assertEqual(list(result), [])
def test_changePartitionTable(self):
db = self.getDB()
db.setNumPartitions(3)
ptid = 1
uuid = self.getStorageUUID()
cell1 = 0, uuid, CellStates.OUT_OF_DATE
cell2 = 1, uuid, CellStates.UP_TO_DATE
cell3 = 1, uuid, CellStates.DISCARDED
# no partition table
self.assertEqual(list(db.getPartitionTable()), [])
# set one
db.changePartitionTable(ptid, [cell1])
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
# add more entries
db.changePartitionTable(ptid, [cell2])
result = db.getPartitionTable()
self.assertEqual(set(result), {cell1, cell2})
# drop discarded cells
db.changePartitionTable(ptid, [cell2, cell3])
result = db.getPartitionTable()
self.assertEqual(list(result), [cell1])
def test_commitTransaction(self):
oid1, oid2 = self.getOIDs(2)
tid1, tid2 = self.getTIDs(2)
......
......@@ -22,7 +22,7 @@ from MySQLdb.constants.ER import UNKNOWN_STORAGE_ENGINE
from ..mock import Mock
from neo.lib.protocol import ZERO_OID
from neo.lib.util import p64
from .. import DB_PREFIX, DB_SOCKET, DB_USER, Patch
from .. import DB_PREFIX, DB_USER, Patch, setupMySQLdb
from .testStorageDBTests import StorageDBTests
from neo.storage.database import DatabaseFailure
from neo.storage.database.mysqldb import MySQLDatabaseManager
......@@ -46,8 +46,8 @@ class StorageMySQLdbTests(StorageDBTests):
engine = None
def _test_lockDatabase_open(self):
self.prepareDatabase(number=1, prefix=DB_PREFIX)
database = '%s@%s0%s' % (DB_USER, DB_PREFIX, DB_SOCKET)
self.prepareDatabase(1)
database = self.db_template(0)
return MySQLDatabaseManager(database, self.engine)
def getDB(self, reset=0):
......
......@@ -19,12 +19,9 @@ class Handler(MasterEventHandler):
super(Handler, self).answerClusterState(conn, state)
self.app.refresh('state')
def answerPartitionTable(self, *args):
super(Handler, self).answerPartitionTable(*args)
self.app.refresh('pt')
def sendPartitionTable(self, *args):
raise AssertionError
super(Handler, self).sendPartitionTable(*args)
self.app.refresh('pt')
def notifyPartitionChanges(self, *args):
super(Handler, self).notifyPartitionChanges(*args)
......@@ -50,6 +47,7 @@ class StressApplication(AdminApplication):
cluster_state = server = uuid = None
listening_conn = True
fault_probability = 1
restart_ratio = float('inf') # no firewall support
_stress = False
......@@ -191,7 +189,7 @@ class StressApplication(AdminApplication):
self.loid = loid
self.ltid = ltid
self.em.setTimeout(int(time.time() + 1), self.askLastIDs)
if self._stress:
if self._stress and random.random() < self.fault_probability:
node_list = self.nm.getStorageList()
random.shuffle(node_list)
fw = []
......
......@@ -40,7 +40,7 @@ from neo.lib.util import cached_property, parseMasterList, p64
from neo.master.recovery import RecoveryManager
from .. import (getTempDirectory, setupMySQLdb,
ImporterConfigParser, NeoTestBase, Patch,
ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, DB_PREFIX, DB_SOCKET, DB_USER)
ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, DB_PREFIX)
BIND = IP_VERSION_FORMAT_DICT[ADDRESS_TYPE], 0
LOCAL_IP = socket.inet_pton(ADDRESS_TYPE, IP_VERSION_FORMAT_DICT[ADDRESS_TYPE])
......@@ -304,7 +304,13 @@ class TestSerialized(Serialized):
class Node(object):
def getConnectionList(self, *peers):
addr = lambda c: c and (c.addr if c.is_server else c.getAddress())
def addr(c):
# Do not identify only by source address because 2 TCP connections
# can have same source host:port to different destinations.
if c:
a = c.addr
b = c.getAddress()
return (b, a) if c.is_server else (ServerNode.resolv(a), b)
addr_set = {addr(c.connector) for peer in peers
for c in peer.em.connection_dict.itervalues()
if isinstance(c, Connection)}
......@@ -377,7 +383,10 @@ class ServerNode(Node):
assert not self.is_alive()
init_args = self._init_args
init_args['reset'] = False
assert set(kw).issubset(init_args), (kw, init_args)
if __debug__:
x = set(kw).difference(init_args)
assert not x or x.issubset(self.option_parser.getOptionDict()), (
kw, init_args)
init_args.update(kw)
self.close()
self.__init__(**init_args)
......@@ -708,7 +717,7 @@ class NEOCluster(object):
def __init__(self, master_count=1, partitions=1, replicas=0, upstream=None,
adapter=os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
storage_count=None, db_list=None, clear_databases=True,
db_user=DB_USER, db_password='', compress=True,
compress=True,
importer=None, autostart=None, dedup=False, name=None):
self.name = name or 'neo_%s' % self._allocate('name',
lambda: random.randint(0, 100))
......@@ -735,21 +744,20 @@ class NEOCluster(object):
db_list = ['%s%u' % (DB_PREFIX, self._allocate('db', index))
for _ in xrange(storage_count)]
if adapter == 'MySQL':
setupMySQLdb(db_list, db_user, db_password, clear_databases)
db = '%s:%s@%%s%s' % (db_user, db_password, DB_SOCKET)
db = setupMySQLdb(db_list, clear_databases)
elif adapter == 'SQLite':
db = os.path.join(getTempDirectory(), '%s.sqlite')
db = os.path.join(getTempDirectory(), '%s.sqlite').__mod__
else:
assert False, adapter
if importer:
cfg = ImporterConfigParser(adapter, **importer)
cfg.set("neo", "database", db % tuple(db_list))
db = os.path.join(getTempDirectory(), '%s.conf')
with open(db % tuple(db_list), "w") as f:
cfg.set("neo", "database", db(*db_list))
db = os.path.join(getTempDirectory(), '%s.conf').__mod__
with open(db(*db_list), "w") as f:
cfg.write(f)
kw["adapter"] = "Importer"
kw['wait'] = 0
self.storage_list = [StorageApplication(database=db % x, **kw)
self.storage_list = [StorageApplication(database=db(x), **kw)
for x in db_list]
self.admin_list = [AdminApplication(**kw)]
......@@ -805,7 +813,7 @@ class NEOCluster(object):
master_list = self.master_list
if storage_list is None:
storage_list = self.storage_list
def answerPartitionTable(release, orig, *args):
def sendPartitionTable(release, orig, *args):
orig(*args)
release()
def dispatch(release, orig, handler, *args):
......@@ -821,7 +829,7 @@ class NEOCluster(object):
if state in expected_state:
release()
with Serialized.until(MasterEventHandler,
answerPartitionTable=answerPartitionTable) as tic1, \
sendPartitionTable=sendPartitionTable) as tic1, \
Serialized.until(RecoveryManager, dispatch=dispatch) as tic2, \
Serialized.until(MasterEventHandler,
notifyClusterInformation=notifyClusterInformation) as tic3:
......@@ -846,9 +854,13 @@ class NEOCluster(object):
expected_state = (NodeStates.PENDING
if state == ClusterStates.RECOVERING
else NodeStates.RUNNING)
for node in self.storage_list if storage_list is None else storage_list:
for node, expected_state in (
storage_list if isinstance(storage_list, dict) else
dict.fromkeys(self.storage_list if storage_list is None else
storage_list, expected_state)
).iteritems():
state = self.getNodeState(node)
assert state == expected_state, (repr(node), state)
assert state == expected_state, (repr(node), state, expected_state)
def stop(self, clear_database=False, __print_exc=traceback.print_exc, **kw):
if self.started:
......@@ -922,7 +934,7 @@ class NEOCluster(object):
def startCluster(self):
try:
self.neoctl.startCluster()
except RuntimeError:
except SystemExit:
Serialized.tic()
if self.neoctl.getClusterState() not in (
ClusterStates.BACKINGUP,
......@@ -1001,18 +1013,18 @@ class NEOCluster(object):
"""Sort storages so that storage_list[i] has partition i for all i"""
pt = [{x.getUUID() for x in x}
for x in self.primary_master.pt.partition_list]
n = len(self.storage_list)
r = []
x = [iter(pt[0])]
try:
while 1:
try:
r.append(next(x[-1]))
except StopIteration:
del r[-1], x[-1]
else:
if len(r) == n:
break
x.append(iter(pt[len(r)].difference(r)))
except IndexError:
assert len(r) == len(self.storage_list)
x = {x.uuid: x for x in self.storage_list}
self.storage_list[:] = (x[r] for r in r)
return self.storage_list
......
......@@ -42,6 +42,7 @@ from neo.lib.util import add64, makeChecksum, p64, u64
from neo.client.exception import NEOPrimaryMasterLost, NEOStorageError
from neo.client.transactions import Transaction
from neo.master.handlers.client import ClientServiceHandler
from neo.master.pt import PartitionTable
from neo.storage.database import DatabaseFailure
from neo.storage.handlers.client import ClientOperationHandler
from neo.storage.handlers.identification import IdentificationHandler
......@@ -471,6 +472,7 @@ class Test(NEOThreadedTest):
self.assertFalse(conn.isClosed())
getCellSortKey = cluster.client.getCellSortKey
self.assertEqual(getCellSortKey(s0, good), 0)
cluster.neoctl.killNode(s0.getUUID())
cluster.neoctl.dropNode(s0.getUUID())
self.assertEqual([s1], cluster.client.nm.getStorageList())
self.assertTrue(conn.isClosed())
......@@ -776,6 +778,7 @@ class Test(NEOThreadedTest):
checkNodeState(NodeStates.RUNNING)
self.assertEqual([], cluster.getOutdatedCells())
# drop one
cluster.neoctl.killNode(s1.uuid)
cluster.neoctl.dropNode(s1.uuid)
checkNodeState(None)
self.tic() # Let node state update reach remaining storage
......@@ -1123,6 +1126,10 @@ class Test(NEOThreadedTest):
# Check that the storage hasn't answered to the store,
# which means that a lock is still taken for r['x'] by t2.
self.tic()
try:
txn = txn.data(c1)
except (AttributeError, KeyError): # BBB: ZODB < 5
pass
txn_context = cluster.client._txn_container.get(txn)
empty = txn_context.queue.empty()
ll()
......@@ -1371,7 +1378,7 @@ class Test(NEOThreadedTest):
del conn._queue[:] # XXX
conn.close()
if 1:
with Patch(cluster.master.pt, make=make), \
with Patch(PartitionTable, make=make), \
Patch(InitializationHandler,
askPartitionTable=askPartitionTable) as p:
cluster.start()
......@@ -1902,18 +1909,7 @@ class Test(NEOThreadedTest):
x.value += 1
c2.root()['x'].value += 2
TransactionalResource(t1, 1, tpc_begin=begin1)
# BUG: Very rarely, getConnectionList returns more that 1
# connection ("too many values to unpack"), which is
# a mystery and impossible to reproduce:
# - 1st time: v1.8.1 on a test machine (no SSL)
# - last: current revision on my laptop (SSL),
# at the first iteration of this loop
_sm = list(s1.getConnectionList(cluster.master))
try:
s1m, = _sm
except ValueError:
self.fail((_sm, list(
s1.getConnectionList(cluster.master))))
s1m, = s1.getConnectionList(cluster.master)
try:
s1.em.removeReader(s1m)
with ConnectionFilter() as f, \
......@@ -2361,6 +2357,10 @@ class Test(NEOThreadedTest):
# Check that the storage hasn't answered to the store,
# which means that a lock is still taken for r[''] by t1.
self.tic()
try:
txn = txn.data(c3)
except (AttributeError, KeyError): # BBB: ZODB < 5
pass
txn_context = db.storage.app._txn_container.get(txn)
raise Abort(txn_context.queue.empty())
TransactionalResource(t3, 1, commit=t3_commit)
......@@ -2407,8 +2407,8 @@ class Test(NEOThreadedTest):
for x in 'ab':
r[x] = PCounterWithResolution()
t1.commit()
cluster.stop(replicas=1)
cluster.start()
cluster.neoctl.setNumReplicas(1)
self.tic()
s0, s1 = cluster.sortStorageList()
t1, c1 = cluster.getTransaction()
r = c1.root()
......@@ -2592,8 +2592,8 @@ class Test(NEOThreadedTest):
for x in 'ab':
r[x] = PCounterWithResolution()
t1.commit()
cluster.stop(replicas=1)
cluster.start()
cluster.neoctl.setNumReplicas(1)
self.tic()
s0, s1 = cluster.sortStorageList()
t1, c1 = cluster.getTransaction()
r = c1.root()
......@@ -2894,9 +2894,9 @@ class Test(NEOThreadedTest):
dm = s.dm
dm.commit()
dump_dict[s.uuid] = dm.dump()
dm.erase()
with open(path % (s.getAdapter(), s.uuid)) as f:
dm.restore(f.read())
dm.setConfiguration('partitions', None) # XXX: see dm._migrate4
with NEOCluster(storage_count=3, partitions=3, replicas=1,
name=self._testMethodName) as cluster:
s1, s2, s3 = cluster.storage_list
......
......@@ -17,13 +17,15 @@
from cPickle import Pickler, Unpickler
from cStringIO import StringIO
from itertools import izip_longest
import os, random, shutil, time, unittest
import os, random, shutil, threading, time, unittest
import transaction, ZODB
from neo.client.exception import NEOPrimaryMasterLost
from neo.lib import logging
from neo.lib.util import u64
from neo.lib.util import cached_property, p64, u64
from neo.master.transactions import TransactionManager
from neo.storage.database import getAdapterKlass, importer, manager
from neo.storage.database.importer import Repickler, TransactionRecord
from neo.storage.database.importer import \
Repickler, TransactionRecord, WriteBack
from .. import expectedFailure, getTempDirectory, random_tree, Patch
from . import NEOCluster, NEOThreadedTest
from ZODB import serialize
......@@ -128,31 +130,51 @@ class ImporterTests(NEOThreadedTest):
self.assertIs(Obj, load())
self.assertDictEqual(state, load())
def _importFromFileStorage(self, multi=(),
root_filter=None, sub_filter=None):
import_hash = '1d4ff03730fe6bcbf235e3739fbe5f5b'
@cached_property
def getFS(self):
fs_dir = os.path.join(getTempDirectory(), self.id())
shutil.rmtree(fs_dir, 1) # for --loop
os.mkdir(fs_dir)
def getFS(db='root'):
path = os.path.join(fs_dir, '%s.fs' % db)
return path, {
"storage": "<filestorage>\npath %s\n</filestorage>" % path
}
return getFS
def getData(self, tree=random_tree.generateTree(random.Random(0))):
txn_size = 10
tree = random_tree.generateTree(random.Random(0))
i = len(tree) // 3
assert i > txn_size
before_tree = tree[:i]
after_tree = tree[i:]
fs_dir = os.path.join(getTempDirectory(), self.id())
shutil.rmtree(fs_dir, 1) # for --loop
os.mkdir(fs_dir)
def beforeCheck(h, count=52):
self.assertEqual(count, h())
self.assertEqual('1d4ff03730fe6bcbf235e3739fbe5f5b', h.hexdigest())
def finalCheck(r):
h = random_tree.hashTree(r)
self.assertEqual(93, h())
self.assertEqual('6bf0f0cb2d6c1aae9e52c412ef0e25b6', h.hexdigest())
return (
beforeCheck,
lambda r, *f: random_tree.importTree(r, before_tree, txn_size, *f),
finalCheck,
lambda r: random_tree.importTree(r, after_tree, txn_size),
)
def _importFromFileStorage(self, multi=(),
root_filter=None, sub_filter=None):
beforeCheck, before, finalCheck, after = self.getData()
iter_list = []
db_list = []
# Setup several FileStorage databases.
for i, db in enumerate(('root',) + multi):
fs_path = os.path.join(fs_dir, '%s.fs' % db)
fs_path, cfg = self.getFS(db)
c = ZODB.DB(FileStorage(fs_path)).open()
r = c.root()['tree'] = random_tree.Node()
transaction.commit()
iter_list.append(random_tree.importTree(r, before_tree, txn_size,
sub_filter(db) if i else root_filter))
db_list.append((db, r, {
"storage": "<filestorage>\npath %s\n</filestorage>" % fs_path
}))
iter_list.append(before(r, sub_filter(db) if i else root_filter))
db_list.append((db, r, cfg))
# Populate FileStorage databases.
for i, iter_list in enumerate(izip_longest(*iter_list)):
for r in iter_list:
......@@ -167,9 +189,7 @@ class ImporterTests(NEOThreadedTest):
for x in multi:
cfg['_%s' % x] = str(u64(r[x]._p_oid))
else:
h = random_tree.hashTree(r)
h()
self.assertEqual(import_hash, h.hexdigest())
beforeCheck(random_tree.hashTree(r))
importer['writeback'] = 'true'
else:
cfg["oid"] = str(u64(r[db]._p_oid))
......@@ -179,7 +199,7 @@ class ImporterTests(NEOThreadedTest):
del db_list, iter_list
#del zodb[0][1][zodb.pop()[0]]
# Start NEO cluster with transparent import.
with NEOCluster(importer=importer) as cluster:
with NEOCluster(importer=importer, partitions=2) as cluster:
# Suspend import for a while, so that import
# is finished in the middle of the below 'for' loop.
# Use a slightly different main loop for storage so that it
......@@ -214,12 +234,10 @@ class ImporterTests(NEOThreadedTest):
logging.info("start migration")
dm.doOperation(cluster.storage)
# Adjust if needed. Must remain > 0.
self.assertEqual(22, h())
self.assertEqual(import_hash, h.hexdigest())
beforeCheck(h, 22)
# New writes after the switch to NEO.
last_import = -1
for i, r in enumerate(random_tree.importTree(
r, after_tree, txn_size)):
for i, r in enumerate(after(r)):
t.commit()
if cluster.storage.dm._import:
last_import = i
......@@ -228,11 +246,6 @@ class ImporterTests(NEOThreadedTest):
assert i < last_import * 3 < 2 * i, (last_import, i)
self.assertFalse(cluster.storage.dm._import)
storage._cache.clear()
def finalCheck(r):
h = random_tree.hashTree(r)
self.assertEqual(93, h())
self.assertEqual('6bf0f0cb2d6c1aae9e52c412ef0e25b6',
h.hexdigest())
finalCheck(r)
if dm._writeback:
dm.commit()
......@@ -243,10 +256,10 @@ class ImporterTests(NEOThreadedTest):
db.close()
@unittest.skipUnless(importer.FORK, 'no os.fork')
def test1(self):
def testMultiProcessWriteBack(self):
self._importFromFileStorage()
def testThreadedWriteback(self):
def testThreadedWritebackAndDBReconnection(self):
# Also check reconnection to the underlying DB for relevant backends.
tid_list = []
def __init__(orig, tr, db, tid):
......@@ -274,6 +287,24 @@ class ImporterTests(NEOThreadedTest):
self.assertFalse(p.applied)
self.assertEqual(len(tid_list), 11)
def testThreadedWritebackWithUnbalancedPartitions(self):
N = 7
nonlocal_ = [0]
def committed(orig, self):
if nonlocal_[0] > N:
orig(self)
def _nextTID(orig, self, *args):
if args:
return orig(self, *args)
nonlocal_[0] += 1
return orig(self, p64(nonlocal_[0] == N), 2)
with Patch(importer, FORK=False), \
Patch(TransactionManager, _nextTID=_nextTID), \
Patch(WriteBack, chunk_size=N-2), \
Patch(WriteBack, committed=committed):
self._importFromFileStorage()
self.assertEqual(nonlocal_[0], 10)
def testMerge(self):
multi = 1, 2, 3
self._importFromFileStorage(multi,
......@@ -285,5 +316,52 @@ class ImporterTests(NEOThreadedTest):
# merge several DB.
testMerge = expectedFailure(NEOPrimaryMasterLost)(testMerge)
def testIncremental(self):
"""
This reproduces an undocumented way to speed up the import of a single
ZODB by doing most of the work before switching to NEO.
"""
beforeCheck, before, finalCheck, after = self.getData()
fs_path, cfg = self.getFS()
c = ZODB.DB(FileStorage(fs_path)).open()
r = c.root()['tree'] = random_tree.Node()
transaction.commit()
for _ in before(r):
transaction.commit()
c.db().close()
importer = {'zodb': [('root', cfg)]}
# Start NEO cluster with transparent import.
with NEOCluster(importer=importer, partitions=2) as cluster:
s = cluster.storage
l = threading.Lock()
l.acquire()
def _finished(orig):
orig()
l.release()
with Patch(s.dm, _finished=_finished):
cluster.start()
l.acquire()
t, c = cluster.getTransaction()
r = c.root()['tree']
beforeCheck(random_tree.hashTree(r))
c = ZODB.DB(FileStorage(fs_path)).open()
for _ in after(c.root()['tree']):
transaction.commit()
c.db().close()
# TODO: Add a storage option that only does this and exits.
# Such command would also check that there's no data after
# what's already imported.
s.dm.setConfiguration('zodb', None)
s.stop()
cluster.join((s,))
s.resetNode()
with Patch(s.dm, _finished=_finished):
s.start()
self.tic()
l.acquire()
t.begin()
finalCheck(r)
if __name__ == "__main__":
unittest.main()
......@@ -29,7 +29,7 @@ from neo.storage.database.manager import DatabaseManager
from neo.storage import replicator
from neo.lib.connector import SocketConnector
from neo.lib.connection import ClientConnection
from neo.lib.protocol import CellStates, ClusterStates, Packets, \
from neo.lib.protocol import CellStates, ClusterStates, NodeStates, Packets, \
ZERO_OID, ZERO_TID, MAX_TID, uuid_str
from neo.lib.util import add64, p64, u64
from .. import Patch, TransactionalResource
......@@ -74,6 +74,8 @@ class ReplicationTests(NEOThreadedTest):
source_dict = {x.uuid: x for x in cluster.upstream.storage_list}
for storage in cluster.storage_list:
self.assertFalse(storage.dm._uncommitted_data)
if storage.pt is None:
storage.loadPartitionTable()
self.assertEqual(np, storage.pt.getPartitions())
for partition in pt.getAssignedPartitionList(storage.uuid):
cell_list = upstream_pt.getCellList(partition, readable=True)
......@@ -89,6 +91,7 @@ class ReplicationTests(NEOThreadedTest):
checksum_list = [
self.checksumPartition(storage_dict[x.getUUID()], offset)
for x in pt.getCellList(offset)]
self.assertLess(1, len(checksum_list))
self.assertEqual(1, len(set(checksum_list)),
(offset, checksum_list))
......@@ -445,13 +448,13 @@ class ReplicationTests(NEOThreadedTest):
return isinstance(packet, delayed) and \
packet.decode()[0] == offset and \
conn in s1.getConnectionList(s0)
def changePartitionTable(orig, ptid, cell_list):
def changePartitionTable(orig, ptid, num_replicas, cell_list):
if (offset, s0.uuid, CellStates.DISCARDED) in cell_list:
connection_filter.remove(delayAskFetch)
# XXX: this is currently not done by
# default for performance reason
orig.im_self.dropPartitions((offset,))
return orig(ptid, cell_list)
return orig(ptid, num_replicas, cell_list)
np = cluster.num_partitions
s0, s1, s2 = cluster.storage_list
for delayed in Packets.AskFetchTransactions, Packets.AskFetchObjects:
......@@ -511,7 +514,9 @@ class ReplicationTests(NEOThreadedTest):
for x in 'ab':
r[x] = PCounter()
t.commit()
cluster.stop(replicas=1)
cluster.neoctl.setNumReplicas(1)
self.tic()
cluster.stop()
cluster.start((s1, s2))
with ConnectionFilter() as f:
f.delayAddObject()
......@@ -596,8 +601,9 @@ class ReplicationTests(NEOThreadedTest):
tweak()
t.commit()
t2.join()
cluster.neoctl.dropNode(S[2].uuid)
cluster.neoctl.dropNode(S[3].uuid)
for s in S[2:]:
cluster.neoctl.killNode(s.uuid)
cluster.neoctl.dropNode(s.uuid)
cluster.neoctl.tweakPartitionTable()
if done:
f.remove(delay)
......@@ -928,6 +934,74 @@ class ReplicationTests(NEOThreadedTest):
def testReplicationBlockedByUnfinished2(self):
self.testReplicationBlockedByUnfinished1(True)
@with_cluster(partitions=6, storage_count=5, start_cluster=0)
def testSplitAndMakeResilientUsingClone(self, cluster):
"""
Test cloning of storage nodes using --new-nid instead NEO replication.
"""
s0 = cluster.storage_list[0]
s12 = cluster.storage_list[1:3]
s34 = cluster.storage_list[3:]
cluster.start(storage_list=(s0,))
cluster.importZODB()(6)
for s in s12:
s.start()
self.tic()
drop_list = [s0.uuid]
self.assertRaises(SystemExit, cluster.neoctl.tweakPartitionTable,
drop_list)
cluster.enableStorageList(s12)
def expected(changed):
s0 = 1, CellStates.UP_TO_DATE
s = CellStates.OUT_OF_DATE if changed else CellStates.UP_TO_DATE
return changed, 3 * [[s0, (2, s)], [s0, (3, s)]]
for dry_run in True, False:
self.assertEqual(expected(True),
cluster.neoctl.tweakPartitionTable(drop_list, dry_run))
self.tic()
self.assertEqual(expected(False),
cluster.neoctl.tweakPartitionTable(drop_list))
for s, d in zip(s12, s34):
s.stop()
cluster.join((s,))
s.resetNode()
d.dm.restore(s.dm.dump())
d.resetNode(new_nid=True)
s.start()
d.start()
self.tic()
self.assertEqual(cluster.getNodeState(s), NodeStates.RUNNING)
self.assertEqual(cluster.getNodeState(d), NodeStates.DOWN)
cluster.join((d,))
d.resetNode(new_nid=False)
d.start()
self.tic()
self.checkReplicas(cluster)
expected = '|'.join(['UU.U.|U.U.U'] * 3)
self.assertPartitionTable(cluster, expected)
cluster.neoctl.setNumReplicas(1)
cluster.neoctl.tweakPartitionTable(drop_list)
self.tic()
self.assertPartitionTable(cluster, expected)
s0.stop()
cluster.join((s0,))
cluster.neoctl.dropNode(s0.uuid)
expected = '|'.join(['U.U.|.U.U'] * 3)
self.assertPartitionTable(cluster, expected)
@with_cluster(partitions=3, replicas=1, storage_count=3)
def testAdminOnerousOperationCondition(self, cluster):
s = cluster.storage_list[2]
cluster.neoctl.killNode(s.uuid)
tweak = cluster.neoctl.tweakPartitionTable
self.assertRaises(SystemExit, tweak)
self.assertRaises(SystemExit, tweak, dry_run=True)
self.assertTrue(tweak((s.uuid,))[0])
self.tic()
cluster.neoctl.dropNode(s.uuid)
s = cluster.storage_list[1]
self.assertRaises(SystemExit, cluster.neoctl.dropNode, s.uuid)
@with_cluster(partitions=5, replicas=2, storage_count=3)
def testCheckReplicas(self, cluster):
from neo.storage import checker
......@@ -940,8 +1014,8 @@ class ReplicationTests(NEOThreadedTest):
return s0.uuid
def check(expected_state, expected_count):
self.assertEqual(expected_count, len([None
for row in cluster.neoctl.getPartitionRowList()[1]
for cell in row[1]
for row in cluster.neoctl.getPartitionRowList()[2]
for cell in row
if cell[1] == CellStates.CORRUPTED]))
self.assertEqual(expected_state, cluster.neoctl.getClusterState())
np = cluster.num_partitions
......
......@@ -33,8 +33,6 @@ class RecoveryTests(ZODBTestCase, StorageTestBase, RecoveryStorage):
os.makedirs(dst_temp_dir)
self.neo_dst = NEOCluster(['test_neo1-dst'], partitions=1, replicas=0,
master_count=1, temp_dir=dst_temp_dir)
self.neo_dst.stop()
self.neo_dst.setupDB()
self.neo_dst.start()
self._dst = self.neo.getZODBStorage()
self._dst_db = ZODB.DB(self._dst)
......
......@@ -78,7 +78,7 @@ else:
setup(
name = 'neoppod',
version = '1.11',
version = '1.12.0',
description = __doc__.strip(),
author = 'Nexedi SA',
author_email = 'neo-dev@erp5.org',
......
......@@ -129,7 +129,7 @@ class MatrixImportBenchmark(BenchmarkRunner):
finally:
zodb.stop()
# Clear DB if no error happened.
zodb.setupDB()
zodb.resetDB()
return end - start
except:
traceback.print_exc()
......
......@@ -53,7 +53,7 @@ class ImportBenchmark(BenchmarkRunner):
finally:
neo.stop()
# Clear DB if no error happened.
neo.setupDB()
neo.resetDB()
return result
except:
return 'Perf: import failed', ''.join(traceback.format_exc())
......
......@@ -7,6 +7,7 @@ from contextlib import contextmanager
from datetime import datetime
from functools import partial
from multiprocessing import Lock, RawArray
from multiprocessing.queues import SimpleQueue
from struct import Struct
from netfilterqueue import NetfilterQueue
import gevent.socket # preload for subprocesses
......@@ -19,7 +20,7 @@ from neo.lib.protocol import NodeTypes
from neo.lib.util import timeStringFromTID, p64, u64
from neo.storage.app import DATABASE_MANAGER_DICT, \
Application as StorageApplication
from neo.tests import getTempDirectory
from neo.tests import getTempDirectory, mysql_pool
from neo.tests.ConflictFree import ConflictFreeLog
from neo.tests.functional import AlreadyStopped, NEOCluster, Process
from neo.tests.stress import StressApplication
......@@ -312,13 +313,15 @@ class NEOCluster(NEOCluster):
class Application(StressApplication):
_blocking = None
_blocking = _kill_mysqld = None
def __init__(self, client_count, thread_count, restart_ratio, logrotate,
*args, **kw):
def __init__(self, client_count, thread_count,
fault_probability, restart_ratio, kill_mysqld,
logrotate, *args, **kw):
self.client_count = client_count
self.thread_count = thread_count
self.logrotate = logrotate
self.fault_probability = fault_probability
self.restart_ratio = restart_ratio
self.cluster = cluster = NEOCluster(*args, **kw)
# Make the firewall also affect connections between storage nodes.
......@@ -326,7 +329,24 @@ class Application(StressApplication):
def __init__(self, config):
dscpPatch(1)
StorageApplication__init__(self, config)
StorageApplication.__init__ = __init__
if kill_mysqld:
from neo.scripts import neostorage
from neo.storage.database import mysqldb
neostorage_main = neostorage.main
self._kill_mysqld = kill_mysqld = SimpleQueue()
def main():
pid = os.getpid()
try:
neostorage_main()
except mysqldb.OperationalError as e:
code = e.args[0]
except mysqldb.MysqlError as e:
code = e.code
if mysqldb.SERVER_LOST != code != mysqldb.SERVER_GONE_ERROR:
raise
kill_mysqld.put(pid)
neostorage.main = main
super(Application, self).__init__(cluster.SSL,
util.parseMasterList(cluster.master_nodes))
......@@ -398,6 +418,10 @@ class Application(StressApplication):
t = threading.Thread(target=self._logrotate_thread)
t.daemon = 1
t.start()
if self._kill_mysqld:
t = threading.Thread(target=self._watch_storage_thread)
t.daemon = 1
t.start()
def stopCluster(self, wait=None):
self.restart_lock.acquire()
......@@ -471,9 +495,26 @@ class Application(StressApplication):
except ValueError:
pass
def _watch_storage_thread(self):
get = self._kill_mysqld.get
storage_list = self.cluster.getStorageProcessList()
while 1:
pid = get()
p, = (p for p in storage_list if p.pid == pid)
p.wait()
p.start()
def restartStorages(self, nids):
processes = [p for p in self.cluster.getStorageProcessList()
storage_list = self.cluster.getStorageProcessList()
if self._kill_mysqld:
db_list = [db for db, p in zip(self.cluster.db_list, storage_list)
if p.uuid in nids]
mysql_pool.kill(*db_list)
time.sleep(1)
with open(os.devnull, "wb") as f:
mysql_pool.start(*db_list, stderr=f)
else:
processes = [p for p in storage_list if p.uuid in nids]
for p in processes: p.kill(signal.SIGKILL)
time.sleep(1)
for p in processes: p.wait()
......@@ -548,6 +589,7 @@ def main():
default=socket.AF_INET, const=socket.AF_INET6, help='(default: IPv4)')
_('-a', '--adapter', choices=adapters, default=default_adapter)
_('-d', '--datadir', help="(default: same as unit tests)")
_('-e', '--engine', help="database engine (MySQL only)")
_('-l', '--logdir', help="(default: same as --datadir)")
_('-m', '--masters', type=int, default=1)
_('-s', '--storages', type=int, default=8)
......@@ -571,9 +613,14 @@ def main():
help='number of client processes')
_('-t', '--threads', type=int, default=1,
help='number of thread workers per client process')
_('-f', '--fault-probability', type=ratio, default=1, metavar='P',
help='probability to cause faults every second')
_('-r', '--restart-ratio', type=ratio, default=.5, metavar='RATIO',
help='probability to kill/restart a storage node, rather than just'
' RSTing a TCP connection with this node')
_('--kill-mysqld', action='store_true',
help='if r != 0 and if NEO_DB_MYCNF is set,'
' kill mysqld rather than storage node')
_('-C', '--console', type=int, default=0,
help='console port (localhost) (default: any)')
_('-D', '--delay', type=float, default=.01,
......@@ -594,18 +641,31 @@ def main():
db_list = ['stress_neo%s' % x for x in xrange(args.storages)]
if args.datadir:
if args.adapter != 'SQLite':
parser.error('--datadir is only for SQLite adapter')
db_list = [os.path.join(args.datadir, x + '.sqlite') for x in db_list]
if args.adapter == 'SQLite':
db_list = [os.path.join(args.datadir, x + '.sqlite')
for x in db_list]
elif mysql_pool:
mysql_pool.__init__(args.datadir)
else:
parser.error(
'--datadir: meaningless when using an existing MySQL server')
kw = dict(db_list=db_list, name='stress',
partitions=args.partitions, replicas=args.replicas,
adapter=args.adapter, address_type=args.address_type,
temp_dir=args.logdir or args.datadir or getTempDirectory())
temp_dir=args.logdir or args.datadir or getTempDirectory(),
storage_kw={'engine': args.engine, 'wait': -1})
if args.command == 'run':
NFQueue.delay = args.delay
app = Application(args.clients, args.threads, args.restart_ratio,
error = args.kill_mysqld and (
'invalid adapter' if args.adapter != 'MySQL' else
None if mysql_pool else 'NEO_DB_MYCNF not set'
)
if error:
parser.error('--kill-mysqld: ' + error)
app = Application(args.clients, args.threads,
args.fault_probability, args.restart_ratio, args.kill_mysqld,
int(round(args.logrotate * 3600, 0)), **kw)
t = threading.Thread(target=console, args=(args.console, app))
t.daemon = 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment