Commit 04f72a4c by Julien Muchembled

New feature to check that partitions are replicated properly

This includes an API change of Node.isIdentified, which now tells whether
identification packets have been exchanged or not.
All handlers must be updated to implement '_acceptIdentification' instead of
overriding EventHandler.acceptIdentification; this patch only does so for
StorageOperationHandler.
1 parent 2241c3a1
...@@ -133,6 +133,11 @@ RC - Review output of pylint (CODE) ...@@ -133,6 +133,11 @@ RC - Review output of pylint (CODE)
be done ? hope to find a storage with valid checksum ? assume that data be done ? hope to find a storage with valid checksum ? assume that data
is correct in storage but was altered when it travelled through network is correct in storage but was altered when it travelled through network
as we loaded it ?). as we loaded it ?).
- Check replicas: (HIGH AVAILABILITY)
- Automatically tell corrupted cells to fix their data when a good source
is known.
- Add an option to also check all rows of trans/obj/data, instead of only
keys (trans.tid & obj.{tid,oid}).
Master Master
- Master node data redundancy (HIGH AVAILABILITY) - Master node data redundancy (HIGH AVAILABILITY)
......
...@@ -83,6 +83,7 @@ class AdminEventHandler(EventHandler): ...@@ -83,6 +83,7 @@ class AdminEventHandler(EventHandler):
addPendingNodes = forward_ask(Packets.AddPendingNodes) addPendingNodes = forward_ask(Packets.AddPendingNodes)
setClusterState = forward_ask(Packets.SetClusterState) setClusterState = forward_ask(Packets.SetClusterState)
checkReplicas = forward_ask(Packets.CheckReplicas)
class MasterEventHandler(EventHandler): class MasterEventHandler(EventHandler):
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from functools import wraps
import neo.lib import neo.lib
from .protocol import ( from .protocol import (
NodeStates, Packets, ErrorCodes, Errors, BrokenNodeDisallowedError, NodeStates, Packets, ErrorCodes, Errors, BrokenNodeDisallowedError,
...@@ -121,6 +122,19 @@ class EventHandler(object): ...@@ -121,6 +122,19 @@ class EventHandler(object):
# Packet handlers. # Packet handlers.
def acceptIdentification(self, conn, node_type, *args):
    """Handle an identification acceptance received on `conn`.

    Handlers opt in by defining `_acceptIdentification(node, *args)`;
    when it is missing, UnexpectedPacketError is raised.  The peer node is
    looked up by the address of the connection.  If its type equals
    `node_type`, the node is flagged as identified and handed to the hook,
    otherwise the connection is closed.
    """
    # A private sentinel tells "attribute missing" apart from any value.
    missing = object()
    hook = getattr(self, '_acceptIdentification', missing)
    if hook is missing:
        raise UnexpectedPacketError('no handler found')
    node = self.app.nm.getByAddress(conn.getAddress())
    assert node.getConnection() is conn, (node.getConnection(), conn)
    if not node.getType() == node_type:
        # The peer is not of the announced type: drop the connection.
        conn.close()
        return
    node.setIdentified()
    hook(node, *args)
def ping(self, conn): def ping(self, conn):
conn.answer(Packets.Pong()) conn.answer(Packets.Pong())
......
...@@ -37,6 +37,7 @@ class Node(object): ...@@ -37,6 +37,7 @@ class Node(object):
self._uuid = uuid self._uuid = uuid
self._manager = manager self._manager = manager
self._last_state_change = time() self._last_state_change = time()
self._identified = False
manager.add(self) manager.add(self)
def notify(self, packet): def notify(self, packet):
...@@ -98,6 +99,7 @@ class Node(object): ...@@ -98,6 +99,7 @@ class Node(object):
""" """
assert self._connection is not None assert self._connection is not None
del self._connection del self._connection
self._identified = False
self._manager._updateIdentified(self) self._manager._updateIdentified(self)
def setConnection(self, connection, force=None): def setConnection(self, connection, force=None):
...@@ -113,6 +115,8 @@ class Node(object): ...@@ -113,6 +115,8 @@ class Node(object):
conn = self._connection conn = self._connection
if conn is None: if conn is None:
self._connection = connection self._connection = connection
if connection.isServer():
self.setIdentified()
else: else:
assert force is not None, \ assert force is not None, \
attributeTracker.whoSet(self, '_connection') attributeTracker.whoSet(self, '_connection')
...@@ -127,7 +131,11 @@ class Node(object): ...@@ -127,7 +131,11 @@ class Node(object):
if not force or conn.getPeerId() is not None or \ if not force or conn.getPeerId() is not None or \
type(conn.getHandler()) is not type(connection.getHandler()): type(conn.getHandler()) is not type(connection.getHandler()):
raise ProtocolError("already connected") raise ProtocolError("already connected")
conn.setOnClose(lambda: setattr(self, '_connection', connection)) def on_closed():
self._connection = connection
assert connection.isServer()
self.setIdentified()
conn.setOnClose(on_closed)
conn.close() conn.close()
assert not connection.isClosed(), connection assert not connection.isClosed(), connection
connection.setOnClose(self.onConnectionClosed) connection.setOnClose(self.onConnectionClosed)
...@@ -147,11 +155,15 @@ class Node(object): ...@@ -147,11 +155,15 @@ class Node(object):
return self._connection is not None and (connecting or return self._connection is not None and (connecting or
not self._connection.connecting) not self._connection.connecting)
def setIdentified(self):
    """Flag this node as identified.

    A connection must already be attached to the node (asserted).
    """
    assert self._connection is not None
    self._identified = True
def isIdentified(self): def isIdentified(self):
""" """
Returns True is the node is connected and identified Returns True if identification packets have been exchanged
""" """
return self._connection is not None and self._uuid is not None return self._identified
def __repr__(self): def __repr__(self):
return '<%s(uuid=%s, address=%s, state=%s, connection=%r) at %x>' % ( return '<%s(uuid=%s, address=%s, state=%s, connection=%r) at %x>' % (
...@@ -396,10 +408,13 @@ class NodeManager(object): ...@@ -396,10 +408,13 @@ class NodeManager(object):
def _updateIdentified(self, node): def _updateIdentified(self, node):
uuid = node.getUUID() uuid = node.getUUID()
if node.isIdentified(): if uuid:
self._identified_dict[uuid] = node # XXX: It's probably a bug to include connecting nodes but there's
else: # no API yet to update manager when connection is established.
self._identified_dict.pop(uuid, None) if node.isConnected(connecting=True):
self._identified_dict[uuid] = node
else:
self._identified_dict.pop(uuid, None)
def _updateAddress(self, node, old_address): def _updateAddress(self, node, old_address):
self.__update(self._address_dict, old_address, node.getAddress(), node) self.__update(self._address_dict, old_address, node.getAddress(), node)
......
...@@ -25,7 +25,7 @@ from struct import Struct ...@@ -25,7 +25,7 @@ from struct import Struct
from .util import Enum, getAddressType from .util import Enum, getAddressType
# The protocol version (major, minor). # The protocol version (major, minor).
PROTOCOL_VERSION = (6, 1) PROTOCOL_VERSION = (7, 1)
# Size restrictions. # Size restrictions.
MIN_PACKET_SIZE = 10 MIN_PACKET_SIZE = 10
...@@ -49,6 +49,7 @@ class ErrorCodes(Enum): ...@@ -49,6 +49,7 @@ class ErrorCodes(Enum):
BROKEN_NODE = Enum.Item(5) BROKEN_NODE = Enum.Item(5)
ALREADY_PENDING = Enum.Item(7) ALREADY_PENDING = Enum.Item(7)
REPLICATION_ERROR = Enum.Item(8) REPLICATION_ERROR = Enum.Item(8)
CHECKING_ERROR = Enum.Item(9)
ErrorCodes = ErrorCodes() ErrorCodes = ErrorCodes()
class ClusterStates(Enum): class ClusterStates(Enum):
...@@ -83,6 +84,7 @@ class CellStates(Enum): ...@@ -83,6 +84,7 @@ class CellStates(Enum):
OUT_OF_DATE = Enum.Item(2) OUT_OF_DATE = Enum.Item(2)
FEEDING = Enum.Item(3) FEEDING = Enum.Item(3)
DISCARDED = Enum.Item(4) DISCARDED = Enum.Item(4)
CORRUPTED = Enum.Item(5)
CellStates = CellStates() CellStates = CellStates()
class LockState(Enum): class LockState(Enum):
...@@ -108,6 +110,7 @@ cell_state_prefix_dict = { ...@@ -108,6 +110,7 @@ cell_state_prefix_dict = {
CellStates.OUT_OF_DATE: 'O', CellStates.OUT_OF_DATE: 'O',
CellStates.FEEDING: 'F', CellStates.FEEDING: 'F',
CellStates.DISCARDED: 'D', CellStates.DISCARDED: 'D',
CellStates.CORRUPTED: 'C',
} }
# Other constants. # Other constants.
...@@ -1239,6 +1242,35 @@ class Pack(Packet): ...@@ -1239,6 +1242,35 @@ class Pack(Packet):
PBoolean('status'), PBoolean('status'),
) )
class CheckReplicas(Packet):
    """
    Ask to check the replicas of some partitions, for transactions between
    min_tid and max_tid. The request is answered with an Error packet.

    ctl -> A
    A -> M
    """
    _fmt = PStruct('check_replicas',
        # partition number -> UUID of the node to use as reference source
        PDict('partition_dict',
            PNumber('partition'),
            PUUID('source'),
        ),
        PTID('min_tid'),
        PTID('max_tid'),
    )
    _answer = Error
class CheckPartition(Packet):
    """
    Tell a storage node to check a partition, for transactions between
    min_tid and max_tid. No answer format is defined for this packet.

    M -> S
    """
    _fmt = PStruct('check_partition',
        PNumber('partition'),
        # Reference to compare with: a cluster name and a node address.
        PStruct('source',
            PString('upstream_name'),
            PAddress('address'),
        ),
        PTID('min_tid'),
        PTID('max_tid'),
    )
class CheckTIDRange(Packet): class CheckTIDRange(Packet):
""" """
Ask some stats about a range of transactions. Ask some stats about a range of transactions.
...@@ -1251,15 +1283,13 @@ class CheckTIDRange(Packet): ...@@ -1251,15 +1283,13 @@ class CheckTIDRange(Packet):
S -> S S -> S
""" """
_fmt = PStruct('ask_check_tid_range', _fmt = PStruct('ask_check_tid_range',
PNumber('partition'),
PNumber('length'),
PTID('min_tid'), PTID('min_tid'),
PTID('max_tid'), PTID('max_tid'),
PNumber('length'),
PNumber('partition'),
) )
_answer = PStruct('answer_check_tid_range', _answer = PStruct('answer_check_tid_range',
PTID('min_tid'),
PNumber('length'),
PNumber('count'), PNumber('count'),
PChecksum('checksum'), PChecksum('checksum'),
PTID('max_tid'), PTID('max_tid'),
...@@ -1277,22 +1307,30 @@ class CheckSerialRange(Packet): ...@@ -1277,22 +1307,30 @@ class CheckSerialRange(Packet):
S -> S S -> S
""" """
_fmt = PStruct('ask_check_serial_range', _fmt = PStruct('ask_check_serial_range',
POID('min_oid'),
PTID('min_serial'),
PTID('max_tid'),
PNumber('length'),
PNumber('partition'), PNumber('partition'),
PNumber('length'),
PTID('min_tid'),
PTID('max_tid'),
POID('min_oid'),
) )
_answer = PStruct('answer_check_serial_range', _answer = PStruct('answer_check_serial_range',
POID('min_oid'),
PTID('min_serial'),
PNumber('length'),
PNumber('count'), PNumber('count'),
PChecksum('tid_checksum'),
PTID('max_tid'),
PChecksum('oid_checksum'), PChecksum('oid_checksum'),
POID('max_oid'), POID('max_oid'),
PChecksum('serial_checksum'), )
PTID('max_serial'),
class PartitionCorrupted(Packet):
"""
S -> M
"""
_fmt = PStruct('partition_corrupted',
PNumber('partition'),
PList('cell_list',
PUUID('uuid'),
),
) )
class LastTransaction(Packet): class LastTransaction(Packet):
...@@ -1601,10 +1639,16 @@ class Packets(dict): ...@@ -1601,10 +1639,16 @@ class Packets(dict):
TIDListFrom) TIDListFrom)
AskPack, AnswerPack = register( AskPack, AnswerPack = register(
Pack, ignore_when_closed=False) Pack, ignore_when_closed=False)
CheckReplicas = register(
CheckReplicas)
CheckPartition = register(
CheckPartition)
AskCheckTIDRange, AnswerCheckTIDRange = register( AskCheckTIDRange, AnswerCheckTIDRange = register(
CheckTIDRange) CheckTIDRange)
AskCheckSerialRange, AnswerCheckSerialRange = register( AskCheckSerialRange, AnswerCheckSerialRange = register(
CheckSerialRange) CheckSerialRange)
NotifyPartitionCorrupted = register(
PartitionCorrupted)
NotifyReady = register( NotifyReady = register(
NotifyReady) NotifyReady)
AskLastTransaction, AnswerLastTransaction = register( AskLastTransaction, AnswerLastTransaction = register(
......
...@@ -34,7 +34,7 @@ class Cell(object): ...@@ -34,7 +34,7 @@ class Cell(object):
def __init__(self, node, state = CellStates.UP_TO_DATE): def __init__(self, node, state = CellStates.UP_TO_DATE):
self.node = node self.node = node
self.setState(state) self.state = state
def __repr__(self): def __repr__(self):
return "<Cell(uuid=%s, address=%s, state=%s)>" % ( return "<Cell(uuid=%s, address=%s, state=%s)>" % (
...@@ -59,6 +59,13 @@ class Cell(object): ...@@ -59,6 +59,13 @@ class Cell(object):
def isFeeding(self): def isFeeding(self):
return self.state == CellStates.FEEDING return self.state == CellStates.FEEDING
def isCorrupted(self):
    """Return whether the state of this cell is CORRUPTED."""
    return self.state == CellStates.CORRUPTED
def isReadable(self):
    """Return whether the state of this cell is UP_TO_DATE or FEEDING."""
    if self.state == CellStates.UP_TO_DATE:
        return True
    return self.state == CellStates.FEEDING
def getNode(self): def getNode(self):
return self.node return self.node
...@@ -122,6 +129,12 @@ class PartitionTable(object): ...@@ -122,6 +129,12 @@ class PartitionTable(object):
except IndexError: except IndexError:
return False return False
def getNodeSet(self):
    """Return the set of nodes that own a cell in at least one partition."""
    node_set = set()
    for row in self.partition_list:
        for cell in row:
            node_set.add(cell.getNode())
    return node_set
def getConnectedNodeList(self):
    """Return the list of nodes from getNodeSet() that are connected."""
    connected = []
    for node in self.getNodeSet():
        if node.isConnected():
            connected.append(node)
    return connected
def getNodeList(self): def getNodeList(self):
"""Return all used nodes.""" """Return all used nodes."""
return [node for node, count in self.count_dict.iteritems() \ return [node for node, count in self.count_dict.iteritems() \
...@@ -129,8 +142,7 @@ class PartitionTable(object): ...@@ -129,8 +142,7 @@ class PartitionTable(object):
def getCellList(self, offset, readable=False): def getCellList(self, offset, readable=False):
if readable: if readable:
return [cell for cell in self.partition_list[offset] return filter(Cell.isReadable, self.partition_list[offset])
if not cell.isOutOfDate()]
return list(self.partition_list[offset]) return list(self.partition_list[offset])
def getPartition(self, oid_or_tid): def getPartition(self, oid_or_tid):
...@@ -280,7 +292,7 @@ class PartitionTable(object): ...@@ -280,7 +292,7 @@ class PartitionTable(object):
return False return False
for row in self.partition_list: for row in self.partition_list:
for cell in row: for cell in row:
if not cell.isOutOfDate() and cell.getNode().isRunning(): if cell.isReadable() and cell.getNode().isRunning():
break break
else: else:
return False return False
......
...@@ -279,7 +279,7 @@ class BackupApplication(object): ...@@ -279,7 +279,7 @@ class BackupApplication(object):
primary = primary_node is node primary = primary_node is node
result = None if primary else app.pt.setUpToDate(node, offset) result = None if primary else app.pt.setUpToDate(node, offset)
if app.getClusterState() == ClusterStates.BACKINGUP: if app.getClusterState() == ClusterStates.BACKINGUP:
assert not cell.isOutOfDate() assert cell.isReadable()
if result: # was out-of-date if result: # was out-of-date
max_tid, = [x.backup_tid for x in cell_list max_tid, = [x.backup_tid for x in cell_list
if x.getNode() is primary_node] if x.getNode() is primary_node]
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import random
import neo import neo
from . import MasterHandler from . import MasterHandler
...@@ -162,3 +163,48 @@ class AdministrationHandler(MasterHandler): ...@@ -162,3 +163,48 @@ class AdministrationHandler(MasterHandler):
# broadcast the new partition table # broadcast the new partition table
app.broadcastPartitionChanges(cell_list) app.broadcastPartitionChanges(cell_list)
conn.answer(Errors.Ack('Nodes added: %s' % (uuids, ))) conn.answer(Errors.Ack('Nodes added: %s' % (uuids, )))
def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
    """
    Ask storage nodes to check partitions, then acknowledge the request.

    partition_dict maps a partition offset to the UUID of the node to use
    as reference, or to a false value to let this method choose one.
    A false max_tid is replaced by pt.getCheckTid(partition_dict) when the
    cluster is in BACKINGUP state, and by the last transaction otherwise.
    If min_tid > max_tid, a warning is logged and nothing is sent.
    In every case, an Ack is answered on conn.
    """
    app = self.app
    pt = app.pt
    backingup = app.cluster_state == ClusterStates.BACKINGUP
    if not max_tid:
        max_tid = pt.getCheckTid(partition_dict) if backingup else \
            app.getLastTransaction()
    if min_tid > max_tid:
        neo.lib.logging.warning("nothing to check: min_tid=%s > max_tid=%s",
            dump(min_tid), dump(max_tid))
    else:
        getByUUID = app.nm.getByUUID
        # Nodes that were newly selected to check a partition.
        node_set = set()
        for offset, source in partition_dict.iteritems():
            # XXX: For the moment, code checking replicas is unable to fix
            #      corrupted partitions (when a good cell is known)
            #      so only check readable ones.
            #      (see also Checker._nextPartition of storage)
            cell_list = pt.getCellList(offset, True)
            #cell_list = [cell for cell in pt.getCellList(offset)
            #             if not cell.isOutOfDate()]
            # At least 2 copies are needed to compare anything. When backing
            # up without explicit source, 1 more is counted (an upstream
            # cell is used as source below).
            if len(cell_list) + (backingup and not source) <= 1:
                continue
            # Prefer a node that is already in node_set. If there is none,
            # the loop ends with the node of the last cell, which is then
            # recorded.
            for cell in cell_list:
                node = cell.getNode()
                if node in node_set:
                    break
            else:
                node_set.add(node)
            # Turn 'source' into the (upstream_name, address) pair of the
            # CheckPartition packet.
            if source:
                source = '', getByUUID(source).getAddress()
            else:
                # NOTE(review): cell_list holds only readable cells here, so
                # 'readable' has the same length and the first branch below
                # cannot be taken for the moment (see XXX above).
                readable = [cell for cell in cell_list if cell.isReadable()]
                if 1 == len(readable) < len(cell_list):
                    source = '', readable[0].getAddress()
                elif backingup:
                    # Random readable cell of the backup application's
                    # partition table.
                    source = app.backup_app.name, random.choice(
                        app.backup_app.pt.getCellList(offset, readable=True)
                        ).getAddress()
                else:
                    source = '', None
            node.getConnection().notify(Packets.CheckPartition(
                offset, source, min_tid, max_tid))
    conn.answer(Errors.Ack(''))
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import neo.lib import neo.lib
from neo.lib.protocol import ClusterStates, Packets, ProtocolError from neo.lib.protocol import CellStates, ClusterStates, Packets, ProtocolError
from neo.lib.exception import OperationFailure from neo.lib.exception import OperationFailure
from neo.lib.util import dump from neo.lib.util import dump
from neo.lib.connector import ConnectorConnectionClosedException from neo.lib.connector import ConnectorConnectionClosedException
...@@ -72,6 +72,17 @@ class StorageServiceHandler(BaseServiceHandler): ...@@ -72,6 +72,17 @@ class StorageServiceHandler(BaseServiceHandler):
# transaction locked on this storage node # transaction locked on this storage node
self.app.tm.lock(ttid, conn.getUUID()) self.app.tm.lock(ttid, conn.getUUID())
def notifyPartitionCorrupted(self, conn, partition, cell_list):
    """Mark the reported cells of a partition as CORRUPTED.

    cell_list contains the UUIDs of the corrupted cells. The resulting
    changes are broadcasted, then OperationFailure is raised if the
    partition table is not operational anymore.
    """
    app = self.app
    corrupted = CellStates.CORRUPTED
    change_list = []
    for cell in app.pt.getCellList(partition):
        uuid = cell.getUUID()
        if uuid not in cell_list:
            continue
        cell.setState(corrupted)
        change_list.append((partition, uuid, corrupted))
    app.broadcastPartitionChanges(change_list)
    if app.pt.operational():
        return
    raise OperationFailure('cannot continue operation')
def notifyReplicationDone(self, conn, offset, tid): def notifyReplicationDone(self, conn, offset, tid):
app = self.app app = self.app
node = app.nm.getByUUID(conn.getUUID()) node = app.nm.getByUUID(conn.getUUID())
......
...@@ -25,12 +25,13 @@ class Cell(neo.lib.pt.Cell): ...@@ -25,12 +25,13 @@ class Cell(neo.lib.pt.Cell):
replicating = ZERO_TID replicating = ZERO_TID
def setState(self, state): def setState(self, state):
try: readable = self.isReadable()
if CellStates.OUT_OF_DATE == state != self.state: super(Cell, self).setState(state)
if readable and not self.isReadable():
try:
del self.backup_tid, self.replicating del self.backup_tid, self.replicating
except AttributeError: except AttributeError:
pass pass
return super(Cell, self).setState(state)
neo.lib.pt.Cell = Cell neo.lib.pt.Cell = Cell
...@@ -196,7 +197,7 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -196,7 +197,7 @@ class PartitionTable(neo.lib.pt.PartitionTable):
CellStates.OUT_OF_DATE)) CellStates.OUT_OF_DATE))
node_count += 1 node_count += 1
elif node_count + 1 < max_count: elif node_count + 1 < max_count:
if feeding_cell is not None or max_cell.isOutOfDate(): if feeding_cell is not None or not max_cell.isReadable():
# If there is a feeding cell already or it is # If there is a feeding cell already or it is
# out-of-date, just drop the node. # out-of-date, just drop the node.
row.remove(max_cell) row.remove(max_cell)
...@@ -239,10 +240,10 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -239,10 +240,10 @@ class PartitionTable(neo.lib.pt.PartitionTable):
else: else:
# Remove an excessive feeding cell. # Remove an excessive feeding cell.
removed_cell_list.append(cell) removed_cell_list.append(cell)
elif cell.isOutOfDate(): elif cell.isUpToDate():
out_of_date_cell_list.append(cell)
else:
up_to_date_cell_list.append(cell) up_to_date_cell_list.append(cell)
else:
out_of_date_cell_list.append(cell)
# If all cells are up-to-date, a feeding cell is not required. # If all cells are up-to-date, a feeding cell is not required.
if len(out_of_date_cell_list) == 0 and feeding_cell is not None: if len(out_of_date_cell_list) == 0 and feeding_cell is not None:
...@@ -311,7 +312,7 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -311,7 +312,7 @@ class PartitionTable(neo.lib.pt.PartitionTable):
lost = lost_node lost = lost_node
cell_list = [] cell_list = []
for cell in row: for cell in row:
if not cell.isOutOfDate(): if cell.isReadable():
if cell.getNode().isRunning(): if cell.getNode().isRunning():
lost = None lost = None
else : else :
...@@ -330,7 +331,7 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -330,7 +331,7 @@ class PartitionTable(neo.lib.pt.PartitionTable):
yield offset, cell yield offset, cell
break break
def getUpToDateCellNodeSet(self): def getReadableCellNodeSet(self):
""" """
Return a set of all nodes which are part of at least one UP TO DATE Return a set of all nodes which are part of at least one UP TO DATE
partition. partition.
...@@ -338,17 +339,7 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -338,17 +339,7 @@ class PartitionTable(neo.lib.pt.PartitionTable):
return set(cell.getNode() return set(cell.getNode()
for row in self.partition_list for row in self.partition_list
for cell in row for cell in row
if not cell.isOutOfDate()) if cell.isReadable())
def getOutOfDateCellNodeSet(self):
"""
Return a set of all nodes which are part of at least one OUT OF DATE
partition.
"""
return set(cell.getNode()
for row in self.partition_list
for cell in row
if cell.isOutOfDate())
def setBackupTidDict(self, backup_tid_dict): def setBackupTidDict(self, backup_tid_dict):
for row in self.partition_list: for row in self.partition_list:
...@@ -358,8 +349,16 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -358,8 +349,16 @@ class PartitionTable(neo.lib.pt.PartitionTable):
def getBackupTid(self): def getBackupTid(self):
try: try:
return min(max(cell.backup_tid for cell in row return min(max(cell.backup_tid for cell in row if cell.isReadable())
if not cell.isOutOfDate())
for row in self.partition_list) for row in self.partition_list)
except ValueError: except ValueError:
return ZERO_TID return ZERO_TID
def getCheckTid(self, partition_list):
    """
    Return the lowest backup_tid among the readable cells of the given
    partitions (an iterable of partition offsets).

    ZERO_TID is returned if no offset is given, or if one of the
    partitions has no readable cell.
    """
    lowest_tid_list = []
    try:
        for offset in partition_list:
            row = self.partition_list[offset]
            # min() raises ValueError when no cell of the row is readable.
            lowest_tid_list.append(min(cell.backup_tid
                                       for cell in row
                                       if cell.isReadable()))
        return min(lowest_tid_list)
    except ValueError:
        return ZERO_TID
...@@ -65,39 +65,39 @@ class RecoveryManager(MasterHandler): ...@@ -65,39 +65,39 @@ class RecoveryManager(MasterHandler):
if pt.filled(): if pt.filled():
# A partition table exists, we are starting an existing # A partition table exists, we are starting an existing
# cluster. # cluster.
partition_node_set = pt.getUpToDateCellNodeSet() partition_node_set = pt.getReadableCellNodeSet()
pending_node_set = set(x for x in partition_node_set pending_node_set = set(x for x in partition_node_set
if x.isPending()) if x.isPending())
if app._startup_allowed or \ if app._startup_allowed or \
partition_node_set == pending_node_set: partition_node_set == pending_node_set:
allowed_node_set = pending_node_set allowed_node_set = pending_node_set
extra_node_set = pt.getOutOfDateCellNodeSet() node_list = pt.getConnectedNodeList
elif app._startup_allowed: elif app._startup_allowed:
# No partition table and admin allowed startup, we are # No partition table and admin allowed startup, we are
# creating a new cluster out of all pending nodes. # creating a new cluster out of all pending nodes.
allowed_node_set = set(app.nm.getStorageList( allowed_node_set = set(app.nm.getStorageList(
only_identified=True)) only_identified=True))
extra_node_set = set() node_list = lambda: allowed_node_set
if allowed_node_set: if allowed_node_set:
for node in allowed_node_set: for node in allowed_node_set:
assert node.isPending(), node assert node.isPending(), node
if node.getConnection().isPending(): if node.getConnection().isPending():
break break
else: else:
allowed_node_set |= extra_node_set node_list = node_list()
break break
neo.lib.logging.info('startup allowed') neo.lib.logging.info('startup allowed')
for node in allowed_node_set: for node in node_list:
node.setRunning() node.setRunning()
app.broadcastNodesInformation(allowed_node_set) app.broadcastNodesInformation(node_list)
if pt.getID() is None: if pt.getID() is None:
neo.lib.logging.info('creating a new partition table') neo.lib.logging.info('creating a new partition table')
# reset IDs generators & build new partition with running nodes # reset IDs generators & build new partition with running nodes
app.tm.setLastOID(ZERO_OID) app.tm.setLastOID(ZERO_OID)
pt.make(allowed_node_set) pt.make(node_list)
self._broadcastPartitionTable(pt.getID(), pt.getRowList()) self._broadcastPartitionTable(pt.getID(), pt.getRowList())
elif app.backup_tid: elif app.backup_tid:
pt.setBackupTidDict(self.backup_tid_dict) pt.setBackupTidDict(self.backup_tid_dict)
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
from .neoctl import NeoCTL, NotReadyException from .neoctl import NeoCTL, NotReadyException
from neo.lib.util import bin, dump from neo.lib.util import bin, dump
from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, ZERO_TID
action_dict = { action_dict = {
'print': { 'print': {
...@@ -30,6 +30,7 @@ action_dict = { ...@@ -30,6 +30,7 @@ action_dict = {
'node': 'setNodeState', 'node': 'setNodeState',
'cluster': 'setClusterState', 'cluster': 'setClusterState',
}, },
'check': 'checkReplicas',
'start': 'startCluster', 'start': 'startCluster',
'add': 'enableStorageList', 'add': 'enableStorageList',
'drop': 'dropNode', 'drop': 'dropNode',
...@@ -187,6 +188,33 @@ class TerminalNeoCTL(object): ...@@ -187,6 +188,33 @@ class TerminalNeoCTL(object):
""" """
return self.formatUUID(self.neoctl.getPrimary()) return self.formatUUID(self.neoctl.getPrimary())
def checkReplicas(self, params):
    """
      Parameters: [partition]:[reference] ... [min_tid [max_tid]]
    """
    # p64 is used below but this module only imports 'bin' and 'dump' from
    # neo.lib.util, so bring it into scope here to avoid a NameError.
    from neo.lib.util import p64
    partition_dict = {}
    params = iter(params)
    min_tid = ZERO_TID
    max_tid = None
    for p in params:
        try:
            partition, source = p.split(':')
        except ValueError:
            # Not of the form 'partition:reference': this parameter is
            # min_tid, optionally followed by max_tid. Anything after them
            # is ignored.
            # NOTE(review): p64 presumably packs an integer into a TID, so
            # command-line strings are converted first (base 0 also accepts
            # a '0x' prefix) -- confirm against neo.lib.util.
            min_tid = p64(int(p, 0))
            try:
                max_tid = p64(int(params.next(), 0))
            except StopIteration:
                pass
            break
        source = bin(source) if source else None
        if partition:
            partition_dict[int(partition)] = source
        else:
            # An empty partition selects all partitions, which can not be
            # combined with explicitly listed ones given before.
            assert not partition_dict
            np = len(self.neoctl.getPartitionRowList()[1])
            partition_dict = dict.fromkeys(xrange(np), source)
    self.neoctl.checkReplicas(partition_dict, min_tid, max_tid)
class Application(object): class Application(object):
"""The storage node application.""" """The storage node application."""
......
...@@ -163,3 +163,8 @@ class NeoCTL(object): ...@@ -163,3 +163,8 @@ class NeoCTL(object):
raise RuntimeError(response) raise RuntimeError(response)
return response[1] return response[1]
def checkReplicas(self, *args):
    """
    Send a CheckReplicas request built from args and return the third
    item of the response.

    RuntimeError(response) is raised unless the response is an Error
    packet carrying the ACK code.
    """
    response = self.__ask(Packets.CheckReplicas(*args))
    if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
        raise RuntimeError(response)
    return response[2]
...@@ -28,6 +28,7 @@ from neo.lib.connector import getConnectorHandler ...@@ -28,6 +28,7 @@ from neo.lib.connector import getConnectorHandler
from neo.lib.pt import PartitionTable from neo.lib.pt import PartitionTable
from neo.lib.util import dump from neo.lib.util import dump
from neo.lib.bootstrap import BootstrapManager from neo.lib.bootstrap import BootstrapManager
from .checker import Checker
from .database import buildDatabaseManager from .database import buildDatabaseManager
from .exception import AlreadyPendingError from .exception import AlreadyPendingError
from .handlers import identification, verification, initialization from .handlers import identification, verification, initialization
...@@ -66,6 +67,7 @@ class Application(object): ...@@ -66,6 +67,7 @@ class Application(object):
# partitions. # partitions.
self.pt = None self.pt = None
self.checker = Checker(self)
self.replicator = Replicator(self) self.replicator = Replicator(self)
self.listening_conn = None self.listening_conn = None
self.master_conn = None self.master_conn = None
...@@ -207,6 +209,8 @@ class Application(object): ...@@ -207,6 +209,8 @@ class Application(object):
neo.lib.logging.error('operation stopped: %s', msg) neo.lib.logging.error('operation stopped: %s', msg)
except PrimaryFailure, msg: except PrimaryFailure, msg:
neo.lib.logging.error('primary master is down: %s', msg) neo.lib.logging.error('primary master is down: %s', msg)
finally:
self.checker = Checker(self)
def connectToPrimary(self): def connectToPrimary(self):
"""Find a primary master node, and connect to it. """Find a primary master node, and connect to it.
...@@ -369,6 +373,11 @@ class Application(object): ...@@ -369,6 +373,11 @@ class Application(object):
return return
self.task_queue.appendleft(iterator) self.task_queue.appendleft(iterator)
def closeClient(self, connection):
    """Call connection.closeClient() unless the connection is still used.

    Nothing is done if it is the current connection of the replicator, or
    if it is present in the conn_dict of the checker.
    """
    if connection is self.replicator.getCurrentConnection():
        return
    if connection in self.checker.conn_dict:
        return
    connection.closeClient()
def shutdown(self, erase=False): def shutdown(self, erase=False):
"""Close all connections and exit""" """Close all connections and exit"""
for c in self.em.getConnectionList(): for c in self.em.getConnectionList():
......
##############################################################################
#
# Copyright (c) 2011 Nexedi SARL and Contributors. All Rights Reserved.
# Julien Muchembled <jm@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial