Commit 3a39ac9a authored by Julien Muchembled

Warn when a cell becomes non-readable whereas all cells were readable

An issue that happened for the first time on a storage node didn't always cause
other nodes to flush their logs, which made debugging difficult.
parent 1eed0239
...@@ -79,6 +79,10 @@ class Cell(object): ...@@ -79,6 +79,10 @@ class Cell(object):
class PartitionTable(object): class PartitionTable(object):
"""This class manages a partition table.""" """This class manages a partition table."""
# Flushing logs whenever a cell becomes out-of-date would flood them.
_first_outdated_message = \
'a cell became non-readable whereas all cells were readable'
def __init__(self, num_partitions, num_replicas): def __init__(self, num_partitions, num_replicas):
self._id = None self._id = None
self.np = num_partitions self.np = num_partitions
...@@ -216,16 +220,27 @@ class PartitionTable(object): ...@@ -216,16 +220,27 @@ class PartitionTable(object):
""" """
assert self._id < ptid, (self._id, ptid) assert self._id < ptid, (self._id, ptid)
self._id = ptid self._id = ptid
readable_list = []
for row in self.partition_list:
if not all(cell.isReadable() for cell in row):
del readable_list[:]
break
readable_list += row
for offset, uuid, state in cell_list: for offset, uuid, state in cell_list:
node = nm.getByUUID(uuid) node = nm.getByUUID(uuid)
assert node is not None, 'No node found for uuid ' + uuid_str(uuid) assert node is not None, 'No node found for uuid ' + uuid_str(uuid)
self._setCell(offset, node, state) self._setCell(offset, node, state)
logging.debug('partition table updated (ptid=%s)', ptid) self.logUpdated()
self.log() if not all(cell.isReadable() for cell in readable_list):
logging.warning(self._first_outdated_message)
def filled(self):
    """Return True when the number of filled rows equals the number of
    partitions (self.np)."""
    filled_rows = self.num_filled_rows
    return filled_rows == self.np
def logUpdated(self):
    """Log at debug level that the table was updated, with its current
    ID, then dump the table through log()."""
    ptid = self._id
    logging.debug('partition table updated (ptid=%s)', ptid)
    self.log()
def log(self):
    """Send the output of format() to the debug log."""
    formatted = self.format()
    logging.debug(formatted)
......
...@@ -260,10 +260,9 @@ class Application(BaseApplication): ...@@ -260,10 +260,9 @@ class Application(BaseApplication):
def broadcastPartitionChanges(self, cell_list): def broadcastPartitionChanges(self, cell_list):
"""Broadcast a Notify Partition Changes packet.""" """Broadcast a Notify Partition Changes packet."""
logging.debug('broadcastPartitionChanges')
if cell_list: if cell_list:
self.pt.log()
ptid = self.pt.setNextID() ptid = self.pt.setNextID()
self.pt.logUpdated()
packet = Packets.NotifyPartitionChanges(ptid, cell_list) packet = Packets.NotifyPartitionChanges(ptid, cell_list)
for node in self.nm.getIdentifiedList(): for node in self.nm.getIdentifiedList():
if node.isRunning() and not node.isMaster(): if node.isRunning() and not node.isMaster():
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
from collections import defaultdict from collections import defaultdict
import neo.lib.pt import neo.lib.pt
from neo.lib import logging
from neo.lib.protocol import CellStates, ZERO_TID from neo.lib.protocol import CellStates, ZERO_TID
...@@ -278,6 +279,9 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -278,6 +279,9 @@ class PartitionTable(neo.lib.pt.PartitionTable):
to serve. This allows a cluster restart. to serve. This allows a cluster restart.
""" """
change_list = [] change_list = []
fully_readable = all(cell.isReadable()
for row in self.partition_list
for cell in row)
for offset, row in enumerate(self.partition_list): for offset, row in enumerate(self.partition_list):
lost = lost_node lost = lost_node
cell_list = [] cell_list = []
...@@ -292,6 +296,8 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -292,6 +296,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
cell.setState(CellStates.OUT_OF_DATE) cell.setState(CellStates.OUT_OF_DATE)
change_list.append((offset, cell.getUUID(), change_list.append((offset, cell.getUUID(),
CellStates.OUT_OF_DATE)) CellStates.OUT_OF_DATE))
if fully_readable and change_list:
logging.warning(self._first_outdated_message)
return change_list return change_list
def iterNodeCell(self, node): def iterNodeCell(self, node):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment