Commit 3a39ac9a authored by Julien Muchembled

Warn when a cell becomes non-readable whereas all cells were readable

An issue that happened for the first time on a storage node didn't always cause
other nodes to flush their logs, which made debugging difficult.
parent 1eed0239
......@@ -79,6 +79,10 @@ class Cell(object):
class PartitionTable(object):
"""This class manages a partition table."""
# Flushing logs whenever a cell becomes out-of-date would flood them.
_first_outdated_message = \
'a cell became non-readable whereas all cells were readable'
def __init__(self, num_partitions, num_replicas):
self._id = None
self.np = num_partitions
......@@ -216,16 +220,27 @@ class PartitionTable(object):
"""
assert self._id < ptid, (self._id, ptid)
self._id = ptid
readable_list = []
for row in self.partition_list:
if not all(cell.isReadable() for cell in row):
del readable_list[:]
break
readable_list += row
for offset, uuid, state in cell_list:
node = nm.getByUUID(uuid)
assert node is not None, 'No node found for uuid ' + uuid_str(uuid)
self._setCell(offset, node, state)
logging.debug('partition table updated (ptid=%s)', ptid)
self.log()
self.logUpdated()
if not all(cell.isReadable() for cell in readable_list):
logging.warning(self._first_outdated_message)
def filled(self):
    """Tell whether every row of the partition table is filled.

    Returns True when the ``num_filled_rows`` counter equals ``np``
    (the number of partitions), False otherwise.
    """
    return self.num_filled_rows == self.np
def logUpdated(self):
    """Record that the partition table changed, then dump its content.

    Emits a debug message carrying the current partition table id
    (``self._id``) and delegates to ``self.log()`` for the table itself.
    """
    logging.debug('partition table updated (ptid=%s)', self._id)
    self.log()
def log(self):
    """Emit the output of ``self.format()`` as a debug log message."""
    logging.debug(self.format())
......
......@@ -260,10 +260,9 @@ class Application(BaseApplication):
def broadcastPartitionChanges(self, cell_list):
"""Broadcast a Notify Partition Changes packet."""
logging.debug('broadcastPartitionChanges')
if cell_list:
self.pt.log()
ptid = self.pt.setNextID()
self.pt.logUpdated()
packet = Packets.NotifyPartitionChanges(ptid, cell_list)
for node in self.nm.getIdentifiedList():
if node.isRunning() and not node.isMaster():
......
......@@ -16,6 +16,7 @@
from collections import defaultdict
import neo.lib.pt
from neo.lib import logging
from neo.lib.protocol import CellStates, ZERO_TID
......@@ -278,6 +279,9 @@ class PartitionTable(neo.lib.pt.PartitionTable):
to serve. This allows a cluster restart.
"""
change_list = []
fully_readable = all(cell.isReadable()
for row in self.partition_list
for cell in row)
for offset, row in enumerate(self.partition_list):
lost = lost_node
cell_list = []
......@@ -292,6 +296,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
cell.setState(CellStates.OUT_OF_DATE)
change_list.append((offset, cell.getUUID(),
CellStates.OUT_OF_DATE))
if fully_readable and change_list:
logging.warning(self._first_outdated_message)
return change_list
def iterNodeCell(self, node):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment