Commit 08742377 authored by Julien Muchembled

More bugfixes to backup mode

- catch OperationFailure
- reset transaction manager when leaving backup mode
- send appropriate target tid to a storage that updates an outdated cell
- clean up partition table when leaving BACKINGUP state unexpectedly
- make sure all readable cells of a partition have the same 'backup_tid'
  if they have the same data, so that we can tell whether internal
  replication has finished when leaving backup mode
- fix storages that have not finished internal replication when leaving backup mode
parent 92a828dc
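Taken together, these fixes reshape the master's main loop (the master Application hunk below). Here is a minimal sketch of the resulting control flow; the names (run_recovery, provide_backup_service, ...) are placeholders, not NEO's actual API, and only the ordering mirrors the diff:

class OperationFailure(Exception):
    """The cluster is no longer operational."""

def serve_forever(app):
    app.run_recovery()                 # initial recovery phase
    while True:
        app.run_verification()
        try:
            if app.backup_tid:         # BACKINGUP state
                app.provide_backup_service()
                # Leaving backup mode: storages were notified to
                # truncate, so go through another recovery phase to
                # load a correct last oid/tid and reset the
                # transaction manager.
                app.run_recovery()
                continue
            app.provide_service()      # normal operation
        except OperationFailure:
            # Not operational any more: ask storage and client nodes
            # to stop operation.
            for node in app.identified_nodes():
                node.notify_stop_operation()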
@@ -23,6 +23,9 @@ RC - Review output of pylint (CODE)
Consider the need to implement a keep-alive system (packets sent
automatically when there is no activity on the connection for a period
of time).
- When all cells are OUT_OF_DATE in backup mode, the one with most data
could become UP_TO_DATE with appropriate backup_tid, so that the cluster
stays operational (FEATURE).
- Finish renaming UUID into NID everywhere (CODE)
- Consider using multicast for cluster-wide notifications. (BANDWIDTH)
Currently, multi-receivers notifications are sent in unicast to each
......
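The new TODO item above suggests promoting the least-outdated cell. A hypothetical helper (cells and backup_tid stand in for NEO's partition-table objects) illustrates the intended selection rule:

def pick_cell_to_promote(cells):
    # All cells are OUT_OF_DATE; backup_tid grows as a cell replicates,
    # so the one with the highest backup_tid holds the most data and is
    # the natural candidate to become UP_TO_DATE.
    return max(cells, key=lambda cell: cell.backup_tid)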
@@ -26,7 +26,7 @@ except ImportError:
pass
# The protocol version (major, minor).
PROTOCOL_VERSION = (13, 1)
PROTOCOL_VERSION = (14, 1)
# Size restrictions.
MIN_PACKET_SIZE = 10
@@ -1497,12 +1497,12 @@ class ReplicationDone(Packet):
class Truncate(Packet):
"""
XXX: Used both to make storages consistent and to leave backup mode
M -> S
"""
_fmt = PStruct('ask_truncate',
_fmt = PStruct('truncate',
PTID('tid'),
)
_answer = PFEmpty
StaticRegistry = {}
@@ -1723,7 +1723,7 @@ class Packets(dict):
AddTransaction)
AddObject = register(
AddObject)
AskTruncate, AnswerTruncate = register(
Truncate = register(
Truncate)
def Errors():
......
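The protocol hunk above turns the AskTruncate/AnswerTruncate request pair into a single one-way Truncate notification, and bumps the protocol version so that nodes speaking the old protocol are rejected. As a rough sketch (not NEO's real serializer, which this page does not show), assuming TIDs are 8-byte big-endian integers, such a fire-and-forget packet reduces to:

import struct

def encode_truncate(tid):
    # Hypothetical wire payload: just the target TID. No answer is
    # expected, so the sender can notify and then abort the connection.
    return struct.pack('!Q', tid)

def decode_truncate(payload):
    return struct.unpack('!Q', payload)[0]

assert decode_truncate(encode_truncate(0x0123456789abcdef)) == 0x0123456789abcdef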
@@ -279,10 +279,6 @@ class Application(object):
try:
while True:
poll(1)
except OperationFailure:
# If not operational, send Stop Operation packets to storage
# nodes and client nodes. Abort connections to client nodes.
logging.critical('No longer operational')
except StateChangedException, e:
if e.args[0] != ClusterStates.STARTING_BACKUP:
raise
@@ -337,13 +333,20 @@ class Application(object):
self.runManager(RecoveryManager)
while True:
self.runManager(VerificationManager)
if self.backup_tid:
if self.backup_app is None:
raise RuntimeError("No upstream cluster to backup"
" defined in configuration")
self.backup_app.provideService()
else:
try:
if self.backup_tid:
if self.backup_app is None:
raise RuntimeError("No upstream cluster to backup"
" defined in configuration")
self.backup_app.provideService()
# Reset connection with storages (and go through a
# recovery phase) when leaving backup mode in order
# to get correct last oid/tid.
self.runManager(RecoveryManager)
continue
self.provideService()
except OperationFailure:
logging.critical('No longer operational')
for node in self.nm.getIdentifiedList():
if node.isStorage() or node.isClient():
node.notify(Packets.StopOperation())
@@ -463,8 +466,6 @@ class Application(object):
while self.tm.hasPending():
self.em.poll(1)
except OperationFailure:
# If not operational, send Stop Operation packets to storage
# nodes and client nodes. Abort connections to client nodes.
logging.critical('No longer operational')
logging.info("asking remaining nodes to shutdown")
......
@@ -21,9 +21,10 @@ from neo.lib import logging
from neo.lib.bootstrap import BootstrapManager
from neo.lib.connector import getConnectorHandler
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.node import NodeManager
from neo.lib.protocol import CellStates, ClusterStates, NodeTypes, Packets
from neo.lib.protocol import uuid_str, INVALID_TID, ZERO_TID
from neo.lib.protocol import CellStates, ClusterStates, \
NodeStates, NodeTypes, Packets, uuid_str, INVALID_TID, ZERO_TID
from neo.lib.util import add64, dump
from .app import StateChangedException
from .pt import PartitionTable
@@ -144,26 +145,32 @@ class BackupApplication(object):
while pt.getCheckTid(xrange(pt.getPartitions())) < tid:
poll(1)
last_tid = app.getLastTransaction()
handler = EventHandler(app)
if tid < last_tid:
assert tid != ZERO_TID
logging.warning("Truncating at %s (last_tid was %s)",
dump(app.backup_tid), dump(last_tid))
p = Packets.AskTruncate(tid)
connection_list = []
for node in app.nm.getStorageList(only_identified=True):
conn = node.getConnection()
conn.ask(p)
connection_list.append(conn)
for conn in connection_list:
while conn.isPending():
poll(1)
app.setLastTransaction(tid)
# XXX: We want to go through a recovery phase in order to
# initialize the transaction manager, but this is only
# possible if storages already know that we left backup
# mode. To that purpose, we always send a Truncate packet,
# even if there's nothing to truncate.
p = Packets.Truncate(tid)
for node in app.nm.getStorageList(only_identified=True):
conn = node.getConnection()
conn.setHandler(handler)
node.setState(NodeStates.TEMPORARILY_DOWN)
# Packets will be sent at the beginning of the recovery
# phase.
conn.notify(p)
conn.abort()
# If any error happened before reaching this line, we'd go back
# to backup mode, which is the right mode to recover.
del app.backup_tid
break
finally:
del self.primary_partition_dict, self.tid_list
pt.clearReplicating()
def nodeLost(self, node):
getCellList = self.app.pt.getCellList
@@ -205,9 +212,25 @@ class BackupApplication(object):
node_list = []
for cell in pt.getCellList(offset, readable=True):
node = cell.getNode()
assert node.isConnected()
assert cell.backup_tid < last_max_tid or \
cell.backup_tid == prev_tid
assert node.isConnected(), node
if cell.backup_tid == prev_tid:
# Given 4 TIDs t0 < t1 < t2 < t3: if a cell is only
# modified by t0 & t3 and has all data up to t0, 4 values
# are possible for its 'backup_tid' until it replicates
# up to t3: t0, t1, t2 or t3 - 1
# Choosing the smallest one (t0) would be easier to
# implement but, when leaving backup mode, we would always
# lose data if the last full transaction did not modify
# all partitions. t1 is wrong for the same reason.
# So we have chosen the highest one (t3 - 1).
# t2 would also work but is maybe harder to implement.
cell.backup_tid = add64(tid, -1)
logging.debug(
"partition %u: updating backup_tid of %r to %s",
offset, cell, dump(cell.backup_tid))
else:
assert cell.backup_tid < last_max_tid, (
cell.backup_tid, last_max_tid, prev_tid, tid)
if app.isStorageReady(node.getUUID()):
node_list.append(node)
assert node_list
......
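The t0...t3 comment above is easier to follow with concrete values. This sketch re-implements add64 under the assumption (consistent with its use here) that TIDs are packed 64-bit big-endian integers, then replays the scenario:

import struct

def add64(packed, offset):
    # assumed contract of neo.lib.util.add64
    return struct.pack('!Q', struct.unpack('!Q', packed)[0] + offset)

t0, t1, t2, t3 = (struct.pack('!Q', n) for n in (10, 20, 30, 40))

# A cell modified only by t0 and t3 that has replicated everything up
# to t0: no transaction in ]t0, t3[ touches it, so its backup_tid can
# safely jump to t3 - 1. Any smaller value (t0, t1 or t2) would lose
# data when leaving backup mode if the last full transaction does not
# modify all partitions.
backup_tid = add64(t3, -1)
assert t0 < backup_tid < t3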
@@ -16,7 +16,7 @@
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.protocol import CellStates
from neo.lib.protocol import CellStates, ZERO_TID
class BackupHandler(EventHandler):
"""Handler dedicated to upstream master during BACKINGUP state"""
@@ -39,7 +39,10 @@ class BackupHandler(EventHandler):
def answerLastTransaction(self, conn, tid):
app = self.app
app.invalidatePartitions(tid, set(xrange(app.pt.getPartitions())))
if tid != ZERO_TID:
app.invalidatePartitions(tid, set(xrange(app.pt.getPartitions())))
else: # upstream DB is empty
assert app.app.getLastTransaction() == tid
def invalidateObjects(self, conn, tid, oid_list):
app = self.app
......
@@ -48,6 +48,8 @@ class IdentificationHandler(MasterHandler):
handler = app.client_service_handler
human_readable_node_type = ' client '
elif node_type == NodeTypes.STORAGE:
if app.cluster_state == ClusterStates.STOPPING_BACKUP:
raise NotReadyError
node_ctor = app.nm.createStorage
manager = app._current_manager
if manager is None:
......
@@ -58,9 +58,13 @@ class StorageServiceHandler(BaseServiceHandler):
app.backup_tid))
def askUnfinishedTransactions(self, conn):
tm = self.app.tm
pending_list = tm.registerForNotification(conn.getUUID())
last_tid = tm.getLastTID()
app = self.app
if app.backup_tid:
last_tid = app.pt.getBackupTid()
pending_list = ()
else:
last_tid = app.tm.getLastTID()
pending_list = app.tm.registerForNotification(conn.getUUID())
p = Packets.AnswerUnfinishedTransactions(last_tid, pending_list)
conn.answer(p)
@@ -99,9 +103,6 @@ class StorageServiceHandler(BaseServiceHandler):
logging.debug("%s is up for offset %s", node, offset)
self.app.broadcastPartitionChanges(cell_list)
def answerTruncate(self, conn):
pass
def answerPack(self, conn, status):
app = self.app
if app.packing is not None:
......
@@ -311,11 +311,20 @@ class PartitionTable(neo.lib.pt.PartitionTable):
for cell in row
if cell.isReadable())
def clearReplicating(self):
for row in self.partition_list:
for cell in row:
try:
del cell.replicating
except AttributeError:
pass
def setBackupTidDict(self, backup_tid_dict):
for row in self.partition_list:
for cell in row:
cell.backup_tid = backup_tid_dict.get(cell.getUUID(),
ZERO_TID)
if cell.isReadable():
cell.backup_tid = backup_tid_dict.get(cell.getUUID(),
ZERO_TID)
def getBackupTid(self):
try:
......
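The diff cuts off the body of getBackupTid. Given the invariant this commit enforces (all readable cells of a partition get the same 'backup_tid' once they have the same data), the cluster-wide value is presumably the minimum over readable cells, i.e. the TID up to which every readable cell is known to be replicated. A sketch with snake_case stand-ins for NEO's accessors:

def get_backup_tid(partition_list):
    # Everything up to the lowest backup_tid of the readable cells is
    # guaranteed to have been replicated everywhere it should be.
    return min(cell.backup_tid
               for row in partition_list
               for cell in row
               if cell.is_readable())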
@@ -312,6 +312,9 @@ class Application(object):
_poll()
finally:
del self.task_queue
# XXX: Although no handled exception should happen between
# replicator.populate() and the beginning of this 'try'
# clause, the replicator should be reset in a safer place.
self.replicator = Replicator(self)
# Abort any replication, whether we are feeding or out-of-date.
for node in self.nm.getStorageList(only_identified=True):
......
@@ -436,7 +436,7 @@ class DatabaseManager(object):
def truncate(self, tid):
assert tid not in (None, ZERO_TID), tid
assert self.getBackupTID()
self.setBackupTID(tid)
self.setBackupTID(None) # XXX
for partition in xrange(self.getNumPartitions()):
self._deleteRange(partition, tid)
self.commit()
......
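A toy model of the truncate hunk above, using sqlite3 instead of NEO's database layer; the schema and names are invented, and only the shape of the operation (delete everything above the target TID in every partition, then commit) matches the diff:

import sqlite3

def truncate(db, tid, num_partitions):
    for partition in range(num_partitions):
        # counterpart of _deleteRange(partition, tid): drop all rows
        # strictly above the target TID in this partition
        db.execute("DELETE FROM trans WHERE partition=? AND tid>?",
                   (partition, tid))
    db.commit()

db = sqlite3.connect(':memory:')
db.execute("CREATE TABLE trans (partition INTEGER, tid INTEGER)")
db.executemany("INSERT INTO trans VALUES (?, ?)",
               [(0, 5), (0, 15), (1, 8), (1, 20)])
truncate(db, 10, num_partitions=2)
assert list(db.execute("SELECT partition, tid FROM trans")) == [(0, 5), (1, 8)]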
@@ -68,10 +68,10 @@ class MasterOperationHandler(BaseMasterHandler):
dict((p, a and (a, upstream_name))
for p, a in source_dict.iteritems()))
def askTruncate(self, conn, tid):
def truncate(self, conn, tid):
self.app.replicator.cancel()
self.app.dm.truncate(tid)
conn.answer(Packets.AnswerTruncate())
conn.close()
def checkPartition(self, conn, *args):
self.app.checker(*args)
@@ -354,16 +354,17 @@ class Replicator(object):
self.getCurrentConnection().close()
def stop(self):
d = None, None
# Close any open connection to an upstream storage,
# possibly aborting current replication.
node = self.current_node
if node is not None is node.getUUID():
self.cancel()
# Cancel all replication orders from upstream cluster.
for offset in self.replicate_dict.keys():
addr, name = self.source_dict.get(offset, d)
addr, name = self.source_dict.get(offset, (None, None))
if name:
tid = self.replicate_dict.pop(offset)
logging.info('cancel replication of partition %u from %r'
' up to %s', offset, addr, dump(tid))
# Close any open connection to an upstream storage,
# possibly aborting current replication.
node = self.current_node
if node is not None is node.getUUID():
self.cancel()
# Make UP_TO_DATE cells really UP_TO_DATE
self._nextPartition()
@@ -20,6 +20,7 @@ import time
import threading
import transaction
import unittest
from collections import defaultdict
from functools import wraps
from neo.lib import logging
from neo.storage.checker import CHECK_COUNT
@@ -200,6 +201,10 @@ class ReplicationTests(NEOThreadedTest):
try:
upstream.start()
importZODB = upstream.importZODB(random=random)
# Do not start with an empty DB so that 'primary_dict' below is not
# empty on the first iteration.
importZODB(1)
upstream.client.setPoll(0)
backup = NEOCluster(partitions=np, replicas=2, storage_count=4,
upstream=upstream)
try:
@@ -214,11 +219,10 @@ class ReplicationTests(NEOThreadedTest):
p = Patch(upstream.master.tm,
_on_commit=onTransactionCommitted)
else:
primary_dict = {}
primary_dict = defaultdict(list)
for k, v in sorted(backup.master.backup_app
.primary_partition_dict.iteritems()):
primary_dict.setdefault(storage_list.index(v._uuid),
[]).append(k)
primary_dict[storage_list.index(v._uuid)].append(k)
if event % 2:
storage = slave(primary_dict).pop()
else:
......