Commit 8a645d9f authored by Julien Muchembled

tweak: do not touch cells of nodes that are intended to be dropped

parent c2c9e99d
...@@ -180,7 +180,8 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -180,7 +180,8 @@ class PartitionTable(neo.lib.pt.PartitionTable):
few readable cells, some cells are instead marked as FEEDING. This is few readable cells, some cells are instead marked as FEEDING. This is
a preliminary step to drop these nodes, otherwise the partition table a preliminary step to drop these nodes, otherwise the partition table
could become non-operational. could become non-operational.
- Other nodes must have the same number of cells, off by 1. In fact, the code touching these cells is disabled (see NOTE below).
- Other nodes must have the same number of non-feeding cells, off by 1.
- When a transaction creates new objects (oids are roughly allocated - When a transaction creates new objects (oids are roughly allocated
sequentially), we expect better performance by maximizing the number sequentially), we expect better performance by maximizing the number
of involved nodes (i.e. parallelizing writes). of involved nodes (i.e. parallelizing writes).
...@@ -417,6 +418,22 @@ class PartitionTable(neo.lib.pt.PartitionTable): ...@@ -417,6 +418,22 @@ class PartitionTable(neo.lib.pt.PartitionTable):
outdated_list[offset] -= 1 outdated_list[offset] -= 1
for offset, cell in cell_dict.iteritems(): for offset, cell in cell_dict.iteritems():
discard_list[offset].append(cell) discard_list[offset].append(cell)
# NOTE: The following line disables the next 2 lines, which would
# otherwise cause cells in drop_list to be discarded, now or later;
# drop_list could be renamed to ignore_list.
# 1. Deleting data partition per partition is a lot of work, so
# why ask nodes in drop_list to do that when the goal is
# simply to trash the whole underlying database?
# 2. By excluding nodes from a tweak, it becomes possible to have
# parts of the partition table that are tweaked differently.
# This may require temporarily changing the number of
# replicas for the part being tweaked. In the future, this
# number may be specified in the 'tweak' command, to avoid
# race conditions with setUpToDate().
# Overall, a common use case is when importing a ZODB to NEO,
# to keep the initial importing node up until the database is
# split and replicated to the final nodes.
drop_list = {}
for offset, drop_list in drop_list.iteritems(): for offset, drop_list in drop_list.iteritems():
discard_list[offset] += drop_list discard_list[offset] += drop_list
# We have sorted cells to discard in order to first deallocate nodes # We have sorted cells to discard in order to first deallocate nodes
......
...@@ -180,7 +180,9 @@ class TerminalNeoCTL(object): ...@@ -180,7 +180,9 @@ class TerminalNeoCTL(object):
def tweakPartitionTable(self, params): def tweakPartitionTable(self, params):
""" """
Optimize partition table. Optimize partition table.
No partition will be assigned to specified storage nodes. No change is done to the specified/down storage nodes and they don't
count as replicas. The purpose of listing nodes is usually to drop
them once the data is replicated to other nodes.
Parameters: [node [...]] Parameters: [node [...]]
""" """
return self.neoctl.tweakPartitionTable(map(self.asNode, params)) return self.neoctl.tweakPartitionTable(map(self.asNode, params))
......
...@@ -289,7 +289,9 @@ class MasterPartitionTableTests(NeoUnitTestBase): ...@@ -289,7 +289,9 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt.addNodeList(sn[1:3]) pt.addNodeList(sn[1:3])
self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..') self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..')
self.update(pt, self.tweak(pt, sn[:1])) self.update(pt, self.tweak(pt, sn[:1]))
self.assertPartitionTable(pt, '.U.|..U|.U.|..U|.U.|..U|.U.') # See note in PartitionTable.tweak() about drop_list.
#self.assertPartitionTable(pt,'.U.|..U|.U.|..U|.U.|..U|.U.')
self.assertPartitionTable(pt, 'UU.|U.U|UU.|U.U|UU.|U.U|UU.')
def test_18_tweakBigPT(self): def test_18_tweakBigPT(self):
seed = repr(time.time()) seed = repr(time.time())
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment