Commit f4a5fe3b authored by Julien Muchembled

reflink: new tool, mainly for Garbage Collection

parent 6aa46516
...@@ -27,12 +27,7 @@ A NEO cluster is composed of the following types of nodes: ...@@ -27,12 +27,7 @@ A NEO cluster is composed of the following types of nodes:
Well... Something needing to store/load data in a NEO cluster. Well... Something needing to store/load data in a NEO cluster.
ZODB API is fully implemented except: ZODB API is fully implemented, except blobs.
- pack: only old revisions of objects are removed (it should be possible
to use `zc.zodbdgc <https://pypi.python.org/pypi/zc.zodbdgc>`_
for garbage collection)
- blobs: not implemented (not considered yet)
Any ZODB like FileStorage can be converted to NEO instantaneously, Any ZODB like FileStorage can be converted to NEO instantaneously,
which means the database is operational before all data are imported. which means the database is operational before all data are imported.
...@@ -175,6 +170,14 @@ Note also that you can't mix non-SSL nodes and SSL nodes, even between a ...@@ -175,6 +170,14 @@ Note also that you can't mix non-SSL nodes and SSL nodes, even between a
upstream cluster and a backup one. In doing so, connections can get stuck, upstream cluster and a backup one. In doing so, connections can get stuck,
or fail with malformed packets or SSL handshake errors. or fail with malformed packets or SSL handshake errors.
Pack
----
The implementation of ZODB pack in NEO is a bit special: NEO itself only
implements the deletion of historical data (old revisions of objects), i.e. it
does not perform any Garbage Collection (GC). A separate tool called `reflink`
is provided to perform GC more efficiently than
`zc.zodbdgc <https://pypi.python.org/pypi/zc.zodbdgc>`_.
Only 1 log file per process Only 1 log file per process
--------------------------- ---------------------------
......
...@@ -160,6 +160,8 @@ class ClientBackupServiceHandler(ClientReadOnlyServiceHandler): ...@@ -160,6 +160,8 @@ class ClientBackupServiceHandler(ClientReadOnlyServiceHandler):
# like in MasterHandler but returns backup_tid instead of last_tid # like in MasterHandler but returns backup_tid instead of last_tid
def askLastTransaction(self, conn): def askLastTransaction(self, conn):
# XXX: It may return a value that does not point to
# an existing transaction.
assert self.app.backup_tid is not None # we are in BACKUPING mode assert self.app.backup_tid is not None # we are in BACKUPING mode
backup_tid = self.app.pt.getBackupTid(min) backup_tid = self.app.pt.getBackupTid(min)
conn.answer(Packets.AnswerLastTransaction(backup_tid)) conn.answer(Packets.AnswerLastTransaction(backup_tid))
This diff is collapsed.
...@@ -24,6 +24,8 @@ from cPickle import dumps ...@@ -24,6 +24,8 @@ from cPickle import dumps
from email import message_from_string from email import message_from_string
from itertools import count from itertools import count
from functools import partial, wraps from functools import partial, wraps
from urllib import urlencode
from urlparse import urlunsplit
from zlib import decompress from zlib import decompress
import transaction, ZODB import transaction, ZODB
import neo.admin.app, neo.master.app, neo.storage.app import neo.admin.app, neo.master.app, neo.storage.app
...@@ -856,6 +858,15 @@ class NEOCluster(object): ...@@ -856,6 +858,15 @@ class NEOCluster(object):
def __exit__(self, t, v, tb): def __exit__(self, t, v, tb):
self.stop(None) self.stop(None)
def zurl(self):
    """Return a ``neo://`` URL describing this cluster.

    The network location is ``<name>@<masters>``, where the masters are
    taken from ``self.master_nodes`` with spaces replaced by commas.
    The query string contains ``compress=false`` when ``self.compress``
    is false, followed by the ``ca``, ``cert`` and ``key`` parameters
    when ``self.SSL`` is set.
    """
    # Compression is only mentioned in the URL when it is turned off.
    query = [] if self.compress else [('compress', 'false')]
    if self.SSL:
        # NOTE(review): assumes self.SSL is ordered as (ca, cert, key)
        # -- confirm against the code that sets it.
        query.extend(zip(('ca', 'cert', 'key'), self.SSL))
    netloc = '%s@%s' % (self.name, self.master_nodes.replace(' ', ','))
    return urlunsplit(('neo', netloc, '', urlencode(query), ''))
def resetNeoCTL(self): def resetNeoCTL(self):
self.neoctl = NeoCTL(self.admin.getVirtualAddress(), ssl=self.SSL) self.neoctl = NeoCTL(self.admin.getVirtualAddress(), ssl=self.SSL)
...@@ -880,13 +891,11 @@ class NEOCluster(object): ...@@ -880,13 +891,11 @@ class NEOCluster(object):
ClusterStates.RUNNING, ClusterStates.BACKINGUP) ClusterStates.RUNNING, ClusterStates.BACKINGUP)
def notifyClusterInformation(release, orig, handler, conn, state): def notifyClusterInformation(release, orig, handler, conn, state):
orig(handler, conn, state) orig(handler, conn, state)
if state in expected_state: if state in expected_state and handler.app is self.admin_list[0]:
release() release()
with Serialized.until(MasterEventHandler, with Serialized.until(MasterEventHandler,
sendPartitionTable=sendPartitionTable) as tic1, \ sendPartitionTable=sendPartitionTable) as tic1, \
Serialized.until(RecoveryManager, dispatch=dispatch) as tic2, \ Serialized.until(RecoveryManager, dispatch=dispatch) as tic2:
Serialized.until(MasterEventHandler,
notifyClusterInformation=notifyClusterInformation) as tic3:
for node in master_list: for node in master_list:
node.start() node.start()
for node in self.admin_list: for node in self.admin_list:
...@@ -896,6 +905,9 @@ class NEOCluster(object): ...@@ -896,6 +905,9 @@ class NEOCluster(object):
node.start() node.start()
tic2() tic2()
if not recovering: if not recovering:
with Serialized.until(MasterEventHandler,
notifyClusterInformation=notifyClusterInformation,
) as tic3:
self.startCluster() self.startCluster()
tic3() tic3()
self.checkStarted(expected_state, storage_list) self.checkStarted(expected_state, storage_list)
...@@ -1061,6 +1073,10 @@ class NEOCluster(object): ...@@ -1061,6 +1073,10 @@ class NEOCluster(object):
txn = transaction.TransactionManager() txn = transaction.TransactionManager()
return txn, (self.db if db is None else db).open(txn) return txn, (self.db if db is None else db).open(txn)
def emptyCache(self, conn):
    """Empty the client cache, then minimize the cache of `conn`.

    conn -- object whose cacheMinimize() method is called once the
            client cache (self.client._cache) has been cleared
    """
    self.client._cache.clear()
    conn.cacheMinimize()
def moduloTID(self, partition): def moduloTID(self, partition):
"""Force generation of TIDs that will be stored in given partition""" """Force generation of TIDs that will be stored in given partition"""
partition = p64(partition) partition = p64(partition)
...@@ -1158,6 +1174,8 @@ class NEOThreadedTest(NeoTestBase): ...@@ -1158,6 +1174,8 @@ class NEOThreadedTest(NeoTestBase):
def run(self): def run(self):
try: try:
self.__result = apply(*self.__target) self.__result = apply(*self.__target)
except SystemExit:
self.__result = None
except: except:
self.__exc_info = sys.exc_info() self.__exc_info = sys.exc_info()
if self.__exc_info[0] is NEOThreadedTest.failureException: if self.__exc_info[0] is NEOThreadedTest.failureException:
......
...@@ -1738,8 +1738,7 @@ class Test(NEOThreadedTest): ...@@ -1738,8 +1738,7 @@ class Test(NEOThreadedTest):
if 1: if 1:
t, c = cluster.getTransaction() t, c = cluster.getTransaction()
m2c, = cluster.master.getConnectionList(cluster.client) m2c, = cluster.master.getConnectionList(cluster.client)
cluster.client._cache.clear() cluster.emptyCache(c)
c.cacheMinimize()
if not hasattr(sys, 'getrefcount'): # PyPy if not hasattr(sys, 'getrefcount'): # PyPy
# See persistent commit ff64867cca3179b1a6379c93b6ef90db565da36c # See persistent commit ff64867cca3179b1a6379c93b6ef90db565da36c
import gc; gc.collect() import gc; gc.collect()
......
This diff is collapsed.
neo/scripts/reflink.py
\ No newline at end of file
...@@ -53,6 +53,7 @@ extras_require = { ...@@ -53,6 +53,7 @@ extras_require = {
'client': zodb_require, 'client': zodb_require,
'ctl': [], 'ctl': [],
'master': [], 'master': [],
'reflink': zodb_require,
'storage-sqlite': [], 'storage-sqlite': [],
'storage-mysqldb': ['mysqlclient'], 'storage-mysqldb': ['mysqlclient'],
'storage-pymysql': ['PyMySQL'], 'storage-pymysql': ['PyMySQL'],
...@@ -105,6 +106,7 @@ setup( ...@@ -105,6 +106,7 @@ setup(
'neostorage=neo.scripts.neostorage:main', 'neostorage=neo.scripts.neostorage:main',
'neotestrunner=neo.scripts.runner:main', 'neotestrunner=neo.scripts.runner:main',
'neosimple=neo.scripts.simple:main', 'neosimple=neo.scripts.simple:main',
'reflink=neo.scripts.reflink:main',
'stat_zodb=neo.tests.stat_zodb:main', 'stat_zodb=neo.tests.stat_zodb:main',
], ],
'zodburi.resolvers': [ 'zodburi.resolvers': [
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment