pax_global_header 0000666 0000000 0000000 00000000064 13465024610 0014513 g ustar 00root root 0000000 0000000 52 comment=bd5ba87ae6e0b8169b0da15fed0e32e56e7964af
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/ 0000775 0000000 0000000 00000000000 13465024610 0021243 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/ 0000775 0000000 0000000 00000000000 13465024610 0022024 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0024123 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/admin/ 0000775 0000000 0000000 00000000000 13465024610 0023114 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/admin/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0025213 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/admin/app.py 0000664 0000000 0000000 00000011214 13465024610 0024245 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.app import BaseApplication, buildOptionParser
from neo.lib.connection import ListeningConnection
from neo.lib.exception import PrimaryFailure
from .handler import AdminEventHandler, MasterEventHandler, \
MasterRequestEventHandler
from neo.lib.bootstrap import BootstrapManager
from neo.lib.protocol import ClusterStates, Errors, NodeTypes, Packets
from neo.lib.debug import register as registerLiveDebugger
@buildOptionParser
class Application(BaseApplication):
"""The storage node application."""
@classmethod
def _buildOptionParser(cls):
_ = cls.option_parser
_.description = "NEO Admin node"
cls.addCommonServerOptions('admin', '127.0.0.1:9999')
_ = _.group('admin')
_.int('i', 'nid',
help="specify an NID to use for this process (testing purpose)")
def __init__(self, config):
super(Application, self).__init__(
config.get('ssl'), config.get('dynamic_master_list'))
for address in config['masters']:
self.nm.createMaster(address=address)
self.name = config['cluster']
self.server = config['bind']
logging.debug('IP address is %s, port is %d', *self.server)
# The partition table is initialized after getting the number of
# partitions.
self.pt = None
self.uuid = config.get('nid')
logging.node(self.name, self.uuid)
self.request_handler = MasterRequestEventHandler(self)
self.master_event_handler = MasterEventHandler(self)
self.cluster_state = None
self.reset()
registerLiveDebugger(on_log=self.log)
def close(self):
self.listening_conn = None
super(Application, self).close()
def reset(self):
self.master_conn = None
self.master_node = None
def log(self):
self.em.log()
self.nm.log()
if self.pt is not None:
self.pt.log()
def run(self):
try:
self._run()
except Exception:
logging.exception('Pre-mortem data:')
self.log()
logging.flush()
raise
def _run(self):
"""Make sure that the status is sane and start a loop."""
if len(self.name) == 0:
raise RuntimeError, 'cluster name must be non-empty'
# Make a listening port.
handler = AdminEventHandler(self)
self.listening_conn = ListeningConnection(self, handler, self.server)
while self.cluster_state != ClusterStates.STOPPING:
self.connectToPrimary()
try:
while True:
self.em.poll(1)
except PrimaryFailure:
logging.error('primary master is down')
self.listening_conn.close()
while not self.em.isIdle():
self.em.poll(1)
def connectToPrimary(self):
"""Find a primary master node, and connect to it.
If a primary master node is not elected or ready, repeat
the attempt of a connection periodically.
Note that I do not accept any connection from non-master nodes
at this stage.
"""
self.cluster_state = None
# search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.ADMIN, self.server)
self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
# passive handler
self.master_conn.setHandler(self.master_event_handler)
self.master_conn.ask(Packets.AskClusterState())
def sendPartitionTable(self, conn, min_offset, max_offset, uuid):
pt = self.pt
if max_offset == 0:
max_offset = pt.getPartitions()
try:
row_list = map(pt.getRow, xrange(min_offset, max_offset))
except IndexError:
conn.send(Errors.ProtocolError('invalid partition table offset'))
else:
conn.answer(Packets.AnswerPartitionList(
pt.getID(), pt.getReplicas(), row_list))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/admin/handler.py 0000664 0000000 0000000 00000011605 13465024610 0025106 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging, protocol
from neo.lib.handler import EventHandler
from neo.lib.protocol import uuid_str, Packets
from neo.lib.pt import PartitionTable
from neo.lib.exception import PrimaryFailure
def check_primary_master(func):
def wrapper(self, *args, **kw):
if self.app.master_conn is not None:
return func(self, *args, **kw)
raise protocol.NotReadyError('Not connected to a primary master.')
return wrapper
def forward_ask(klass):
return check_primary_master(lambda self, conn, *args, **kw:
self.app.master_conn.ask(klass(*args, **kw),
conn=conn, msg_id=conn.getPeerId()))
class AdminEventHandler(EventHandler):
"""This class deals with events for administrating cluster."""
@check_primary_master
def askPartitionList(self, conn, min_offset, max_offset, uuid):
logging.info("ask partition list from %s to %s for %s",
min_offset, max_offset, uuid_str(uuid))
self.app.sendPartitionTable(conn, min_offset, max_offset, uuid)
@check_primary_master
def askNodeList(self, conn, node_type):
if node_type is None:
node_type = 'all'
node_filter = None
else:
node_filter = lambda n: n.getType() is node_type
logging.info("ask list of %s nodes", node_type)
node_list = self.app.nm.getList(node_filter)
node_information_list = [node.asTuple() for node in node_list ]
p = Packets.AnswerNodeList(node_information_list)
conn.answer(p)
@check_primary_master
def askClusterState(self, conn):
conn.answer(Packets.AnswerClusterState(self.app.cluster_state))
@check_primary_master
def askPrimary(self, conn):
master_node = self.app.master_node
conn.answer(Packets.AnswerPrimary(master_node.getUUID()))
@check_primary_master
def flushLog(self, conn):
self.app.master_conn.send(Packets.FlushLog())
super(AdminEventHandler, self).flushLog(conn)
askLastIDs = forward_ask(Packets.AskLastIDs)
askLastTransaction = forward_ask(Packets.AskLastTransaction)
addPendingNodes = forward_ask(Packets.AddPendingNodes)
askRecovery = forward_ask(Packets.AskRecovery)
tweakPartitionTable = forward_ask(Packets.TweakPartitionTable)
setClusterState = forward_ask(Packets.SetClusterState)
setNodeState = forward_ask(Packets.SetNodeState)
setNumReplicas = forward_ask(Packets.SetNumReplicas)
checkReplicas = forward_ask(Packets.CheckReplicas)
truncate = forward_ask(Packets.Truncate)
repair = forward_ask(Packets.Repair)
class MasterEventHandler(EventHandler):
""" This class is just used to dispatch message to right handler"""
def _connectionLost(self, conn):
app = self.app
if app.listening_conn: # if running
assert app.master_conn in (conn, None)
conn.cancelRequests("connection to master lost")
app.reset()
app.uuid = None
raise PrimaryFailure
def connectionFailed(self, conn):
self._connectionLost(conn)
def connectionClosed(self, conn):
self._connectionLost(conn)
def dispatch(self, conn, packet, kw={}):
if 'conn' in kw:
# expected answer
if packet.isResponse():
packet.setId(kw['msg_id'])
kw['conn'].answer(packet)
else:
self.app.request_handler.dispatch(conn, packet, kw)
else:
# unexpected answers and notifications
super(MasterEventHandler, self).dispatch(conn, packet, kw)
def answerClusterState(self, conn, state):
self.app.cluster_state = state
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
pt = self.app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def notifyClusterInformation(self, conn, cluster_state):
self.app.cluster_state = cluster_state
class MasterRequestEventHandler(EventHandler):
""" This class handle all answer from primary master node"""
# XXX: to be deleted ?
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/ 0000775 0000000 0000000 00000000000 13465024610 0023302 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/Storage.py 0000664 0000000 0000000 00000022072 13465024610 0025263 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ZODB import BaseStorage, ConflictResolution, POSException
from ZODB.POSException import ConflictError, UndoError
from zope.interface import implementer
import ZODB.interfaces
from neo.lib import logging
from .app import Application
from .exception import NEOStorageNotFoundError, NEOStorageDoesNotExistError
def raiseReadOnlyError(*args, **kw):
raise POSException.ReadOnlyError()
@implementer(
ZODB.interfaces.IStorage,
# ZODB.interfaces.IStorageRestoreable,
ZODB.interfaces.IStorageIteration,
ZODB.interfaces.IStorageUndoable,
ZODB.interfaces.IExternalGC,
ZODB.interfaces.ReadVerifyingStorage,
)
class Storage(BaseStorage.BaseStorage,
ConflictResolution.ConflictResolvingStorage):
"""Wrapper class for neoclient."""
def __init__(self, master_nodes, name, read_only=False,
compress=None, logfile=None, _app=None, **kw):
"""
Do not pass those parameters (used internally):
_app
"""
if compress is None:
compress = True
if logfile:
logging.setup(logfile)
BaseStorage.BaseStorage.__init__(self, 'NEOStorage(%s)' % (name, ))
# Warning: _is_read_only is used in BaseStorage, do not rename it.
self._is_read_only = read_only
if read_only:
for method_id in (
'new_oid',
'tpc_begin',
'tpc_vote',
'tpc_abort',
'store',
'deleteObject',
'undo',
'undoLog',
):
setattr(self, method_id, raiseReadOnlyError)
if _app is None:
ssl = [kw.pop(x, None) for x in ('ca', 'cert', 'key')]
_app = Application(master_nodes, name, compress=compress,
ssl=ssl if any(ssl) else None, **kw)
self.app = _app
@property
def _cache(self):
return self.app._cache
def load(self, oid, version=''):
# XXX: interface definition states that version parameter is
# mandatory, while some ZODB tests do not provide it. For now, make
# it optional.
assert version == '', 'Versions are not supported'
try:
return self.app.load(oid)[:2]
except NEOStorageNotFoundError:
raise POSException.POSKeyError(oid)
except Exception:
logging.exception('oid=%r', oid)
raise
def new_oid(self):
try:
return self.app.new_oid()
except Exception:
logging.exception('')
raise
def tpc_begin(self, transaction, tid=None, status=' '):
"""
Note: never blocks in NEO.
"""
try:
return self.app.tpc_begin(self, transaction, tid, status)
except Exception:
logging.exception('transaction=%r, tid=%r', transaction, tid)
raise
def tpc_vote(self, transaction):
try:
return self.app.tpc_vote(transaction)
except ConflictError:
raise
except Exception:
logging.exception('transaction=%r', transaction)
raise
def tpc_abort(self, transaction):
try:
return self.app.tpc_abort(transaction)
except Exception:
logging.exception('transaction=%r', transaction)
raise
def tpc_finish(self, transaction, f=None):
try:
return self.app.tpc_finish(transaction, f)
except Exception:
logging.exception('transaction=%r', transaction)
raise
def store(self, oid, serial, data, version, transaction):
assert version == '', 'Versions are not supported'
try:
return self.app.store(oid, serial, data, version, transaction)
except ConflictError:
raise
except Exception:
logging.exception('oid=%r, serial=%r, transaction=%r',
oid, serial, transaction)
raise
def deleteObject(self, oid, serial, transaction):
try:
self.app.store(oid, serial, None, None, transaction)
except Exception:
logging.exception('oid=%r, serial=%r, transaction=%r',
oid, serial, transaction)
raise
# multiple revisions
def loadSerial(self, oid, serial):
try:
return self.app.load(oid, serial)[0]
except NEOStorageNotFoundError:
raise POSException.POSKeyError(oid)
except Exception:
logging.exception('oid=%r, serial=%r', oid, serial)
raise
def loadBefore(self, oid, tid):
try:
return self.app.load(oid, None, tid)
except NEOStorageDoesNotExistError:
raise POSException.POSKeyError(oid)
except NEOStorageNotFoundError:
return None
except Exception:
logging.exception('oid=%r, tid=%r', oid, tid)
raise
@property
def iterator(self):
return self.app.iterator
# undo
def undo(self, transaction_id, txn):
try:
return self.app.undo(transaction_id, txn)
except (ConflictError, UndoError):
raise
except Exception:
logging.exception('transaction_id=%r, txn=%r', transaction_id, txn)
raise
def undoLog(self, first=0, last=-20, filter=None):
try:
return self.app.undoLog(first, last, filter)
except Exception:
logging.exception('first=%r, last=%r', first, last)
raise
def supportsUndo(self):
return True
def loadEx(self, oid, version):
try:
data, serial, _ = self.app.load(oid)
except NEOStorageNotFoundError:
raise POSException.POSKeyError(oid)
except Exception:
logging.exception('oid=%r', oid)
raise
return data, serial, ''
def __len__(self):
try:
return self.app.getObjectCount()
except Exception:
logging.exception('')
raise
def registerDB(self, db, limit=None):
self.app.registerDB(db, limit)
def history(self, oid, *args, **kw):
try:
return self.app.history(oid, *args, **kw)
except NEOStorageNotFoundError:
raise POSException.POSKeyError(oid)
except Exception:
logging.exception('oid=%r', oid)
raise
def sync(self):
return self.app.sync()
def copyTransactionsFrom(self, source, verbose=False):
""" Zope compliant API """
try:
return self.app.importFrom(self, source)
except Exception:
logging.exception('source=%r', source)
raise
def pack(self, t, referencesf, gc=False):
if gc:
logging.warning('Garbage Collection is not available in NEO,'
' please use an external tool. Packing without GC.')
try:
self.app.pack(t)
except Exception:
logging.exception('pack_time=%r', t)
raise
def lastSerial(self):
# seems unused
raise NotImplementedError
def lastTransaction(self):
# Used in ZODB unit tests
return self.app.last_tid
def _clear_temp(self):
raise NotImplementedError
def set_max_oid(self, possible_new_max_oid):
# seems used only by FileStorage
raise NotImplementedError
def close(self):
# WARNING: This does not handle the case where an app is shared by
# several Storage instances, but this is something that only
# happens in threaded tests (and this method is not called on
# extra Storages).
app = self.app
if app is not None:
self.app = None
app.close()
def getTid(self, oid):
try:
return self.app.getLastTID(oid)
except NEOStorageNotFoundError:
raise KeyError
except Exception:
logging.exception('oid=%r', oid)
raise
def checkCurrentSerialInTransaction(self, oid, serial, transaction):
try:
self.app.checkCurrentSerialInTransaction(oid, serial, transaction)
except Exception:
logging.exception('oid=%r, serial=%r, transaction=%r',
oid, serial, transaction)
raise
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/__init__.py 0000664 0000000 0000000 00000007257 13465024610 0025426 0 ustar 00root root 0000000 0000000 ##############################################################################
#
# Copyright (C) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
def patch():
# For msgpack & Py2/ZODB5.
try:
from zodbpickle import binary
binary._pack = bytes.__str__
except ImportError:
pass
from hashlib import md5
from ZODB.Connection import Connection
H = lambda f: md5(f.func_code.co_code).hexdigest()
# Allow serial to be returned as late as tpc_finish
#
# This makes possible for storage to allocate serial inside tpc_finish,
# removing the requirement to serialise second commit phase (tpc_vote
# to tpc_finish/tpc_abort).
h = H(Connection.tpc_finish)
def tpc_finish(self, transaction):
"""Indicate confirmation that the transaction is done."""
def callback(tid):
if self._mvcc_storage:
# Inter-connection invalidation is not needed when the
# storage provides MVCC.
return
d = dict.fromkeys(self._modified)
self._db.invalidate(tid, d, self)
# It's important that the storage calls the passed function
# while it still has its lock. We don't want another thread
# to be able to read any updated data until we've had a chance
# to send an invalidation message to all of the other
# connections!
#
serial = self._storage.tpc_finish(transaction, callback)
if serial is not None:
assert isinstance(serial, bytes), repr(serial)
for oid_iterator in (self._modified, self._creating):
for oid in oid_iterator:
obj = self._cache.get(oid, None)
# Ignore missing objects and don't update ghosts.
if obj is not None and obj._p_changed is not None:
obj._p_changed = 0
obj._p_serial = serial
#
self._tpc_cleanup()
global OLD_ZODB
OLD_ZODB = h in (
'ab9b1b8d82c40e5fffa84f7bc4ea3a8b', # Python 2.7
)
if OLD_ZODB:
Connection.tpc_finish = tpc_finish
elif hasattr(Connection, '_handle_serial'): # merged upstream ?
assert hasattr(Connection, '_warn_about_returned_serial')
# sync() is used to provide a "network barrier", which is required for
# NEO & ZEO to make sure our view of the storage includes all changes done
# so far by other clients. But a round-trip to the server introduces
# latency so it must not be done when it's not useful. Note also that a
# successful commit (which ends with a response from the master) already
# acts as a "network barrier".
# BBB: What this monkey-patch does has been merged in ZODB5.
if not hasattr(Connection, '_flush_invalidations'):
return
assert H(Connection.afterCompletion) in (
'cd3a080b80fd957190ff3bb867149448', # Python 2.7
)
def afterCompletion(self, *ignored):
self._readCurrent.clear()
# PATCH: do not call sync()
self._flush_invalidations()
Connection.afterCompletion = afterCompletion
patch()
import app # set up signal handlers early enough to do it in the main thread
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/app.py 0000664 0000000 0000000 00000127215 13465024610 0024444 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import heapq
import random
import time
from collections import defaultdict
try:
from ZODB._compat import dumps, loads, _protocol
except ImportError:
from cPickle import dumps, loads
_protocol = 1
from ZODB.POSException import UndoError, ConflictError, ReadConflictError
from . import OLD_ZODB
if OLD_ZODB:
from ZODB.ConflictResolution import ResolvedSerial
from persistent.TimeStamp import TimeStamp
from neo.lib import logging
from neo.lib.compress import decompress_list, getCompress
from neo.lib.protocol import NodeTypes, Packets, \
INVALID_PARTITION, MAX_TID, ZERO_HASH, ZERO_TID
from neo.lib.util import makeChecksum, dump
from neo.lib.locking import Empty, Lock
from neo.lib.connection import MTClientConnection, ConnectionClosed
from neo.lib.exception import NodeNotReady
from .exception import (NEOStorageError, NEOStorageCreationUndoneError,
NEOStorageReadRetry, NEOStorageNotFoundError, NEOPrimaryMasterLost)
from .handlers import storage, master
from neo.lib.threaded_app import ThreadedApplication
from .cache import ClientCache
from .transactions import TransactionContainer
from neo.lib.util import p64, u64, parseMasterList
CHECKED_SERIAL = object()
# How long before we might retry a connection to a node to which connection
# failed in the past.
MAX_FAILURE_AGE = 600
try:
from Signals.Signals import SignalHandler
except ImportError:
SignalHandler = None
if SignalHandler:
import signal
SignalHandler.registerHandler(signal.SIGUSR2, logging.reopen)
class Application(ThreadedApplication):
"""The client node application."""
# For tests only. Do not touch. We want tpc_finish to always recover when
# the transaction is really committed, no matter for how long the master
# is unreachable.
max_reconnection_to_master = float('inf')
def __init__(self, master_nodes, name, compress=True, cache_size=None,
**kw):
super(Application, self).__init__(parseMasterList(master_nodes),
name, **kw)
# Internal Attributes common to all thread
self._db = None
self.primary_master_node = None
self.trying_master_node = None
# no self-assigned NID, primary master will supply us one
self._cache = ClientCache() if cache_size is None else \
ClientCache(max_size=cache_size)
self._loading_oid = None
self.new_oids = ()
self.last_oid = '\0' * 8
self.storage_event_handler = storage.StorageEventHandler(self)
self.storage_bootstrap_handler = storage.StorageBootstrapHandler(self)
self.storage_handler = storage.StorageAnswersHandler(self)
self.primary_handler = master.PrimaryAnswersHandler(self)
self.primary_bootstrap_handler = master.PrimaryBootstrapHandler(self)
self.notifications_handler = master.PrimaryNotificationsHandler( self)
self._txn_container = TransactionContainer()
# Lock definition :
# _load_lock is used to make loading and storing atomic
lock = Lock()
self._load_lock_acquire = lock.acquire
self._load_lock_release = lock.release
# _oid_lock is used in order to not call multiple oid
# generation at the same time
lock = Lock()
self._oid_lock_acquire = lock.acquire
self._oid_lock_release = lock.release
lock = Lock()
# _cache_lock is used for the client cache
self._cache_lock_acquire = lock.acquire
self._cache_lock_release = lock.release
# _connecting_to_master_node is used to prevent simultaneous master
# node connection attempts
self._connecting_to_master_node = Lock()
# same for storage nodes
self._connecting_to_storage_node = Lock()
self._node_failure_dict = {}
self.compress = getCompress(compress)
def __getattr__(self, attr):
if attr in ('last_tid', 'pt'):
self._getMasterConnection()
# XXX: There's still a risk that we get disconnected from the
# master at this precise moment and for 'pt', we'd raise
# AttributeError. Should we catch it and loop until it
# succeeds?
return self.__getattribute__(attr)
def log(self):
super(Application, self).log()
logging.info("%r", self._cache)
for txn_context in self._txn_container.itervalues():
logging.info("%r", txn_context)
@property
def txn_contexts(self):
# do not iter lazily to avoid race condition
return self._txn_container.values
def _waitAnyMessage(self, queue, block=True):
"""
Handle all pending packets.
block
If True (default), will block until at least one packet was
received.
"""
pending = self.dispatcher.pending
get = queue.get
_handlePacket = self._handlePacket
while pending(queue):
try:
conn, packet, kw = get(block)
except Empty:
break
block = False
try:
_handlePacket(conn, packet, kw)
except (ConnectionClosed, NEOStorageReadRetry):
# We also catch NEOStorageReadRetry for ObjectUndoSerial.
pass
def _waitAnyTransactionMessage(self, txn_context, block=True):
"""
Just like _waitAnyMessage, but for per-transaction exchanges, rather
than per-thread.
"""
queue = txn_context.queue
self.setHandlerData(txn_context)
try:
self._waitAnyMessage(queue, block=block)
finally:
# Don't leave access to thread context, even if a raise happens.
self.setHandlerData(None)
if txn_context.conflict_dict:
self._handleConflicts(txn_context)
def _askStorage(self, conn, packet, **kw):
""" Send a request to a storage node and process its answer """
return self._ask(conn, packet, handler=self.storage_handler, **kw)
def _askPrimary(self, packet, **kw):
""" Send a request to the primary master and process its answer """
return self._ask(self._getMasterConnection(), packet,
handler=self.primary_handler, **kw)
def _getMasterConnection(self):
""" Connect to the primary master node on demand """
# For performance reasons, get 'master_conn' without locking.
result = self.master_conn
if result is None:
# If not connected, 'master_conn' must be tested again while we have
# the lock, to avoid concurrent threads reconnecting.
with self._connecting_to_master_node:
result = self.master_conn
if result is None:
self.new_oids = ()
result = self.master_conn = self._connectToPrimaryNode()
return result
def _connectToPrimaryNode(self):
"""
Lookup for the current primary master node
"""
logging.debug('connecting to primary master...')
self.start()
index = -1
fail_count = 0
ask = self._ask
handler = self.primary_bootstrap_handler
while 1:
self.ignore_invalidations = True
# Get network connection to primary master
while fail_count < self.max_reconnection_to_master:
self.nm.reset()
if self.primary_master_node is not None:
# If I know a primary master node, pinpoint it.
node = self.primary_master_node
self.primary_master_node = None
else:
# Otherwise, check one by one.
master_list = self.nm.getMasterList()
if not master_list:
# XXX: On shutdown, it already happened that this list
# is empty, leading to ZeroDivisionError. This
# looks a minor issue so let's wait to have more
# information.
logging.error('%r', self.__dict__)
index = (index + 1) % len(master_list)
node = master_list[index]
# Connect to master
conn = MTClientConnection(self,
self.notifications_handler,
node=node,
dispatcher=self.dispatcher)
p = Packets.RequestIdentification(NodeTypes.CLIENT,
self.uuid, None, self.name, None, (), ())
try:
ask(conn, p, handler=handler)
except ConnectionClosed:
fail_count += 1
else:
self.primary_master_node = node
break
else:
raise NEOPrimaryMasterLost(
"Too many connection failures to the primary master")
logging.info('Connected to %s', self.primary_master_node)
try:
# Request identification and required informations to be
# operational. Might raise ConnectionClosed so that the new
# primary can be looked-up again.
logging.info('Initializing from master')
ask(conn, Packets.AskLastTransaction(), handler=handler)
if self.pt.operational():
break
except ConnectionClosed:
logging.error('Connection to %s lost', self.trying_master_node)
self.primary_master_node = None
fail_count += 1
logging.info("Connected and ready")
return conn
def getStorageConnection(self, node):
conn = node._connection # XXX
if node.isRunning() if conn is None else not node._identified:
with self._connecting_to_storage_node:
conn = node._connection # XXX
if conn is None:
return self._connectToStorageNode(node)
return conn
def _connectToStorageNode(self, node):
if self.master_conn is None:
raise NEOPrimaryMasterLost
conn = MTClientConnection(self, self.storage_event_handler, node,
dispatcher=self.dispatcher)
p = Packets.RequestIdentification(NodeTypes.CLIENT,
self.uuid, None, self.name, self.id_timestamp, (), ())
try:
self._ask(conn, p, handler=self.storage_bootstrap_handler)
except ConnectionClosed:
logging.error('Connection to %r failed', node)
except NodeNotReady:
logging.info('%r not ready', node)
else:
logging.info('Connected %r', node)
# Make sure this node will be considered for the next reads
# even if there was a previous recent failure.
self._node_failure_dict.pop(node.getUUID(), None)
return conn
self._node_failure_dict[node.getUUID()] = time.time() + MAX_FAILURE_AGE
def getCellSortKey(self, cell, random=random.random):
# Prefer a node that didn't fail recently.
failure = self._node_failure_dict.get(cell.getUUID())
if failure:
if time.time() < failure:
# Or order by date of connection failure.
return failure
# Do not use 'del' statement: we didn't lock, so another
# thread might have removed uuid from _node_failure_dict.
self._node_failure_dict.pop(cell.getUUID(), None)
# A random one, connected or not, is a trivial and quite efficient way
# to distribute the load evenly. On write accesses, a client connects
# to all nodes of touched cells, but before that, or if a client is
# specialized to only do read-only accesses, it should not limit
# itself to only use the first connected nodes.
return random()
def registerDB(self, db, limit):
self._db = db
def getDB(self):
return self._db
def new_oid(self):
"""Get a new OID."""
self._oid_lock_acquire()
try:
for oid in self.new_oids:
break
else:
# Get new oid list from master node
# we manage a list of oid here to prevent
# from asking too many time new oid one by one
# from master node
self._askPrimary(Packets.AskNewOIDs(100))
for oid in self.new_oids:
break
else:
raise NEOStorageError('new_oid failed')
self.last_oid = oid
return oid
finally:
self._oid_lock_release()
def getObjectCount(self):
# return the last OID used, this is inaccurate
return int(u64(self.last_oid))
def _askStorageForRead(self, object_id, packet, askStorage=None):
pt = self.pt
# BBB: On Py2, it can be a subclass of bytes (binary from zodbpickle).
if isinstance(object_id, bytes):
object_id = pt.getPartition(object_id)
if askStorage is None:
askStorage = self._askStorage
# Failure condition with minimal overhead: most of the time, only the
# following line is executed. In case of storage errors, we retry each
# node at least once, without looping forever.
failed = 0
while 1:
cell_list = pt.getCellList(object_id, True)
cell_list.sort(key=self.getCellSortKey)
for cell in cell_list:
node = cell.getNode()
conn = self.getStorageConnection(node)
if conn is not None:
try:
return askStorage(conn, packet)
except ConnectionClosed:
pass
except NEOStorageReadRetry, e:
if e.args[0]:
continue
failed += 1
if not pt.filled():
raise NEOPrimaryMasterLost
if len(cell_list) < failed: # too many failures
raise NEOStorageError('no storage available')
# Do not retry too quickly, for example
# when there's an incoming PT update.
self.sync()
def load(self, oid, tid=None, before_tid=None):
"""
Internal method which manage load, loadSerial and loadBefore.
OID and TID (serial) parameters are expected packed.
oid
OID of object to get.
tid
If given, the exact serial at which OID is desired.
before_tid should be None.
before_tid
If given, the excluded upper bound serial at which OID is desired.
serial should be None.
Return value: (3-tuple)
- Object data (None if object creation was undone).
- Serial of given data.
- Next serial at which object exists, or None. Only set when tid
parameter is not None.
Exceptions:
NEOStorageError
technical problem
NEOStorageNotFoundError
object exists but no data satisfies given parameters
NEOStorageDoesNotExistError
object doesn't exist
NEOStorageCreationUndoneError
object existed, but its creation was undone
Note that loadSerial is used during conflict resolution to load
object's current version, which is not visible to us normally (it was
committed after our snapshot was taken).
"""
# TODO:
# - rename parameters (here? and in handlers & packet definitions)
acquire = self._cache_lock_acquire
release = self._cache_lock_release
# XXX: Consider using a more fine-grained lock.
self._load_lock_acquire()
try:
acquire()
try:
result = self._loadFromCache(oid, tid, before_tid)
if result:
return result
self._loading_oid = oid
self._loading_invalidated = []
finally:
release()
# While the cache lock is released, an arbitrary number of
# invalidations may be processed, for this oid or not. And at this
# precise moment, if both tid and before_tid are None (which is
# unlikely to happen with recent ZODB), self.last_tid can be any
# new tid. Since we can get any serial from storage, fixing
# next_tid requires to keep a list of all possible serials.
# When not bound to a ZODB Connection, load() may be the
# first method called and last_tid may still be None.
# This happens, for example, when opening the DB.
if not (tid or before_tid) and self.last_tid:
# Do not get something more recent than the last invalidation
# we got from master.
before_tid = p64(u64(self.last_tid) + 1)
data, tid, next_tid, _ = self._loadFromStorage(oid, tid, before_tid)
acquire()
try:
if self._loading_oid:
if not next_tid:
for t in self._loading_invalidated:
if tid < t:
next_tid = t
break
self._cache.store(oid, data, tid, next_tid)
# Else, we just reconnected to the master.
finally:
release()
finally:
self._load_lock_release()
return data, tid, next_tid
def _loadFromStorage(self, oid, at_tid, before_tid):
def askStorage(conn, packet):
tid, next_tid, compression, checksum, data, data_tid \
= self._askStorage(conn, packet)
if data or checksum != ZERO_HASH:
if checksum != makeChecksum(data):
logging.error('wrong checksum from %s for oid %s',
conn, dump(oid))
raise NEOStorageReadRetry(False)
return (decompress_list[compression](data),
tid, next_tid, data_tid)
raise NEOStorageCreationUndoneError(dump(oid))
return self._askStorageForRead(oid,
Packets.AskObject(oid, at_tid, before_tid),
askStorage)
def _loadFromCache(self, oid, at_tid=None, before_tid=None):
"""
Load from local cache, return None if not found.
"""
if at_tid:
result = self._cache.load(oid, at_tid + '*')
assert not result or result[1] == at_tid
return result
return self._cache.load(oid, before_tid)
def tpc_begin(self, storage, transaction, tid=None, status=' '):
"""Begin a new transaction."""
# First get a transaction, only one is allowed at a time
txn_context = self._txn_container.new(transaction)
# use the given TID or request a new one to the master
answer_ttid = self._askPrimary(Packets.AskBeginTransaction(tid))
if answer_ttid is None:
raise NEOStorageError('tpc_begin failed')
assert tid in (None, answer_ttid), (tid, answer_ttid)
txn_context.Storage = storage
txn_context.ttid = answer_ttid
def store(self, oid, serial, data, version, transaction):
"""Store object."""
logging.debug('storing oid %s serial %s', dump(oid), dump(serial))
if not serial: # BBB
serial = ZERO_TID
self._store(self._txn_container.get(transaction), oid, serial, data)
def _store(self, txn_context, oid, serial, data, data_serial=None):
ttid = txn_context.ttid
if data is None:
# This is some undo: either a no-data object (undoing object
# creation) or a back-pointer to an earlier revision (going back to
# an older object revision).
compressed_data = ''
compression = 0
checksum = ZERO_HASH
else:
assert data_serial is None
size, compression, compressed_data = self.compress(data)
checksum = makeChecksum(compressed_data)
txn_context.data_size += size
# Store object in tmp cache
packet = Packets.AskStoreObject(oid, serial, compression,
checksum, compressed_data, data_serial, ttid)
txn_context.data_dict[oid] = data, serial, txn_context.write(
self, packet, oid, oid=oid, serial=serial)
while txn_context.data_size >= self._cache.max_size:
self._waitAnyTransactionMessage(txn_context)
self._waitAnyTransactionMessage(txn_context, False)
def _handleConflicts(self, txn_context):
data_dict = txn_context.data_dict
pop_conflict = txn_context.conflict_dict.popitem
resolved_dict = txn_context.resolved_dict
tryToResolveConflict = txn_context.Storage.tryToResolveConflict
while 1:
# We iterate over conflict_dict, and clear it,
# because new items may be added by calls to _store.
# This is also done atomically, to avoid race conditions
# with PrimaryNotificationsHandler.notifyDeadlock
try:
oid, serial = pop_conflict()
except KeyError:
return
try:
data, old_serial, _ = data_dict.pop(oid)
except KeyError:
assert oid is None, (oid, serial)
# Storage refused us from taking object lock, to avoid a
# possible deadlock. TID is actually used for some kind of
# "locking priority": when a higher value has the lock,
# this means we stored objects "too late", and we would
# otherwise cause a deadlock.
# To recover, we must ask storages to release locks we
# hold (to let possibly-competing transactions acquire
# them), and requeue our already-sent store requests.
ttid = txn_context.ttid
logging.info('Deadlock avoidance triggered for TXN %s'
' with new locking TID %s', dump(ttid), dump(serial))
txn_context.locking_tid = serial
packet = Packets.AskRebaseTransaction(ttid, serial)
for uuid in txn_context.conn_dict:
self._askStorageForWrite(txn_context, uuid, packet)
else:
if data is CHECKED_SERIAL:
raise ReadConflictError(oid=oid,
serials=(serial, old_serial))
# TODO: data can be None if a conflict happens during undo
if data:
txn_context.data_size -= len(data)
if self.last_tid < serial:
self.sync() # possible late invalidation (very rare)
try:
data = tryToResolveConflict(oid, serial, old_serial, data)
except ConflictError:
logging.info(
'Conflict resolution failed for %s@%s with %s',
dump(oid), dump(old_serial), dump(serial))
# With recent ZODB, get_pickle_metadata (from ZODB.utils)
# does not support empty values, so do not pass 'data'
# in this case.
raise ConflictError(oid=oid, serials=(serial, old_serial),
data=data or None)
else:
logging.info(
'Conflict resolution succeeded for %s@%s with %s',
dump(oid), dump(old_serial), dump(serial))
# Mark this conflict as resolved
resolved_dict[oid] = serial
# Try to store again
self._store(txn_context, oid, serial, data)
def _askStorageForWrite(self, txn_context, uuid, packet):
conn = txn_context.conn_dict[uuid]
try:
return conn.ask(packet, queue=txn_context.queue)
except AttributeError:
if conn is not None:
raise
except ConnectionClosed:
txn_context.conn_dict[uuid] = None
def waitResponses(self, queue):
"""Wait for all requests to be answered (or their connection to be
detected as closed)"""
pending = self.dispatcher.pending
_waitAnyMessage = self._waitAnyMessage
while pending(queue):
_waitAnyMessage(queue)
def waitStoreResponses(self, txn_context):
queue = txn_context.queue
pending = self.dispatcher.pending
_waitAnyTransactionMessage = self._waitAnyTransactionMessage
while pending(queue):
_waitAnyTransactionMessage(txn_context)
if txn_context.data_dict:
raise NEOStorageError('could not store/check all oids')
def tpc_vote(self, transaction):
"""Store current transaction."""
txn_context = self._txn_container.get(transaction)
self.waitStoreResponses(txn_context)
ttid = txn_context.ttid
ext = transaction._extension
ext = dumps(ext, _protocol) if ext else ''
# user and description are cast to str in case they're unicode.
# BBB: This is not required anymore with recent ZODB.
packet = Packets.AskStoreTransaction(ttid, str(transaction.user),
str(transaction.description), ext, list(txn_context.cache_dict))
queue = txn_context.queue
conn_dict = txn_context.conn_dict
# Ask in parallel all involved storage nodes to commit object metadata.
# Nodes that store the transaction metadata get a special packet.
trans_nodes = txn_context.write(self, packet, ttid)
packet = Packets.AskVoteTransaction(ttid)
for uuid in conn_dict:
if uuid not in trans_nodes:
self._askStorageForWrite(txn_context, uuid, packet)
self.waitStoreResponses(txn_context)
if None in conn_dict.itervalues(): # unlikely
# If some writes failed, we must first check whether
# all oids have been locked by at least one node.
failed = {node.getUUID(): node.isRunning()
for node in self.nm.getStorageList()
if conn_dict.get(node.getUUID(), 0) is None}
if txn_context.lockless_dict:
getCellList = self.pt.getCellList
for offset, uuid_set in txn_context.lockless_dict.iteritems():
for cell in getCellList(offset):
uuid = cell.getUUID()
if not (uuid in failed or uuid in uuid_set):
break
else:
# Very unlikely. Instead of raising, we could recover
# the transaction by doing something similar to
# deadlock avoidance; that would be done before voting.
# But it's not worth the complexity.
raise NEOStorageError(
'partition %s not fully write-locked' % offset)
failed = [uuid for uuid, running in failed.iteritems() if running]
# If there are running nodes for which some writes failed, ask the
# master whether they can be disconnected while keeping the cluster
# operational. If possible, this will happen during tpc_finish.
if failed:
try:
self._askPrimary(Packets.FailedVote(ttid, failed))
except ConnectionClosed:
pass
txn_context.voted = True
# We must not go further if connection to master was lost since
# tpc_begin, to lower the probability of failing during tpc_finish.
# IDEA: We can improve in 2 opposite directions:
# - In the case of big transactions, it would be useful to
# also detect failures earlier.
# - If possible, recover from master failure.
if txn_context.error:
raise NEOStorageError(txn_context.error)
if OLD_ZODB:
return [(oid, ResolvedSerial)
for oid in txn_context.resolved_dict]
return txn_context.resolved_dict
def tpc_abort(self, transaction):
"""Abort current transaction."""
txn_context = self._txn_container.pop(transaction)
if txn_context is None:
return
# We want that the involved nodes abort a transaction after any
# other packet sent by the client for this transaction. IOW, if we
# already have a connection with a storage node, potentially with
# a pending write, aborting only via the master may lead to a race
# condition. The consequence would be that storage nodes lock oids
# forever.
p = Packets.AbortTransaction(txn_context.ttid, ())
for conn in txn_context.conn_dict.itervalues():
if conn is not None:
try:
conn.send(p)
except ConnectionClosed:
pass
# Because we want to be sure that the involved nodes are notified,
# we still have to send the full list to the master. Most of the
# time, the storage nodes get 2 AbortTransaction packets, and the
# second one is rarely useful. Another option would be that the
# storage nodes keep a list of aborted transactions, but the
# difficult part would be to avoid a memory leak.
try:
notify = self.master_conn.send
except AttributeError:
pass
else:
try:
notify(Packets.AbortTransaction(txn_context.ttid,
list(txn_context.conn_dict)))
except ConnectionClosed:
pass
# We don't need to flush queue, as it won't be reused by future
# transactions (deleted on next line & indexed by transaction object
# instance).
self.dispatcher.forget_queue(txn_context.queue, flush_queue=False)
def tpc_finish(self, transaction, f=None):
"""Finish current transaction
To avoid inconsistencies between several databases involved in the
same transaction, an IStorage implementation must do its best not to
fail in tpc_finish. In particular, making a transaction permanent
should ideally be as simple as switching a bit permanently.
In NEO, all the data (with the exception of the tid, simply because
it is not known yet) is already flushed on disk at the end on the vote.
During tpc_finish, all nodes storing the transaction metadata are asked
to commit by saving the new tid and flushing again: for SQL backends,
it's just an UPDATE of 1 cell. At last, the metadata is moved to
a final place so that the new transaction is readable, but this is
something that can always be replayed (during the verification phase)
if any failure happens.
"""
txn_container = self._txn_container
if not txn_container.get(transaction).voted:
self.tpc_vote(transaction)
checked_list = []
self._load_lock_acquire()
try:
# Call finish on master
txn_context = txn_container.pop(transaction)
cache_dict = txn_context.cache_dict
checked_list = [oid for oid, data in cache_dict.iteritems()
if data is CHECKED_SERIAL]
for oid in checked_list:
del cache_dict[oid]
ttid = txn_context.ttid
p = Packets.AskFinishTransaction(ttid, list(cache_dict),
checked_list)
try:
tid = self._askPrimary(p, cache_dict=cache_dict, callback=f)
assert tid
except ConnectionClosed:
tid = self._getFinalTID(ttid)
if not tid:
raise
return tid
finally:
self._load_lock_release()
def _getFinalTID(self, ttid):
try:
p = Packets.AskFinalTID(ttid)
while 1:
try:
tid = self._askPrimary(p)
break
except ConnectionClosed:
pass
if tid == MAX_TID:
while 1:
try:
return self._askStorageForRead(ttid, p)
except NEOPrimaryMasterLost:
pass
elif tid:
return tid
except Exception:
logging.exception("Failed to get final tid for TXN %s",
dump(ttid))
def undo(self, undone_tid, txn):
txn_context = self._txn_container.get(txn)
txn_info, txn_ext = self._getTransactionInformation(undone_tid)
# Regroup objects per partition, to ask a minimum set of storage.
partition_oid_dict = defaultdict(list)
for oid in txn_info['oids']:
partition_oid_dict[self.pt.getPartition(oid)].append(oid)
# Ask storage the undo serial (serial at which object's previous data
# is)
getCellList = self.pt.getCellList
getCellSortKey = self.getCellSortKey
getConnForNode = self.getStorageConnection
queue = self._thread_container.queue
ttid = txn_context.ttid
undo_object_tid_dict = {}
snapshot_tid = p64(u64(self.last_tid) + 1)
kw = {
'queue': queue,
'partition_oid_dict': partition_oid_dict,
'undo_object_tid_dict': undo_object_tid_dict,
}
while partition_oid_dict:
for partition, oid_list in partition_oid_dict.iteritems():
cell_list = [cell
for cell in getCellList(partition, readable=True)
# Exclude nodes that may have missed previous resolved
# conflicts. For example, if a network failure happened
# only between the client and the storage, the latter would
# still be readable until we commit.
if txn_context.conn_dict.get(cell.getUUID(), 0) is not None]
storage_conn = getConnForNode(
min(cell_list, key=getCellSortKey).getNode())
storage_conn.ask(Packets.AskObjectUndoSerial(ttid,
snapshot_tid, undone_tid, oid_list),
partition=partition, **kw)
# Wait for all AnswerObjectUndoSerial. We might get
# OidNotFoundError, meaning that objects in transaction's oid_list
# do not exist any longer. This is the symptom of a pack, so forbid
# undoing transaction when it happens.
try:
self.waitResponses(queue)
except NEOStorageNotFoundError:
self.dispatcher.forget_queue(queue)
raise UndoError('non-undoable transaction')
# Send undo data to all storage nodes.
for oid, (current_serial, undo_serial, is_current) in \
undo_object_tid_dict.iteritems():
if is_current:
data = None
else:
# Serial being undone is not the latest version for this
# object. This is an undo conflict, try to resolve it.
try:
# Load the latest version we are supposed to see
if current_serial == ttid:
# XXX: see TODO below
data = txn_context.cache_dict[oid]
else:
data = self.load(oid, current_serial)[0]
# Load the version we were undoing to
undo_data = self.load(oid, undo_serial)[0]
except NEOStorageNotFoundError:
raise UndoError('Object not found while resolving undo '
'conflict')
# Resolve conflict
try:
data = txn_context.Storage.tryToResolveConflict(
oid, current_serial, undone_tid, undo_data, data)
except ConflictError:
raise UndoError('Some data were modified by a later ' \
'transaction', oid)
undo_serial = None
# TODO: The situation is similar to deadlock avoidance.
# Reenable the cache size limit to avoid OOM when there's
# a huge amount conflicting data, and get the data back
# from the storage when it's not in cache_dict anymore.
txn_context.cache_size = - float('inf')
self._store(txn_context, oid, current_serial, data, undo_serial)
self.waitStoreResponses(txn_context)
return None, list(undo_object_tid_dict)
def _getTransactionInformation(self, tid):
return self._askStorageForRead(tid,
Packets.AskTransactionInformation(tid))
def undoLog(self, first, last, filter=None, block=0):
# XXX: undoLog is broken
if last < 0:
# See FileStorage.py for explanation
last = first - last
# First get a list of transactions from all storage nodes.
# Each storage node will return TIDs only for UP_TO_DATE state and
# FEEDING state cells
queue = self._thread_container.queue
packet = Packets.AskTIDs(first, last, INVALID_PARTITION)
tid_set = set()
for storage_node in self.pt.getNodeSet(True):
conn = self.getStorageConnection(storage_node)
if conn is None:
continue
conn.ask(packet, queue=queue, tid_set=tid_set)
# Wait for answers from all storages.
# TODO: Results are incomplete when readable cells move concurrently
# from one storage to another. We detect when this happens and
# retry.
self.waitResponses(queue)
# Reorder tids
ordered_tids = sorted(tid_set, reverse=True)
logging.debug("UndoLog tids %s", map(dump, ordered_tids))
# For each transaction, get info
undo_info = []
append = undo_info.append
for tid in ordered_tids:
(txn_info, txn_ext) = self._getTransactionInformation(tid)
if filter is None or filter(txn_info):
txn_info.pop('packed')
txn_info.pop("oids")
if txn_ext:
txn_info.update(loads(txn_ext))
append(txn_info)
if len(undo_info) >= last - first:
break
# Check we return at least one element, otherwise call
# again but extend offset
if len(undo_info) == 0 and not block:
undo_info = self.undoLog(first=first, last=last*5, filter=filter,
block=1)
return undo_info
def transactionLog(self, start, stop, limit):
tid_list = []
# request a tid list for each partition
for offset in xrange(self.pt.getPartitions()):
r = self._askStorageForRead(offset,
Packets.AskTIDsFrom(start, stop, limit, offset))
if r:
tid_list = list(heapq.merge(tid_list, r))
if len(tid_list) >= limit:
del tid_list[limit:]
stop = tid_list[-1]
# request transactions informations
txn_list = []
append = txn_list.append
tid = None
for tid in tid_list:
(txn_info, txn_ext) = self._getTransactionInformation(tid)
txn_info['ext'] = loads(txn_ext) if txn_ext else {}
append(txn_info)
return (tid, txn_list)
def history(self, oid, size=1, filter=None):
packet = Packets.AskObjectHistory(oid, 0, size)
result = []
# history_list is already sorted descending (by the storage)
for serial, size in self._askStorageForRead(oid, packet):
txn_info, txn_ext = self._getTransactionInformation(serial)
# create history dict
del txn_info['id']
del txn_info['oids']
del txn_info['packed']
txn_info['tid'] = serial
txn_info['version'] = ''
txn_info['size'] = size
if filter is None or filter(txn_info):
result.append(txn_info)
if txn_ext:
txn_info.update(loads(txn_ext))
return result
def importFrom(self, storage, source):
# TODO: The main difference with BaseStorage implementation is that
# preindex can't be filled with the result 'store' (tid only
# known after 'tpc_finish'. This method could be dropped if we
# implemented IStorageRestoreable (a wrapper around source would
# still be required for partial import).
preindex = {}
for transaction in source.iterator():
tid = transaction.tid
self.tpc_begin(storage, transaction, tid, transaction.status)
for r in transaction:
oid = r.oid
try:
pre = preindex[oid]
except KeyError:
try:
pre = self.load(oid)[1]
except NEOStorageNotFoundError:
pre = ZERO_TID
self.store(oid, pre, r.data, r.version, transaction)
preindex[oid] = tid
conflicted = self.tpc_vote(transaction)
assert not conflicted, conflicted
real_tid = self.tpc_finish(transaction)
assert real_tid == tid, (real_tid, tid)
from .iterator import iterator
def sync(self):
self._askPrimary(Packets.Ping())
def pack(self, t):
tid = TimeStamp(*time.gmtime(t)[:5] + (t % 60, )).raw()
if tid == ZERO_TID:
raise NEOStorageError('Invalid pack time')
self._askPrimary(Packets.AskPack(tid))
# XXX: this is only needed to make ZODB unit tests pass.
# It should not be otherwise required (clients should be free to load
# old data as long as it is available in cache, event if it was pruned
# by a pack), so don't bother invalidating on other clients.
self._cache_lock_acquire()
try:
self._cache.clear()
finally:
self._cache_lock_release()
def getLastTID(self, oid):
return self.load(oid)[1]
def checkCurrentSerialInTransaction(self, oid, serial, transaction):
self._checkCurrentSerialInTransaction(
self._txn_container.get(transaction), oid, serial)
def _checkCurrentSerialInTransaction(self, txn_context, oid, serial):
ttid = txn_context.ttid
# ZODB.Connection performs calls 'checkCurrentSerialInTransaction'
# after stores, and skips oids that have been successfully stored.
assert oid not in txn_context.cache_dict, oid
assert oid not in txn_context.data_dict, oid
packet = Packets.AskCheckCurrentSerial(ttid, oid, serial)
txn_context.data_dict[oid] = CHECKED_SERIAL, serial, txn_context.write(
self, packet, oid, oid=oid, serial=serial)
self._waitAnyTransactionMessage(txn_context, False)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/cache.py 0000664 0000000 0000000 00000031423 13465024610 0024722 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2011-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from __future__ import division
import math
from bisect import insort
class CacheItem(object):
__slots__ = ('oid', 'tid', 'next_tid', 'data',
'counter', 'level', 'expire',
'prev', 'next')
def __repr__(self):
s = ''
for attr in self.__slots__:
try:
value = getattr(self, attr)
if value:
if attr in ('prev', 'next'):
s += ' %s=<...>' % attr
continue
elif attr == 'data':
value = '...'
s += ' %s=%r' % (attr, value)
except AttributeError:
pass
return '<%s%s>' % (self.__class__.__name__, s)
def __lt__(self, other):
return self.tid < other.tid
class ClientCache(object):
"""In-memory pickle cache based on Multi-Queue cache algorithm
Multi-Queue algorithm for Second Level Buffer Caches:
https://www.usenix.org/event/usenix01/full_papers/zhou/zhou_html/index.html
Quick description:
- There are multiple "regular" queues, plus a history queue
- The queue to store an object in depends on its access frequency
- The queue an object is in defines its lifespan (higher-index queue eq.
longer lifespan)
-> The more often an object is accessed, the higher lifespan it will
have
- Upon cache or history hit, object frequency is increased and object
might get moved to longer-lived queue
- Each access "ages" objects in cache, and an aging object is moved to
shorter-lived queue as it ages without being accessed, or in the
history queue if it's really too old.
- The history queue only contains items with counter > 0
"""
__slots__ = ('max_size', '_life_time', '_max_history_size',
'_queue_list', '_oid_dict', '_time', '_size', '_history_size',
'_nhit', '_nmiss')
def __init__(self, life_time=10000, max_history_size=100000,
max_size=20*1024*1024):
self._life_time = life_time
self._max_history_size = max_history_size
self.max_size = max_size
self.clear()
def clear(self):
"""Reset cache"""
self._queue_list = [None] # first is history
self._oid_dict = {}
self._time = 0
self._size = 0
self._history_size = 0
self._nhit = self._nmiss = 0
def __repr__(self):
nload = self._nhit + self._nmiss
return ("<%s #loads=%s #oids=%s size=%s time=%s queue_length=%r"
" (life_time=%s max_history_size=%s max_size=%s)>") % (
self.__class__.__name__,
nload and '%s (%.3g%% hit)' % (nload, 100 * self._nhit / nload),
len(self._oid_dict), self._size, self._time,
[self._history_size] + [
sum(1 for _ in self._iterQueue(level))
for level in xrange(1, len(self._queue_list))],
self._life_time, self._max_history_size, self.max_size)
def _iterQueue(self, level):
"""for debugging purpose"""
if level < len(self._queue_list):
# Lockless iteration of the queue.
# XXX: In case of race condition, the result is wrong but at least,
# it won't loop endlessly. If one want to collect accurate
# statistics, a lock should be used.
expire = 0
item = self._queue_list[level]
while item and item.level == level and expire < item.expire:
yield item
expire = item.expire
item = item.next
def _remove_from_oid_dict(self, item):
item_list = self._oid_dict[item.oid]
item_list.remove(item)
if not item_list:
del self._oid_dict[item.oid]
def _add(self, item):
level = item.level
try:
head = self._queue_list[level]
except IndexError:
assert len(self._queue_list) == level
self._queue_list.append(item)
item.prev = item.next = item
else:
if head:
item.prev = tail = head.prev
tail.next = head.prev = item
item.next = head
else:
self._queue_list[level] = item
item.prev = item.next = item
if level:
item.expire = self._time + self._life_time
else:
self._empty(item)
self._history_size += 1
if self._max_history_size < self._history_size:
self._remove(head)
self._remove_from_oid_dict(head)
def _empty(self, item):
self._size -= len(item.data)
item.data = None
def _remove(self, item):
level = item.level
if level is not None:
if level:
item.level = level - 1
else:
self._history_size -= 1
next = item.next
if next is item:
self._queue_list[level] = next = None
else:
item.prev.next = next
next.prev = item.prev
if self._queue_list[level] is item:
self._queue_list[level] = next
return next
def _fetched(self, item, _log=math.log):
self._remove(item)
item.counter = counter = item.counter + 1
# XXX It might be better to adjust the level according to the object
# size. See commented factor for example.
item.level = 1 + int(_log(counter, 2)
# * (1.01 - len(item.data) / self.max_size)
)
self._add(item)
self._time = time = self._time + 1
for head in self._queue_list[1:]:
if head and head.expire < time:
self._remove(head)
if head.level or head.counter:
self._add(head)
else:
self._empty(head)
self._remove_from_oid_dict(head)
break
def _load(self, oid, before_tid=None):
item_list = self._oid_dict.get(oid)
if item_list:
if before_tid:
for item in reversed(item_list):
if item.tid < before_tid:
next_tid = item.next_tid
if next_tid and next_tid < before_tid:
break
return item
else:
item = item_list[-1]
if not item.next_tid:
return item
def load(self, oid, before_tid=None):
"""Return a revision of oid that was current before given tid"""
item = self._load(oid, before_tid)
if item:
data = item.data
if data is not None:
self._nhit += 1
self._fetched(item)
return data, item.tid, item.next_tid
self._nmiss += 1
def store(self, oid, data, tid, next_tid):
"""Store a new data record in the cache"""
size = len(data)
max_size = self.max_size
if size < max_size:
item = self._load(oid, next_tid)
if item:
# We don't handle late invalidations for cached oids, because
# the caller is not supposed to explicitly asks for tids after
# app.last_tid (and the cache should be empty when app.last_tid
# is still None).
assert item.tid == tid, (item, tid)
if item.level: # already stored
assert item.next_tid == next_tid and item.data == data
return
assert not item.data
# Possible case of late invalidation.
item.next_tid = next_tid
else:
item = CacheItem()
item.oid = oid
item.tid = tid
item.next_tid = next_tid
item.counter = 0
item.level = None
try:
item_list = self._oid_dict[oid]
except KeyError:
self._oid_dict[oid] = [item]
else:
if next_tid:
insort(item_list, item)
else:
prev = item_list[-1]
assert prev.next_tid <= tid, (prev, item)
item.counter = prev.counter
if prev.level:
prev.counter = 0
if prev.level > 1:
self._fetched(prev)
item_list.append(item)
else:
self._remove(prev)
item_list[-1] = item
item.data = data
self._fetched(item)
self._size += size
if max_size < self._size:
for head in self._queue_list[1:]:
while head:
next = self._remove(head)
if head.counter:
head.level = 0
self._add(head)
else:
self._empty(head)
self._remove_from_oid_dict(head)
if self._size <= max_size:
return
head = next
def invalidate(self, oid, tid):
"""Mark data record as being valid only up to given tid"""
try:
item = self._oid_dict[oid][-1]
except KeyError:
pass
else:
if item.next_tid is None:
item.next_tid = tid
else:
assert item.next_tid <= tid, (item, oid, tid)
def clear_current(self):
for oid, item_list in self._oid_dict.items():
item = item_list[-1]
if item.next_tid is None:
if item.level:
self._empty(item)
self._remove(item)
del item_list[-1]
# We don't preserve statistics of removed items. This could be
# done easily when previous versions are cached, by copying
# counters, but it would not be fair for other oids, so it's
# probably not worth it.
if not item_list:
del self._oid_dict[oid]
def test(self):
cache = ClientCache()
repr(cache)
self.assertEqual(cache.load(1, 10), None)
self.assertEqual(cache.load(1, None), None)
cache.invalidate(1, 10)
data = '5', 5, 10
# 2 identical stores happens if 2 threads got a cache miss at the same time
cache.store(1, *data)
cache.store(1, *data)
self.assertEqual(cache.load(1, 10), data)
self.assertEqual(cache.load(1, None), None)
data = '15', 15, None
cache.store(1, *data)
self.assertEqual(cache.load(1, None), data)
cache.clear_current()
self.assertEqual(cache._size, 1)
self.assertEqual(cache.load(1, None), None)
cache.store(1, *data)
cache.invalidate(1, 20)
self.assertEqual(cache._size, 3)
cache.clear_current()
self.assertEqual(cache._size, 3)
self.assertEqual(cache.load(1, 20), ('15', 15, 20))
cache.store(1, '10', 10, 15)
cache.store(1, '20', 20, 21)
self.assertEqual([5, 10, 15, 20], [x.tid for x in cache._oid_dict[1]])
self.assertRaises(AssertionError, cache.store, 1, '20', 20, None)
repr(cache)
map(repr, cache._queue_list)
# Test late invalidations.
cache.clear()
cache.store(1, '10*', 10, None)
cache.max_size = cache._size
cache.store(2, '10', 10, 15)
self.assertEqual(cache._queue_list[0].oid, 1)
cache.store(2, '15', 15, None)
self.assertEqual(cache._queue_list[2].oid, 2)
data = '10', 10, 15
cache.store(1, *data)
self.assertEqual(cache.load(1, 15), data)
self.assertEqual(1, cache._history_size)
cache.clear_current()
self.assertEqual(0, cache._history_size)
if __name__ == '__main__':
import unittest
unittest.TextTestRunner().run(type('', (unittest.TestCase,), {
'runTest': test})())
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/component.xml 0000664 0000000 0000000 00000004720 13465024610 0026031 0 ustar 00root root 0000000 0000000
A scalable storage for Zope
Give the list of the master node like ip:port ip:port...
Give the name of the cluster
The value is either of 'boolean' type or an explicit algorithm that
matches the regex 'zlib(=\d+)?', where the optional number is
the compression level.
Any record that is not smaller once compressed is stored uncompressed.
True is the default and its meaning may change over time:
currently, it is the same as 'zlib'.
If true, only reads may be executed against the storage. Note
that the "pack" operation is not considered a write operation
and is still allowed on a read-only neostorage.
Log debugging information to specified SQLite DB.
Storage cache size in bytes. Records are cached uncompressed.
Optional ``KB``, ``MB`` or ``GB`` suffixes can (and usually are)
used to specify units other than bytes.
The file designated by this option contains an updated list of master
nodes which are known to be part of current cluster, so new nodes can
be added/removed without requiring a config change each time.
Certificate authority in PEM format.
Certificate in PEM format.
Private key in PEM format.
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/config.py 0000664 0000000 0000000 00000002201 13465024610 0025114 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ZODB.config import BaseConfig
class NeoStorage(BaseConfig):
def open(self):
from .Storage import Storage
config = self.config
return Storage(**{k: getattr(config, k)
for k in config.getSectionAttributes()})
def compress(value):
from ZConfig.datatypes import asBoolean
try:
return asBoolean(value)
except ValueError:
from neo.lib.compress import parseOption
return parseOption(value)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/exception.py 0000664 0000000 0000000 00000003502 13465024610 0025652 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ZODB import POSException
class NEOStorageError(POSException.StorageError):
pass
class NEOStorageReadRetry(NEOStorageError):
pass
class NEOStorageNotFoundError(NEOStorageError):
pass
class NEOStorageDoesNotExistError(NEOStorageNotFoundError):
"""
This error is a refinement of NEOStorageNotFoundError: this means
that some object was not found, but also that it does not exist at all.
"""
class NEOStorageCreationUndoneError(NEOStorageDoesNotExistError):
"""
This error is a refinement of NEOStorageDoesNotExistError: this means that
some object existed at some point, but its creation was undone.
"""
# TODO: Inherit from transaction.interfaces.TransientError
# (not recognized yet by ERP5 as a transient error).
class NEOPrimaryMasterLost(POSException.ReadConflictError):
"""
A read operation to a storage node failed because we get disconnected from
the primary master (which causes all connections to storages to be closed
as well). The connection failure to the master may be temporary so we'd
like to restart the transaction; if it isn't, we'll get failures when
trying to reconnect.
"""
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/handlers/ 0000775 0000000 0000000 00000000000 13465024610 0025102 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/handlers/__init__.py 0000664 0000000 0000000 00000001760 13465024610 0027217 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import handler
from ZODB.POSException import StorageError, ReadOnlyError
class AnswerBaseHandler(handler.AnswerBaseHandler): # XXX
def protocolError(self, conn, message):
raise StorageError("protocol error: %s" % message)
def readOnlyAccess(self, conn, message):
raise ReadOnlyError(message)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/handlers/master.py 0000664 0000000 0000000 00000015463 13465024610 0026760 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.exception import PrimaryElected
from neo.lib.handler import MTEventHandler
from neo.lib.pt import MTPartitionTable as PartitionTable
from neo.lib.protocol import NodeStates
from . import AnswerBaseHandler
from ..exception import NEOStorageError
class PrimaryBootstrapHandler(AnswerBaseHandler):
""" Bootstrap handler used when looking for the primary master """
def answerLastTransaction(*args):
pass
class PrimaryNotificationsHandler(MTEventHandler):
""" Handler that process the notifications from the primary master """
def notPrimaryMaster(self, *args):
try:
super(PrimaryNotificationsHandler, self).notPrimaryMaster(*args)
except PrimaryElected, e:
self.app.primary_master_node, = e.args
def answerLastTransaction(self, conn, ltid):
app = self.app
app_last_tid = app.__dict__.get('last_tid', '')
if app_last_tid != ltid:
# Either we're connecting or we already know the last tid
# via invalidations.
assert app.master_conn is None, app.master_conn
app._cache_lock_acquire()
try:
if app_last_tid < ltid:
app._cache.clear_current()
# In the past, we tried not to invalidate the
# Connection caches entirely, using the list of
# oids that are invalidated by clear_current.
# This was wrong because these caches may have
# entries that are not in the NEO cache anymore.
else:
# The DB was truncated. It happens so
# rarely that we don't need to optimize.
app._cache.clear()
# Make sure a parallel load won't refill the cache
# with garbage.
app._loading_oid = app._loading_invalidated = None
finally:
app._cache_lock_release()
db = app.getDB()
db is None or db.invalidateCache()
app.last_tid = ltid
app.ignore_invalidations = False
def answerTransactionFinished(self, conn, _, tid, callback, cache_dict):
app = self.app
app.last_tid = tid
# Update cache
cache = app._cache
app._cache_lock_acquire()
try:
for oid, data in cache_dict.iteritems():
# Update ex-latest value in cache
cache.invalidate(oid, tid)
if data is not None:
# Store in cache with no next_tid
cache.store(oid, data, tid, None)
if callback is not None:
callback(tid)
finally:
app._cache_lock_release()
def connectionClosed(self, conn):
app = self.app
if app.master_conn is not None:
msg = "connection to primary master node closed"
logging.critical(msg)
app.master_conn = None
for txn_context in app.txn_contexts():
txn_context.error = msg
try:
app.__dict__.pop('pt').clear()
except KeyError:
pass
app.primary_master_node = None
super(PrimaryNotificationsHandler, self).connectionClosed(conn)
def stopOperation(self, conn):
logging.critical("master node ask to stop operation")
def notifyClusterInformation(self, conn, state):
# TODO: on shutdown, abort any transaction that is not voted
logging.info("cluster switching to %s state", state)
def invalidateObjects(self, conn, tid, oid_list):
app = self.app
if app.ignore_invalidations:
return
app.last_tid = tid
app._cache_lock_acquire()
try:
invalidate = app._cache.invalidate
loading = app._loading_oid
for oid in oid_list:
invalidate(oid, tid)
if oid == loading:
app._loading_invalidated.append(tid)
db = app.getDB()
if db is not None:
db.invalidate(tid, oid_list)
finally:
app._cache_lock_release()
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
pt = self.app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def notifyNodeInformation(self, conn, timestamp, node_list):
super(PrimaryNotificationsHandler, self).notifyNodeInformation(
conn, timestamp, node_list)
# XXX: 'update' automatically closes UNKNOWN nodes. Do we really want
# to do the same thing for nodes in other non-running states ?
getByUUID = self.app.nm.getByUUID
for node in node_list:
if node[3] != NodeStates.RUNNING:
node = getByUUID(node[2])
if node and node.isConnected():
node.getConnection().close()
def notifyDeadlock(self, conn, ttid, locking_tid):
for txn_context in self.app.txn_contexts():
if txn_context.ttid == ttid:
txn_context.conflict_dict[None] = locking_tid
txn_context.wakeup(conn)
break
class PrimaryAnswersHandler(AnswerBaseHandler):
""" Handle that process expected packets from the primary master """
def answerBeginTransaction(self, conn, ttid):
self.app.setHandlerData(ttid)
def answerNewOIDs(self, conn, oid_list):
self.app.new_oids = iter(oid_list)
def incompleteTransaction(self, conn, message):
raise NEOStorageError("storage nodes for which vote failed can not be"
" disconnected without making the cluster non-operational")
def answerTransactionFinished(self, conn, _, tid):
self.app.setHandlerData(tid)
def answerPack(self, conn, status):
if not status:
raise NEOStorageError('Already packing')
def answerLastTransaction(self, conn, ltid):
pass
def answerFinalTID(self, conn, tid):
self.app.setHandlerData(tid)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/handlers/storage.py 0000664 0000000 0000000 00000017755 13465024610 0027137 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ZODB.TimeStamp import TimeStamp
from neo.lib import logging
from neo.lib.compress import decompress_list
from neo.lib.connection import ConnectionClosed
from neo.lib.protocol import Packets, uuid_str, ZERO_TID
from neo.lib.util import dump, makeChecksum
from neo.lib.exception import NodeNotReady
from neo.lib.handler import MTEventHandler
from . import AnswerBaseHandler
from ..transactions import Transaction
from ..exception import NEOStorageError, NEOStorageNotFoundError
from ..exception import NEOStorageReadRetry, NEOStorageDoesNotExistError
class StorageEventHandler(MTEventHandler):
def _acceptIdentification(*args):
pass
class StorageBootstrapHandler(AnswerBaseHandler):
""" Handler used when connecting to a storage node """
def notReady(self, conn, message):
conn.close()
raise NodeNotReady(message)
class StorageAnswersHandler(AnswerBaseHandler):
""" Handle all messages related to ZODB operations """
def answerObject(self, conn, oid, *args):
self.app.setHandlerData(args)
def answerStoreObject(self, conn, conflict, oid, serial):
txn_context = self.app.getHandlerData()
if conflict:
if conflict == ZERO_TID:
txn_context.written(self.app, conn.getUUID(), oid, serial)
return
# Conflicts can not be resolved now because 'conn' is locked.
# We must postpone the resolution (by queuing the conflict in
# 'conflict_dict') to avoid any deadlock with another thread that
# also resolves a conflict successfully to the same storage nodes.
# Warning: if a storage (S1) is much faster than another (S2), then
# we may process entirely a conflict with S1 (i.e. we received the
# answer to the store of the resolved object on S1) before we
# receive the conflict answer from the first store on S2.
logging.info('%s reports a conflict on %s:%s with %s',
uuid_str(conn.getUUID()), dump(oid),
dump(txn_context.ttid), dump(conflict))
# If this conflict is not already resolved, mark it for
# resolution.
if txn_context.resolved_dict.get(oid, '') < conflict:
txn_context.conflict_dict[oid] = conflict
else:
txn_context.written(self.app, conn.getUUID(), oid)
answerCheckCurrentSerial = answerStoreObject
def answerRebaseTransaction(self, conn, oid_list):
txn_context = self.app.getHandlerData()
ttid = txn_context.ttid
queue = txn_context.queue
try:
for oid in oid_list:
# We could have an extra parameter to tell the storage if we
# still have the data, and in this case revert what was done
# in Transaction.written. This would save bandwidth in case of
# conflict.
conn.ask(Packets.AskRebaseObject(ttid, oid),
queue=queue, oid=oid)
except ConnectionClosed:
txn_context.conn_dict[conn.getUUID()] = None
def answerRebaseObject(self, conn, conflict, oid):
if conflict:
txn_context = self.app.getHandlerData()
serial, conflict, data = conflict
assert serial and serial < conflict, (serial, conflict)
resolved = conflict <= txn_context.resolved_dict.get(oid, '')
try:
cached = txn_context.cache_dict.pop(oid)
except KeyError:
if resolved:
# We should still be waiting for an answer from this node,
# unless we lost connection.
assert conn.uuid in txn_context.data_dict[oid][2] or \
txn_context.conn_dict[conn.uuid] is None
return
assert oid in txn_context.data_dict
if serial <= txn_context.conflict_dict.get(oid, ''):
# Another node already reported this conflict or a newer,
# by answering to this rebase or to the previous store.
return
# A node has not answered yet to a previous store. Do not wait
# it to report the conflict because it may fail before.
else:
# The data for this oid are now back on client side.
# Revert what was done in Transaction.written
assert not resolved
if data is None: # undo or CHECKED_SERIAL
data = cached
else:
compression, checksum, data = data
if checksum != makeChecksum(data):
raise NEOStorageError(
'wrong checksum while getting back data for'
' object %s during rebase of transaction %s'
% (dump(oid), dump(txn_context.ttid)))
data = decompress_list[compression](data)
size = len(data)
txn_context.data_size += size
if cached:
assert cached == data
txn_context.cache_size -= size
txn_context.data_dict[oid] = data, serial, []
txn_context.conflict_dict[oid] = conflict
def answerStoreTransaction(self, conn):
pass
answerVoteTransaction = answerStoreTransaction
def connectionClosed(self, conn):
# only called if we were waiting for an answer
txn_context = self.app.getHandlerData()
if type(txn_context) is Transaction:
txn_context.nodeLost(self.app, conn.getUUID())
super(StorageAnswersHandler, self).connectionClosed(conn)
def answerTIDsFrom(self, conn, tid_list):
logging.debug('Get %u TIDs from %r', len(tid_list), conn)
self.app.setHandlerData(tid_list)
def answerTransactionInformation(self, conn, tid,
user, desc, ext, packed, oid_list):
self.app.setHandlerData(({
'time': TimeStamp(tid).timeTime(),
'user_name': user,
'description': desc,
'id': tid,
'oids': oid_list,
'packed': packed,
}, ext))
def answerObjectHistory(self, conn, _, history_list):
# history_list is a list of tuple (serial, size)
self.app.setHandlerData(history_list)
def oidNotFound(self, conn, message):
# This can happen either when :
# - loading an object
# - asking for history
raise NEOStorageNotFoundError(message)
def oidDoesNotExist(self, conn, message):
raise NEOStorageDoesNotExistError(message)
def tidNotFound(self, conn, message):
# This can happen when requiring txn informations
raise NEOStorageNotFoundError(message)
def nonReadableCell(self, conn, message):
logging.info('non readable cell')
raise NEOStorageReadRetry(True)
def answerTIDs(self, conn, tid_list, tid_set):
tid_set.update(tid_list)
def answerObjectUndoSerial(self, conn, object_tid_dict, partition,
partition_oid_dict, undo_object_tid_dict):
del partition_oid_dict[partition]
undo_object_tid_dict.update(object_tid_dict)
def answerFinalTID(self, conn, tid):
self.app.setHandlerData(tid)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/iterator.py 0000664 0000000 0000000 00000005155 13465024610 0025513 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ZODB import BaseStorage
from neo.lib.protocol import ZERO_TID, MAX_TID
from neo.lib.util import u64, add64
from .exception import NEOStorageCreationUndoneError, NEOStorageNotFoundError
CHUNK_LENGTH = 100
class Record(BaseStorage.DataRecord):
""" BaseStorage Transaction record yielded by the Transaction object """
def __str__(self):
oid = u64(self.oid)
tid = u64(self.tid)
args = (oid, tid, len(self.data), self.data_txn)
return 'Record %s:%s: %s (%s)' % args
class Transaction(BaseStorage.TransactionRecord):
""" Transaction object yielded by the NEO iterator """
def __init__(self, app, txn):
super(Transaction, self).__init__(txn['id'],
'p' if txn['packed'] else ' ',
txn['user_name'], txn['description'], txn['ext'])
self.app = app
self.oid_list = txn['oids']
def __iter__(self):
""" Iterate over the transaction records """
load = self.app._loadFromStorage
for oid in self.oid_list:
try:
data, _, _, data_tid = load(oid, self.tid, None)
except NEOStorageCreationUndoneError:
data = data_tid = None
except NEOStorageNotFoundError:
# Transactions are not updated after a pack, so their object
# will not be found in the database. Skip them.
continue
yield Record(oid, self.tid, data, data_tid)
def __str__(self):
return 'Transaction #%s: %s %s' \
% (u64(self.tid), self.user, self.status)
def iterator(app, start=None, stop=None):
"""NEO transaction iterator"""
if start is None:
start = ZERO_TID
stop = min(stop or MAX_TID, app.last_tid)
while 1:
max_tid, chunk = app.transactionLog(start, stop, CHUNK_LENGTH)
if not chunk:
break # nothing more
for txn in chunk:
yield Transaction(app, txn)
start = add64(max_tid, 1)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/transactions.py 0000664 0000000 0000000 00000016323 13465024610 0026371 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2017-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import defaultdict
from ZODB.POSException import StorageTransactionError
from neo.lib.connection import ConnectionClosed
from neo.lib.locking import SimpleQueue
from neo.lib.protocol import Packets
from neo.lib.util import dump
from .exception import NEOStorageError
@apply
class _WakeupPacket(object):
handler_method_name = 'pong'
_args = ()
getId = int
class Transaction(object):
cache_size = 0 # size of data in cache_dict
data_size = 0 # size of data in data_dict
error = None
locking_tid = None
voted = False
ttid = None # XXX: useless, except for testBackupReadOnlyAccess
lockless_dict = None # {partition: {uuid}}
def __init__(self, txn):
self.queue = SimpleQueue()
self.txn = txn
# data being stored
self.data_dict = {} # {oid: (value, serial, [node_id])}
# data stored: this will go to the cache on tpc_finish
self.cache_dict = {} # {oid: value}
# conflicts to resolve
self.conflict_dict = {} # {oid: serial}
# resolved conflicts
self.resolved_dict = {} # {oid: serial}
# involved storage nodes; connection is None is connection was lost
self.conn_dict = {} # {node_id: connection}
def __repr__(self):
error = self.error
return ("<%s ttid=%s locking_tid=%s voted=%u"
" #queue=%s #writing=%s #written=%s%s>") % (
self.__class__.__name__,
dump(self.ttid), dump(self.locking_tid), self.voted,
len(self.queue._queue), len(self.data_dict), len(self.cache_dict),
' error=%r' % error if error else '')
def wakeup(self, conn):
self.queue.put((conn, _WakeupPacket, {}))
def write(self, app, packet, object_id, **kw):
uuid_list = []
pt = app.pt
conn_dict = self.conn_dict
object_id = pt.getPartition(object_id)
for cell in pt.getCellList(object_id):
node = cell.getNode()
uuid = node.getUUID()
try:
try:
conn = conn_dict[uuid]
except KeyError:
conn = conn_dict[uuid] = app.getStorageConnection(node)
if self.locking_tid and 'oid' in kw:
# A deadlock happened but this node is not aware of it.
# Tell it to write-lock with the same locking tid as
# for the other nodes. The condition on kw is to
# distinguish whether we're writing an oid or
# transaction metadata.
conn.ask(Packets.AskRebaseTransaction(
self.ttid, self.locking_tid), queue=self.queue)
conn.ask(packet, queue=self.queue, **kw)
uuid_list.append(uuid)
except AttributeError:
if conn is not None:
raise
except ConnectionClosed:
conn_dict[uuid] = None
if uuid_list:
return uuid_list
raise NEOStorageError(
'no storage available for write to partition %s' % object_id)
def written(self, app, uuid, oid, lockless=None):
# When a node is being disconnected by the master because it was
# not part of the transaction that caused a conflict, we may receive a
# positive answer (not to be confused with lockless stores) before the
# conflict. Because we have no way to identify such case, we must keep
# the data in self.data_dict until all nodes have answered so we remain
# able to resolve conflicts.
data, serial, uuid_list = self.data_dict[oid]
try:
uuid_list.remove(uuid)
except ValueError:
# The most common case for this exception is because nodeLost()
# tries all oids blindly.
# Another possible case is when we receive several positive answers
# from a node that is being disconnected by the master, whereas the
# first one (at least) should actually be conflict answer.
return
if lockless:
if lockless != serial: # late lockless write
assert lockless < serial, (lockless, serial)
uuid_list.append(uuid)
return
# It's safe to do this after the above excepts: either the cell is
# already marked as lockless or the node will be reported as failed.
lockless = self.lockless_dict
if not lockless:
lockless = self.lockless_dict = defaultdict(set)
lockless[app.pt.getPartition(oid)].add(uuid)
if oid in self.conflict_dict:
# In the case of a rebase, uuid_list may not contain the id
# of the node reporting a conflict.
return
if uuid_list:
return
del self.data_dict[oid]
if type(data) is bytes:
size = len(data)
self.data_size -= size
size += self.cache_size
if size < app._cache.max_size:
self.cache_size = size
else:
# Do not cache data past cache max size, as it
# would just flush it on tpc_finish. This also
# prevents memory errors for big transactions.
data = None
self.cache_dict[oid] = data
def nodeLost(self, app, uuid):
# The following line is sometimes redundant
# with the one in `except ConnectionClosed:` clauses.
self.conn_dict[uuid] = None
for oid in list(self.data_dict):
# Exclude case of 1 conflict error immediately followed by a
# connection loss, possibly with lockless writes to replicas.
if oid not in self.conflict_dict:
self.written(app, uuid, oid)
class TransactionContainer(dict):
# IDEA: Drop this container and use the new set_data/data API on
# transactions (requires transaction >= 1.6).
def pop(self, txn):
return dict.pop(self, id(txn), None)
def get(self, txn):
try:
return self[id(txn)]
except KeyError:
raise StorageTransactionError("unknown transaction %r" % txn)
def new(self, txn):
key = id(txn)
if key in self:
raise StorageTransactionError("commit of transaction %r"
" already started" % txn)
context = self[key] = Transaction(txn)
return context
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/client/zodburi.py 0000664 0000000 0000000 00000006574 13465024610 0025346 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2017-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
"""NEO URI resolver for zodburi
URI format:
neo://name@master1,master2,...,masterN?options
"""
import ZODB.config
import ZConfig
from cStringIO import StringIO
from collections import OrderedDict
from urlparse import urlsplit, parse_qsl
# neo_zconf_options returns set of zconfig options supported by NEO storage
def neo_zconf_options():
neo_schema = """
"""
neo_schema = StringIO(neo_schema)
neo_schema = ZConfig.loadSchemaFile(neo_schema)
neo_storage_zconf = neo_schema.gettype('NeoStorage')
options = {k for k, _ in neo_storage_zconf}
assert 'master_nodes' in options
assert 'name' in options
return options
# canonical_opt_name returns "oPtion_nAme" as "option-name"
def canonical_opt_name(name):
return name.lower().replace('_', '-')
# worker entrypoint for resolve_uri and tests
def _resolve_uri(uri):
scheme, netloc, path, query, frag = urlsplit(uri)
if scheme != "neo":
raise ValueError("invalid uri: %s : expected neo:// scheme" % uri)
if path != "":
raise ValueError("invalid uri: %s : non-empty path" % uri)
if frag != "":
raise ValueError("invalid uri: %s : non-empty fragment" % uri)
# extract master list and name from netloc
name, masterloc = netloc.split('@', 1)
master_list = masterloc.split(',')
neokw = OrderedDict()
neokw['master_nodes'] = ' '.join(master_list)
neokw['name'] = name
# get options from query: only those that are defined by NEO schema go to
# storage - rest are returned as database options
dbkw = {}
neo_options = neo_zconf_options()
for k, v in OrderedDict(parse_qsl(query)).items():
if k in neo_options:
neokw[k] = v
else:
# it might be option for storage, but not in canonical form e.g.
# read_only -> read-only (zodburi world settled on using "_" and
# ZConfig world on "-" as separators)
k2 = canonical_opt_name(k)
if k2 in neo_options:
neokw[k2] = v
# else keep this kv as db option
else:
dbkw[k] = v
# now we have everything. Let ZConfig do actual work for validation options
# and borning the storage
neozconf = """%import neo.client
"""
for k, v in neokw.items():
neozconf += " %s\t%s\n" % (k, v)
neozconf += "\n"
return neozconf, dbkw
# resolve_uri resolves uri according to neo:// schema.
# see module docstring for uri format.
def resolve_uri(uri):
neozconf, dbkw = _resolve_uri(uri)
def factory():
return ZODB.config.storageFromString(neozconf)
return factory, dbkw
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/debug.py 0000664 0000000 0000000 00000015117 13465024610 0023471 0 ustar 00root root 0000000 0000000 """Example of script starting a debugger on RTMIN+3 signal
The pdb is launched in a separate thread in order not to trigger timeouts.
The prompt is accessible through network in case that the process is daemonized:
$ socat READLINE TCP:127.0.0.1:54930
> neo/debug.py(63)pdb()
-> app # this is Application instance
(Pdb) app
"""
import sys
def app_set():
try:
return sys.modules['neo.lib.threaded_app'].app_set
except KeyError:
f = sys._getframe(4)
try:
while f.f_code.co_name != 'run' or \
f.f_locals.get('self').__class__.__name__ != 'Application':
f = f.f_back
return f.f_locals['self'],
except AttributeError:
return ()
def defer(task):
def wrapper():
from traceback import print_exc
try:
task(app)
except:
print_exc()
for app in app_set():
app.em.wakeup(wrapper)
break
IF = 'pdb'
if IF == 'pdb':
# List of (module, callables) to break at.
# If empty, a thread is started with a breakpoint.
# All breakpoints are automatically removed on the first break,
# or when this module is reloaded.
BP = (#('ZODB.Connection', 'Connection.setstate'),
#('ZPublisher.Publish', 'publish_module_standard'),
)
import socket, threading, weakref
from neo.lib.debug import PdbSocket
# We don't use the one from neo.lib.debug because unfortunately,
# IPython does not always print to given stdout.
from pdb import Pdb as getPdb
def pdb():
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
# For better security, maybe we should use a unix socket.
s.settimeout(60)
s.bind(('127.0.0.1', 0))
s.listen(0)
print 'Listening to %u' % s.getsockname()[1]
sys.stdout.flush() # BBB: On Python 3, print() takes a 'flush' arg.
_socket = PdbSocket(s.accept()[0])
finally:
s.close()
try:
app, = app_set
except ValueError:
app = None
getPdb(stdin=_socket, stdout=_socket).set_trace()
app # this is Application instance (see 'app_set' if there are several)
app_set = app_set()
class setupBreakPoints(list):
def __init__(self, bp_list):
self._lock = threading.Lock()
for o, name in bp_list:
o = __import__(o, fromlist=('*',), level=0)
x = name.split('.')
name = x.pop()
for x in x:
o = getattr(o, x)
orig = getattr(o, name)
if orig.__module__ == __name__:
orig.__closure__[1].cell_contents._revert()
orig = getattr(o, name)
assert orig.__module__ != __name__, (o, name)
orig = getattr(orig, '__func__', orig)
self.append((o, name, orig))
setattr(o, name, self._wrap(orig))
print 'BP set on', orig
sys.stdout.flush()
self._hold = weakref.ref(pdb, self._revert)
def _revert(self, *_):
for x in self:
setattr(*x)
print 'BP removed on', x[2]
sys.stdout.flush()
del self[:]
def _wrap(self, orig):
return lambda *args, **kw: self(orig, *args, **kw)
def __call__(self, orig, *args, **kw):
stop = False
with self._lock:
if self:
stop = True
self._revert()
if stop:
pdb()
return orig(*args, **kw)
if BP:
setupBreakPoints(BP)
else:
threading.Thread(target=pdb, name='pdb').start()
elif IF == 'frames':
# WARNING: Because of https://bugs.python.org/issue17094, the output is
# usually incorrect for subprocesses started by the functional
# test framework.
import traceback
write = sys.stderr.write
for thread_id, frame in sys._current_frames().iteritems():
write("Thread %s:\n" % thread_id)
traceback.print_stack(frame)
write("End of dump\n")
elif IF == 'profile':
DURATION = 60
def stop(prof, path):
prof.disable()
prof.dump_stats(path)
@defer
def profile(app):
import cProfile, threading, time
from .lib.protocol import uuid_str
path = 'neo-%s-%s.prof' % (uuid_str(app.uuid), time.time())
prof = cProfile.Profile()
threading.Timer(DURATION, stop, (prof, path)).start()
prof.enable()
elif IF == 'trace-cache':
from struct import Struct
from .client.cache import ClientCache
from .lib.protocol import uuid_str, ZERO_TID as z64
class CacheTracer(object):
_pack2 = Struct('!B8s8s').pack
_pack4 = Struct('!B8s8s8sL').pack
def __init__(self, cache, path):
self._cache = cache
self._trace_file = open(path, 'a')
def close(self):
self._trace_file.close()
return self._cache
def _trace(self, op, x, y=z64, z=z64):
self._trace_file.write(self._pack(op, x, y, z))
def __repr__(self):
return repr(self._cache)
@property
def max_size(self):
return self._cache.max_size
def clear(self):
self._trace_file.write('\0')
self._cache.clear()
def clear_current(self):
self._trace_file.write('\1')
self._cache.clear_current()
def load(self, oid, before_tid=None):
r = self._cache.load(oid, before_tid)
self._trace_file.write(self._pack2(
3 if r else 2, oid, before_tid or z64))
return r
def store(self, oid, data, tid, next_tid):
self._trace_file.write(self._pack4(
4, oid, tid, next_tid or z64, len(data)))
self._cache.store(oid, data, tid, next_tid)
def invalidate(self, oid, tid):
self._trace_file.write(self._pack2(5, oid, tid))
self._cache.invalidate(oid, tid)
@defer
def profile(app):
app._cache_lock_acquire()
try:
cache = app._cache
if type(cache) is ClientCache:
app._cache = CacheTracer(cache, '%s-%s.neo-cache-trace' %
(app.name, uuid_str(app.uuid)))
app._cache.clear()
else:
app._cache = cache.close()
finally:
app._cache_lock_release()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/ 0000775 0000000 0000000 00000000000 13465024610 0022572 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/__init__.py 0000664 0000000 0000000 00000001300 13465024610 0024675 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from .logger import logging
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/app.py 0000664 0000000 0000000 00000010045 13465024610 0023724 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2015-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from . import logging, util
from .config import OptionList
from .event import EventManager
from .node import NodeManager
def buildOptionParser(cls):
parser = cls.option_parser = cls.OptionList()
_ = parser.path
_('l', 'logfile',
help="log debugging information to specified SQLite DB")
_('ca', help="(SSL) certificate authority in PEM format")
_('cert', help="(SSL) certificate in PEM format")
_('key', help="(SSL) private key in PEM format")
cls._buildOptionParser()
return cls
class BaseApplication(object):
class OptionList(OptionList):
def parse(self, argv=None):
config = OptionList.parse(self, argv)
ssl = (
config.pop('ca', None),
config.pop('cert', None),
config.pop('key', None),
)
if any(ssl):
config['ssl'] = ssl
return config
server = None
ssl = None
@classmethod
def addCommonServerOptions(cls, section, bind, masters='127.0.0.1:10000'):
_ = cls.option_parser.group('server node')
_.path('f', 'file', help='specify a configuration file')
_('s', 'section', default=section,
help='specify a configuration section')
_('c', 'cluster', required=True, help='the cluster name')
_('m', 'masters', default=masters, parse=util.parseMasterList,
help='master node list')
_('b', 'bind', default=bind,
parse=lambda x: util.parseNodeAddress(x, 0),
help='the local address to bind to')
_.path('D', 'dynamic-master-list',
help='path of the file containing dynamic master node list')
def __init__(self, ssl=None, dynamic_master_list=None):
if ssl:
if not all(ssl):
raise ValueError("To enable encryption, 3 files must be"
" provided: the CA certificate, and the certificate"
" of this node with its private key.")
ca, cert, key = ssl
import ssl
version, version_name = max((getattr(ssl, k), k)
for k in dir(ssl) if k.startswith("PROTOCOL_TLSv"))
self.ssl = context = ssl.SSLContext(version)
context.options |= (0
| ssl.OP_CIPHER_SERVER_PREFERENCE
| ssl.OP_NO_COMPRESSION
)
context.set_ciphers(ssl._RESTRICTED_SERVER_CIPHERS)
context.verify_mode = ssl.CERT_REQUIRED
context.load_verify_locations(ca)
context.load_cert_chain(cert, key)
context.verify_flags |= ssl.VERIFY_X509_STRICT | (
context.cert_store_stats()['crl'] and ssl.VERIFY_CRL_CHECK_LEAF)
logging.info("TLS %s enabled for %s",
float(version_name[13:].replace("_", ".")), self)
self._handlers = {}
self.em = EventManager()
self.nm = NodeManager(dynamic_master_list)
# XXX: Do not implement __del__ unless all references to the Application
# become weak.
# Due to cyclic references, Python < 3.4 would never call it unless
# it's closed explicitly, and in this case, there's nothing to do.
def close(self):
self.nm.close()
self.em.close()
self.__dict__.clear()
def setUUID(self, uuid):
if self.uuid != uuid:
self.uuid = uuid
logging.node(self.name, uuid)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/attributeTracker.py 0000664 0000000 0000000 00000003673 13465024610 0026474 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
ATTRIBUTE_TRACKER_ENABLED = False
from .locking import LockUser
"""
Usage example:
from neo import attributeTracker
class Foo(object):
...
def assertBar(self, expected_value):
if self.bar_attr != expected_value:
attributeTracker.whoSet(self, 'bar_attr')
attributeTracker.track(Foo)
"""
MODIFICATION_CONTAINER_ID = '_attribute_tracker_dict'
def tracker_setattr(self, attr, value, setattr):
modification_container = getattr(self, MODIFICATION_CONTAINER_ID, None)
if modification_container is None:
modification_container = {}
setattr(self, MODIFICATION_CONTAINER_ID, modification_container)
modification_container[attr] = LockUser()
setattr(self, attr, value)
if ATTRIBUTE_TRACKER_ENABLED:
def track(klass):
original_setattr = klass.__setattr__
def klass_tracker_setattr(self, attr, value):
tracker_setattr(self, attr, value, original_setattr)
klass.__setattr__ = klass_tracker_setattr
else:
def track(klass):
pass
def whoSet(instance, attr):
result = getattr(instance, MODIFICATION_CONTAINER_ID, None)
if result is not None:
result = result.get(attr)
if result is not None:
result = result.formatStack()
return result
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/bootstrap.py 0000664 0000000 0000000 00000006431 13465024610 0025165 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from . import logging
from .exception import PrimaryElected
from .handler import EventHandler
from .protocol import Packets
from .connection import ClientConnection
class BootstrapManager(EventHandler):
"""
Manage the bootstrap stage, lookup for the primary master then connect to it
"""
def __init__(self, app, node_type, server=None, devpath=(), new_nid=()):
"""
Manage the bootstrap stage of a non-master node, it lookup for the
primary master node, connect to it then returns when the master node
is ready.
"""
self.server = server
self.devpath = devpath
self.new_nid = new_nid
self.node_type = node_type
app.nm.reset()
uuid = property(lambda self: self.app.uuid)
def connectionCompleted(self, conn):
EventHandler.connectionCompleted(self, conn)
conn.ask(Packets.RequestIdentification(self.node_type, self.uuid,
self.server, self.app.name, None, self.devpath, self.new_nid))
def connectionFailed(self, conn):
EventHandler.connectionFailed(self, conn)
self.current = None
def connectionLost(self, conn, new_state):
self.current = None
def _acceptIdentification(self, node):
assert self.current is node, (self.current, node)
def getPrimaryConnection(self):
"""
Primary lookup/connection process.
Returns when the connection is made.
"""
logging.info('connecting to a primary master node')
app = self.app
poll = app.em.poll
index = 0
self.current = None
# retry until identified to the primary
while True:
try:
while self.current:
if self.current.isIdentified():
return self.current, self.current.getConnection()
poll(1)
except PrimaryElected, e:
if self.current:
self.current.getConnection().close()
self.current, = e.args
index = app.nm.getMasterList().index(self.current)
else:
# select a master
master_list = app.nm.getMasterList()
index = (index + 1) % len(master_list)
self.current = master_list[index]
ClientConnection(app, self, self.current)
# Note that the connection may be already closed. This happens when
# the kernel reacts so quickly to a closed port that 'connect'
# fails on the first call. In such case, poll(1) would deadlock
# if there's no other connection to timeout.
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/compress.py 0000664 0000000 0000000 00000003471 13465024610 0025004 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2018-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import zlib
decompress_list = (
lambda data: data,
zlib.decompress,
)
def parseOption(value):
x = value.split('=', 1)
try:
alg = ('zlib',).index(x[0])
if len(x) == 1:
return alg, None
level = int(x[1])
except Exception:
raise ValueError("not a valid 'compress' option: %r" % value)
if 0 < level <= zlib.Z_BEST_COMPRESSION:
return alg, level
raise ValueError("invalid compression level: %r" % level)
def getCompress(value):
if value:
alg, level = (0, None) if value is True else value
_compress = zlib.compress
if level:
zlib_compress = _compress
_compress = lambda data: zlib_compress(data, level)
alg += 1
assert 0 < alg < len(decompress_list), 'invalid compression algorithm'
def compress(data):
size = len(data)
compressed = _compress(data)
if len(compressed) < size:
return size, alg, compressed
return size, 0, data
compress._compress = _compress # for testBasicStore
return compress
return lambda data: (len(data), 0, data)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/config.py 0000664 0000000 0000000 00000013650 13465024610 0024416 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import argparse, os, sys
from functools import wraps
from ConfigParser import SafeConfigParser
class _Required(object):
def __init__(self, *args):
self._option_list, self._name = args
def __nonzero__(self):
with_required = self._option_list._with_required
return with_required is not None and self._name not in with_required
class _Option(object):
def __init__(self, *args, **kw):
if len(args) > 1:
self.short, self.name = args
else:
self.name, = args
self.__dict__.update(kw)
def _asArgparse(self, parser, option_list):
kw = self._argument_kw()
args = ['--' + self.name]
try:
args.insert(0, '-' + self.short)
except AttributeError:
pass
kw['help'] = self.help
action = parser.add_argument(*args, **kw)
if action.required:
assert not hasattr(self, 'default')
action.required = _Required(option_list, self.name)
def fromConfigFile(self, cfg, section):
return self(cfg.get(section, self.name.replace('-', '_')))
@staticmethod
def parse(value):
return value
class BoolOption(_Option):
def _argument_kw(self):
return {'action': 'store_true'}
def __call__(self, value):
return value
def fromConfigFile(self, cfg, section):
return cfg.getboolean(section, self.name)
class Option(_Option):
@property
def __name__(self):
return self.type.__name__
def _argument_kw(self):
kw = {'type': self}
for x in 'default', 'metavar', 'required', 'choices':
try:
kw[x] = getattr(self, x)
except AttributeError:
pass
return kw
@staticmethod
def type(value):
if value:
return value
raise argparse.ArgumentTypeError('value is empty')
def __call__(self, value):
return self.type(value)
class OptionGroup(object):
def __init__(self, description=None):
self.description = description
self._options = []
def _asArgparse(self, parser, option_list):
g = parser.add_argument_group(self.description)
for option in self._options:
option._asArgparse(g, option_list)
def set_defaults(self, **kw):
option_dict = self.getOptionDict()
for k, v in kw.iteritems():
option_dict[k].default = v
def getOptionDict(self):
option_dict = {}
for option in self._options:
if isinstance(option, OptionGroup):
option_dict.update(option.getOptionDict())
else:
option_dict[option.name.replace('-', '_')] = option
return option_dict
def __call__(self, *args, **kw):
self._options.append(Option(*args, **kw))
def __option_type(t):
return wraps(t)(lambda self, *args, **kw: self(type=t, *args, **kw))
float = __option_type(float)
int = __option_type(int)
path = __option_type(os.path.expanduser)
def bool(self, *args, **kw):
self._options.append(BoolOption(*args, **kw))
class Argument(Option):
def __init__(self, name, **kw):
super(Argument, self).__init__(name, **kw)
def _asArgparse(self, parser, option_list):
kw = {'help': self.help, 'type': self}
for x in 'default', 'metavar', 'nargs', 'choices':
try:
kw[x] = getattr(self, x)
except AttributeError:
pass
parser.add_argument(self.name, **kw)
class OptionList(OptionGroup):
_with_required = None
def argument(self, *args, **kw):
self._options.append(Argument(*args, **kw))
def group(self, description):
group = OptionGroup(description)
self._options.append(group)
return group
def parse(self, argv=None):
parser = argparse.ArgumentParser(description=self.description,
argument_default=argparse.SUPPRESS,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
for option in self._options:
option._asArgparse(parser, self)
_format_help = parser.format_help
def format_help():
self._with_required = ()
try:
return _format_help()
finally:
del self._with_required
parser.format_help = format_help
if argv is None:
argv = sys.argv[1:]
args = parser.parse_args(argv)
option_dict = self.getOptionDict()
try:
config_file = args.file
except AttributeError:
d = ()
else:
cfg = SafeConfigParser()
cfg.read(config_file)
section = args.section
d = {}
for name in cfg.options(section):
try:
option = option_dict[name]
except KeyError:
continue
d[name] = option.fromConfigFile(cfg, section)
parser.set_defaults(**d)
self._with_required = d
try:
args = parser.parse_args(argv)
finally:
del self._with_required
return {name: option.parse(getattr(args, name))
for name, option in option_dict.iteritems()
if hasattr(args, name)}
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/connection.py 0000664 0000000 0000000 00000060621 13465024610 0025310 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from functools import wraps
from time import time
import msgpack
from msgpack.exceptions import UnpackValueError
from . import attributeTracker, logging
from .connector import ConnectorException, ConnectorDelayedConnection
from .locking import RLock
from .protocol import uuid_str, Errors, PacketMalformedError, Packets, \
Unpacker
@apply
class dummy_read_buffer(msgpack.Unpacker):
def feed(self, _):
pass
class ConnectionClosed(Exception):
pass
class HandlerSwitcher(object):
_is_handling = False
_pending = ({}, None),
def __init__(self, handler):
# pending handlers and related requests
self._pending = []
self.setHandler(handler)
def close(self):
self.__dict__.clear()
def isPending(self):
return bool(self._pending[0][0])
def cancelRequests(self, conn, message):
if self.isPending():
p = Errors.ProtocolError(message)
while True:
request_dict, handler = self._pending[0]
while request_dict:
msg_id, request = request_dict.popitem()
p.setId(msg_id)
handler.packetReceived(conn, p, request[1])
if len(self._pending) == 1:
break
del self._pending[0]
def getHandler(self):
return self._pending[0][1]
def getLastHandler(self):
""" Return the last (may be unapplied) handler registered """
return self._pending[-1][1]
def emit(self, request, kw={}):
# register the request in the current handler
_pending = self._pending
if self._is_handling:
# If this is called while handling a packet, the response is to
# be expected for the current handler...
(request_dict, _) = _pending[0]
else:
# ...otherwise, queue for the latest handler
assert len(_pending) == 1 or _pending[0][0]
(request_dict, _) = _pending[-1]
msg_id = request.getId()
answer_class = request.getAnswerClass()
assert answer_class is not None, "Not a request"
assert msg_id not in request_dict, "Packet id already expected"
request_dict[msg_id] = answer_class, kw
def handle(self, connection, packet):
assert not self._is_handling
self._is_handling = True
try:
self._handle(connection, packet)
finally:
self._is_handling = False
def _handle(self, connection, packet):
pending = self._pending
assert len(pending) == 1 or pending[0][0], pending
logging.packet(connection, packet, False)
if connection.isClosed() and (connection.isAborted() or
packet.ignoreOnClosedConnection()):
logging.debug('Ignoring packet %r on closed connection %r',
packet, connection)
return
if not packet.isResponse(): # notification
# XXX: If there are several handlers, which one to use ?
pending[0][1].packetReceived(connection, packet)
return
msg_id = packet.getId()
request_dict, handler = pending[0]
# checkout the expected answer class
try:
klass, kw = request_dict.pop(msg_id)
except KeyError:
klass = None
kw = {}
try:
if klass and isinstance(packet, klass) or packet.isError():
handler.packetReceived(connection, packet, kw)
else:
logging.error('Unexpected answer %r in %r', packet, connection)
if not connection.isClosed():
connection.answer(Errors.ProtocolError(
'Unexpected answer: %r' % packet))
connection.abort()
finally:
# apply a pending handler if no more answers are pending
while len(pending) > 1 and not pending[0][0]:
del pending[0]
logging.debug('Apply handler %r on %r', pending[0][1],
connection)
def setHandler(self, handler):
can_apply = len(self._pending) == 1 and not self._pending[0][0]
if can_apply:
# nothing is pending, change immediately
self._pending[0][1] = handler
else:
# put the next handler in queue
self._pending.append([{}, handler])
return can_apply
class BaseConnection(object):
"""A base connection
About timeouts:
In the past, ask() took a timeout parameter as a way to close the
connection if the remote node was too long to reply, with the idea
that something went wrong. There was no known bug but this feature was
actually a bad idea.
It is impossible to test whether the remote node is in good state or
not. The experience shows that timeouts were always triggered because
the remote nodes were simply too slow. Waiting remains the best option
and anything else would only make things worse.
The only case where it could make sense to react on a slow request is
when there is redundancy, more exactly for read requests to storage
nodes when there are replicas. A client node could resend its request
to another node, _without_ breaking the first connection (then wait for
the first reply and ignore the other).
The previous timeout implementation (before May 2017) was not well
suited to support the above use case so most of the code has been
removed, but it may contain some interesting parts.
Currently, since applicative pings have been replaced by TCP
keepalives, timeouts are only used for 2 things:
- to avoid reconnecting too fast
- to close idle client connections
"""
from .connector import SocketConnector as ConnectorClass
def __init__(self, event_manager, handler, connector, addr=None):
assert connector is not None, "Need a low-level connector"
self.em = event_manager
self.connector = connector
self.addr = addr
self._handlers = HandlerSwitcher(handler)
# XXX: do not use getHandler
getHandler = property(lambda self: self._handlers.getHandler)
getLastHandler = property(lambda self: self._handlers.getLastHandler)
isPending = property(lambda self: self._handlers.isPending)
def cancelRequests(self, *args, **kw):
return self._handlers.cancelRequests(self, *args, **kw)
def getTimeout(self):
pass
def lockWrapper(self, func):
return func
def getConnector(self):
return self.connector
def getAddress(self):
return self.addr
def readable(self):
raise NotImplementedError
def writable(self):
raise NotImplementedError
def close(self):
"""Close the connection."""
if self.connector is not None:
self.em.unregister(self, True)
self.connector = None
self.aborted = False
def _getReprInfo(self):
r = [
('nid', uuid_str(self.getUUID())),
('address', ('[%s]:%s' if ':' in self.addr[0] else '%s:%s')
% self.addr if self.addr else '?'),
('handler', self.getHandler()),
]
connector = self.connector
if connector is None:
return r, ['closed']
r.append(('fd', connector.getDescriptor()))
return r, ['aborted'] if self.isAborted() else []
def __repr__(self):
r, flags = self._getReprInfo()
r = map('%s=%s'.__mod__, r)
r += flags
return '<%s(%s) at %x>' % (
self.__class__.__name__,
', '.join(r),
id(self),
)
def setHandler(self, handler):
changed = self._handlers.setHandler(handler)
if changed:
logging.debug('Handler changed on %r', self)
else:
logging.debug('Delay handler %r on %r', handler, self)
return changed
def getUUID(self):
return None
def isClosed(self):
return self.connector is None or self.isAborted()
def isAborted(self):
return False
def isListening(self):
return False
def isServer(self):
return False
def isClient(self):
return False
def hasPendingMessages(self):
return False
def whoSetConnector(self):
"""
Debugging method: call this method to know who set the current
connector value.
"""
return attributeTracker.whoSet(self, 'connector')
attributeTracker.track(BaseConnection)
class ListeningConnection(BaseConnection):
"""A listen connection."""
def __init__(self, app, handler, addr):
self._ssl = app.ssl
logging.debug('listening to %s:%d', *addr)
connector = self.ConnectorClass(addr)
BaseConnection.__init__(self, app.em, handler, connector, addr)
connector.makeListeningConnection()
self.em.register(self)
def readable(self):
connector, addr = self.connector.accept()
logging.debug('accepted a connection from %s:%d', *addr)
conn = ServerConnection(self.em, self.getHandler(), connector, addr)
if self._ssl:
conn.connecting = True
connector.ssl(self._ssl, conn._connected)
# Nothing to send as long as we haven't received a ClientHello
# message.
else:
conn._connected()
self.em.addWriter(conn) # for HANDSHAKE_PACKET
def getAddress(self):
return self.connector.getAddress()
def isListening(self):
return True
class Connection(BaseConnection):
"""A connection."""
# XXX: rename isPending, hasPendingMessages & pending methods
buffering = False
connecting = True
client = False
server = False
peer_id = None
_total_unpacked = 0
_timeout = None
def __init__(self, event_manager, *args, **kw):
BaseConnection.__init__(self, event_manager, *args, **kw)
self.read_buf = Unpacker()
self.cur_id = 0
self.aborted = False
self.uuid = None
self._queue = []
self._on_close = None
def _getReprInfo(self):
r, flags = super(Connection, self)._getReprInfo()
if self._queue:
r.append(('len(queue)', len(self._queue)))
if self._on_close is not None:
r.append(('on_close', getattr(self._on_close, '__name__', '?')))
flags.extend(x for x in ('connecting', 'client', 'server')
if getattr(self, x))
return r, flags
def setOnClose(self, callback):
assert not self.isClosed(), self
self._on_close = callback
def isClient(self):
return self.client
def isServer(self):
return self.server
def asClient(self):
try:
del self._timeout
except AttributeError:
self.client = True
else:
assert self.client
def asServer(self):
self.server = True
def _closeClient(self):
if self.server:
del self._timeout
self.client = False
self.send(Packets.CloseClient())
else:
self.close()
def closeClient(self):
# Currently, the only usage that is really useful is between a backup
# storage node and an upstream one, to avoid:
# - maintaining many connections for nothing when there's no write
# activity for a long time (and waste resources with keepalives)
# - reconnecting too often (i.e. be reactive) when there's moderate
# activity (think of a timer with a period of 1 minute)
if self.connector is not None and self.client:
self._timeout = time() + 100
def isAborted(self):
return self.aborted
def getUUID(self):
return self.uuid
def setUUID(self, uuid):
self.uuid = uuid
def setPeerId(self, peer_id):
assert peer_id is not None
self.peer_id = peer_id
def getPeerId(self):
return self.peer_id
def _getNextId(self):
next_id = self.cur_id
self.cur_id = (next_id + 1) & 0xffffffff
return next_id
def getTimeout(self):
if not self._queue:
return self._timeout
def onTimeout(self):
assert self._timeout
self._closeClient()
def abort(self):
"""Abort dealing with this connection."""
assert self.pending()
if self.connecting:
self.close()
return
logging.debug('aborting a connector for %r', self)
self.aborted = True
self.read_buf = dummy_read_buffer
if self._on_close is not None:
self._on_close()
self._on_close = None
def writable(self):
"""Called when self is writable."""
try:
if self.connector.send():
if self.aborted:
self.close()
else:
self.em.removeWriter(self)
except ConnectorException:
self._closure()
def _parse(self):
from .protocol import HANDSHAKE_PACKET, MAGIC_SIZE, Packets
read_buf = self.read_buf
handshake = read_buf.read_bytes(len(HANDSHAKE_PACKET))
if handshake != HANDSHAKE_PACKET:
if HANDSHAKE_PACKET.startswith(handshake): # unlikely so tested last
# Not enough data and there's no API to know it in advance.
# Put it back.
read_buf.feed(handshake)
return
if HANDSHAKE_PACKET.startswith(handshake[:MAGIC_SIZE]):
logging.warning('Protocol version mismatch with %r', self)
else:
logging.debug('Rejecting non-NEO %r', self)
raise ConnectorException
read_next = read_buf.next
read_pos = read_buf.tell
def parse():
try:
msg_id, msg_type, args = read_next()
except StopIteration:
return
except UnpackValueError as e:
raise PacketMalformedError(str(e))
try:
packet_klass = Packets[msg_type]
except KeyError:
raise PacketMalformedError('Unknown packet type')
pos = read_pos()
packet = packet_klass(*args)
packet.setId(msg_id)
packet.size = pos - self._total_unpacked
self._total_unpacked = pos
return packet
self._parse = parse
return parse()
def readable(self):
"""Called when self is readable."""
# last known remote activity
try:
try:
if self.connector.receive(self.read_buf):
self.em.addWriter(self)
finally:
# A connector may read some data
# before raising ConnectorException
while 1:
packet = self._parse()
if packet is None:
break
self._queue.append(packet)
except ConnectorException:
self._closure()
except PacketMalformedError, e:
logging.error('malformed packet from %r: %s', self, e)
self._closure()
return not not self._queue
def hasPendingMessages(self):
"""
Returns True if there are messages queued and awaiting processing.
"""
return not not self._queue
def process(self):
"""
Process a pending packet.
"""
# check out packet and process it with current handler
self._handlers.handle(self, self._queue.pop(0))
def pending(self):
connector = self.connector
return connector is not None and connector.queued
@property
def setReconnectionNoDelay(self):
try:
return self.connector.setReconnectionNoDelay
except AttributeError:
raise ConnectionClosed
def close(self):
if self.connector is None:
assert self._on_close is None
assert not self.read_buf.read_bytes(1)
assert not self.isPending()
return
# process the network events with the last registered handler to
# solve issues where a node is lost with pending handlers and
# create unexpected side effects.
handler = self._handlers.getLastHandler()
super(Connection, self).close()
if self._on_close is not None:
self._on_close()
self._on_close = None
self.read_buf = dummy_read_buffer
try:
if self.connecting:
handler.connectionFailed(self)
self.connecting = False
else:
handler.connectionClosed(self)
finally:
self._handlers.close()
def _closure(self):
assert self.connector is not None, self.whoSetConnector()
while self._queue:
self._handlers.handle(self, self._queue.pop(0))
self.close()
def _addPacket(self, packet):
"""Add a packet into the write buffer."""
if self.connector.queue(packet.encode()):
if packet.nodelay or 65536 < self.connector.queue_size:
assert not self.buffering
# enable polling for writing.
self.em.addWriter(self)
else:
self.buffering = True
elif self.buffering and (65536 < self.connector.queue_size
or packet.nodelay):
self.buffering = False
self.em.addWriter(self)
logging.packet(self, packet, True)
def send(self, packet, msg_id=None):
""" Then a packet with a new ID """
if self.isClosed():
raise ConnectionClosed
packet.setId(self._getNextId() if msg_id is None else msg_id)
self._addPacket(packet)
def ask(self, packet, **kw):
"""
Send a packet with a new ID and register the expectation of an answer
"""
if self.isClosed():
raise ConnectionClosed
msg_id = self._getNextId()
packet.setId(msg_id)
self._addPacket(packet)
self._handlers.emit(packet, kw)
return msg_id
def answer(self, packet):
""" Answer to a packet by re-using its ID for the packet answer """
assert packet.isResponse(), packet
if self.isClosed():
if packet.ignoreOnClosedConnection() and not packet.isError():
raise ConnectionClosed
return
packet.setId(self.peer_id)
self._addPacket(packet)
def _connected(self):
self.connecting = False
self.getHandler().connectionCompleted(self)
class ClientConnection(Connection):
"""A connection from this node to a remote node."""
client = True
def __init__(self, app, handler, node):
self._ssl = app.ssl
addr = node.getAddress()
connector = self.ConnectorClass(addr)
Connection.__init__(self, app.em, handler, connector, addr)
node.setConnection(self)
handler.connectionStarted(self)
self._connect()
def _connect(self):
try:
connected = self.connector.makeClientConnection()
except ConnectorDelayedConnection, c:
connect_limit, = c.args
self.getTimeout = lambda: connect_limit
self.onTimeout = self._delayedConnect
self.em.register(self, timeout_only=True)
except ConnectorException:
self._closure()
else:
self.em.register(self)
if connected:
self._maybeConnected()
# There's always the protocol version to send.
self.em.addWriter(self)
def _delayedConnect(self):
del self.getTimeout, self.onTimeout
self._connect()
def writable(self):
"""Called when self is writable."""
if self.connector.getError():
self._closure()
else:
self._maybeConnected()
self.writable()
def _maybeConnected(self):
self.writable = self.lockWrapper(super(ClientConnection, self).writable)
if self._ssl:
self.connector.ssl(self._ssl, self._connected)
else:
self._connected()
class ServerConnection(Connection):
"""A connection from a remote node to this node."""
server = True
def __init__(self, *args, **kw):
Connection.__init__(self, *args, **kw)
self.em.register(self)
class MTConnectionType(type):
def __init__(cls, *args):
if __debug__:
for name in 'answer',:
setattr(cls, name, cls.lockCheckWrapper(name))
for name in 'close', 'send':
setattr(cls, name, cls.__class__.lockWrapper(cls, name))
for name in ('_delayedConnect', 'onTimeout',
'process', 'readable', 'writable'):
setattr(cls, name, cls.__class__.lockWrapper(cls, name, True))
def lockCheckWrapper(cls, name):
def wrapper(self, *args, **kw):
# XXX: Unfortunately, RLock does not has any public method
# to test whether we own the lock or not.
assert self.lock._is_owned(), (self, args, kw)
return getattr(super(cls, self), name)(*args, **kw)
return wraps(getattr(cls, name).im_func)(wrapper)
def lockWrapper(cls, name, maybe_closed=False):
if maybe_closed:
def wrapper(self):
with self.lock:
if self.isClosed():
logging.info("%r.%s()", self, name)
else:
return getattr(super(cls, self), name)()
else:
def wrapper(self, *args, **kw):
with self.lock:
return getattr(super(cls, self), name)(*args, **kw)
return wraps(getattr(cls, name).im_func)(wrapper)
class MTClientConnection(ClientConnection):
"""A Multithread-safe version of ClientConnection."""
__metaclass__ = MTConnectionType
def lockWrapper(self, func):
lock = self.lock
def wrapper(*args, **kw):
with lock:
return func(*args, **kw)
return wrapper
def __init__(self, *args, **kwargs):
self.lock = lock = RLock()
self.dispatcher = kwargs.pop('dispatcher')
with lock:
super(MTClientConnection, self).__init__(*args, **kwargs)
# Alias without lock (cheaper than super())
_ask = ClientConnection.ask.__func__
def ask(self, packet, queue=None, **kw):
with self.lock:
if queue is None:
if type(packet) is Packets.Ping:
return self._ask(packet, **kw)
raise TypeError('Only Ping packet can be asked'
' without a queue, got a %r.' % packet)
msg_id = self._ask(packet, **kw)
self.dispatcher.register(self, msg_id, queue)
return msg_id
# Currently, on connected connections, we only use timeouts for
# closeClient, which is never used for MTClientConnection.
# So we disable the logic completely as a precaution, and for performance.
# What is specific to MTClientConnection is that the poll thread must be
# woken up whenever the timeout is changed to a smaller value.
def closeClient(self):
# For example here, in addition to what the super method does,
# we may have to call `self.em.wakeup()`
raise NotImplementedError
def getTimeout(self):
pass
def onTimeout(self):
# It is possible that another thread manipulated the connection while
# getting a timeout from epoll. Only the poll thread fills _queue
# so we know that it is empty, but we may have to check timeout values
# again (i.e. compare time() with the result of getTimeout()).
raise NotImplementedError
###
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/connector.py 0000664 0000000 0000000 00000031143 13465024610 0025140 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import socket
import ssl
import errno
from time import time
from . import logging
from .protocol import HANDSHAKE_PACKET
# Global connector registry.
# Fill by calling registerConnectorHandler.
# Read by calling SocketConnector.__new__
connector_registry = {}
def registerConnectorHandler(connector_handler):
connector_registry[connector_handler.af_type] = connector_handler
class SocketConnector(object):
""" This class is a wrapper for a socket """
is_closed = is_server = None
connect_limit = {}
CONNECT_LIMIT = 1
KEEPALIVE = 60, 3, 10
SOMAXCONN = 5 # for threaded tests
def __new__(cls, addr, s=None):
if s is None:
host, port = addr
for af_type, cls in connector_registry.iteritems():
try :
socket.inet_pton(af_type, host)
break
except socket.error:
pass
else:
raise ValueError("Unknown type of host", host)
self = object.__new__(cls)
self.addr = cls._normAddress(addr)
if s is None:
s = socket.socket(af_type, socket.SOCK_STREAM)
else:
self.is_server = True
self.is_closed = False
self.socket = s
self.socket_fd = s.fileno()
# always use non-blocking sockets
s.setblocking(0)
# TCP keepalive, enabled on both sides to detect:
# - remote host crash
# - network failure
# They're more efficient than applicative pings and we don't want
# to consider the connection dead if the remote node is busy.
# The following 3 lines are specific to Linux. It seems that OSX
# has similar options (TCP_KEEPALIVE/TCP_KEEPINTVL/TCP_KEEPCNT),
# and Windows has SIO_KEEPALIVE_VALS (fixed count of 10).
idle, cnt, intvl = self.KEEPALIVE
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, idle)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, cnt)
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, intvl)
s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
# disable Nagle algorithm to reduce latency
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
self.queued = [HANDSHAKE_PACKET]
self.queue_size = len(HANDSHAKE_PACKET)
return self
def queue(self, data):
was_empty = not self.queued
self.queued.append(data)
self.queue_size += len(data)
return was_empty
def _error(self, op, exc=None):
if exc is None:
logging.debug('%r closed in %s', self, op)
else:
logging.debug("%s failed for %s: %s (%s)",
op, self, errno.errorcode[exc.errno], exc.strerror)
raise ConnectorException
# Threaded tests monkey-patch the following 2 operations.
_connect = lambda self, addr: self.socket.connect(addr)
_bind = lambda self, addr: self.socket.bind(addr)
def makeClientConnection(self):
assert self.is_closed is None
addr = self.addr
try:
connect_limit = self.connect_limit[addr]
if time() < connect_limit:
# Next call to queue() must return False
# in order not to enable polling for writing.
self.queued or self.queued.append('')
raise ConnectorDelayedConnection(connect_limit)
if self.queued and not self.queued[0]:
del self.queued[0]
except KeyError:
pass
self.connect_limit[addr] = time() + self.CONNECT_LIMIT
self.is_server = self.is_closed = False
try:
self._connect(addr)
except socket.error, e:
if e.errno == errno.EINPROGRESS:
return False
self._error('connect', e)
return True
def makeListeningConnection(self):
assert self.is_closed is None
self.is_closed = False
try:
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self._bind(self.addr)
self.socket.listen(self.SOMAXCONN)
except socket.error, e:
self.is_closed = True
self.socket.close()
self._error('listen', e)
def ssl(self, ssl, on_handshake_done=None):
self.socket = ssl.wrap_socket(self.socket,
server_side=self.is_server,
do_handshake_on_connect=False,
suppress_ragged_eofs=False)
self.__class__ = self.SSLHandshakeConnectorClass
self.on_handshake_done = on_handshake_done
self.queued or self.queued.append('')
def getError(self):
return self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
def getDescriptor(self):
# this descriptor must only be used by the event manager, where it
# guarantee uniqueness only while the connector is opened and
# registered in epoll
return self.socket_fd
@staticmethod
def _normAddress(addr):
return addr
def getAddress(self):
return self._normAddress(self.socket.getsockname())
def accept(self):
try:
s, addr = self.socket.accept()
s = self.__class__(addr, s)
return s, s.addr
except socket.error, e:
self._error('accept', e)
def receive(self, read_buf):
try:
data = self.socket.recv(65536)
except socket.error, e:
self._error('recv', e)
if data:
read_buf.feed(data)
return
self._error('recv')
def send(self):
# XXX: Inefficient for big packets. In any case, we should make sure
# that 'msg' does not exceed 2GB with SSL (OverflowError).
# Before commit 1a064725b81a702a124d672dba2bcae498980c76,
# this happened when many big AddObject packets were sent
# for a single replication chunk.
msg = ''.join(self.queued)
if msg:
try:
n = self.socket.send(msg)
except socket.error, e:
self._error('send', e)
# Do nothing special if n == 0:
# - it never happens for simple sockets;
# - for SSL sockets, this is always the case unless everything
# could be sent.
if n != len(msg):
self.queued[:] = msg[n:],
self.queue_size -= n
return False
del self.queued[:]
self.queue_size = 0
else:
assert not self.queued
return True
def shutdown(self):
self.is_closed = True
try:
if self.connect_limit[self.addr] < time():
del self.connect_limit[self.addr]
except KeyError:
pass
try:
self.socket.shutdown(socket.SHUT_RDWR)
except socket.error, e:
if e.errno != errno.ENOTCONN:
raise
return self.socket.close
def setReconnectionNoDelay(self):
"""Mark as successful so that we can reconnect without delay"""
self.connect_limit.pop(self.addr, None)
def __repr__(self):
if self.is_closed is None:
state = ', never opened'
else:
if self.is_closed:
state = ', closed '
else:
state = ' fileno %s %s, opened ' % (
self.socket_fd, self.getAddress())
if self.is_server is None:
state += 'listening'
else:
if self.is_server:
state += 'from '
else:
state += 'to '
state += str(self.addr)
return '<%s at 0x%x%s>' % (self.__class__.__name__, id(self), state)
class SocketConnectorIPv4(SocketConnector):
" Wrapper for IPv4 sockets"
af_type = socket.AF_INET
class SocketConnectorIPv6(SocketConnector):
" Wrapper for IPv6 sockets"
af_type = socket.AF_INET6
@staticmethod
def _normAddress(addr):
return addr[:2]
registerConnectorHandler(SocketConnectorIPv4)
registerConnectorHandler(SocketConnectorIPv6)
def overlay_connector_class(cls):
name = cls.__name__[1:]
alias = name + 'ConnectorClass'
for base in connector_registry.itervalues():
setattr(base, alias, type(name + base.__name__,
cls.__bases__ + (base,), cls.__dict__))
return cls
@overlay_connector_class
class _SSL:
def _error(self, op, exc=None):
if isinstance(exc, ssl.SSLError):
if not isinstance(exc, ssl.SSLEOFError):
logging.debug("%s failed for %s: %s", op, self, exc)
raise ConnectorException
exc = None
SocketConnector._error(self, op, exc)
def receive(self, read_buf):
try:
while 1:
read_buf.feed(self.socket.recv(4096))
except ssl.SSLWantReadError:
pass
except socket.error, e:
self._error('recv', e)
@overlay_connector_class
class _SSLHandshake(_SSL):
# WKRD: Unfortunately, SSL_do_handshake(3SSL) does not try to reject
# non-SSL connections as soon as possible, by checking the first
# byte. It even does nothing before receiving a full TLSPlaintext
# frame (5 bytes).
# The NEO protocol is such that a client connection is always the
# first to send a packet, as soon as the connection is established,
# and without waiting that the protocol versions are checked.
# So in practice, non-SSL connection to SSL would never hang, but
# there's another issue: such case results in WRONG_VERSION_NUMBER
# instead of something like UNEXPECTED_RECORD, because the SSL
# version is checked first.
# For better logging, we try to detect non-SSL connections with
# MSG_PEEK. This only works reliably on server side.
# For SSL client connections, 2 things may prevent the workaround to
# log that the remote node has not enabled SSL:
# - non-SSL data received (or connection closed) before the first
# call to 'recv' in 'do_handshake'
# - the server connection detects a wrong protocol version before it
# sent its one
def _handshake(self, read_buf=None):
# ???Writer | send | receive
# -----------+--------+--------
# want read | remove | -
# want write | - | add
try:
self.socket.do_handshake()
except ssl.SSLWantReadError:
return read_buf is None
except ssl.SSLWantWriteError:
return read_buf is not None
except socket.error, e:
# OpenSSL 1.1 may raise socket.error(0)
# where previous versions raised SSLEOFError.
self._error('send' if read_buf is None else 'recv',
e if e.errno else None)
if not self.queued[0]:
del self.queued[0]
del self.receive, self.send
self.__class__ = self.SSLConnectorClass
cipher, proto, bits = self.socket.cipher()
logging.debug("SSL handshake done for %s: %s %s", self, cipher, bits)
if self.on_handshake_done:
self.on_handshake_done()
del self.on_handshake_done
if read_buf is None:
return self.send()
self.receive(read_buf)
return self.queued
def send(self, read_buf=None):
handshake = self.receive = self.send = self._handshake
return handshake(read_buf)
def receive(self, read_buf):
try:
content_type = self.socket._sock.recv(1, socket.MSG_PEEK)
except socket.error, e:
self._error('recv', e)
if content_type == '\26': # handshake
return self.send(read_buf)
if content_type:
logging.debug('Rejecting non-SSL %r', self)
raise ConnectorException
self._error('recv')
class ConnectorException(Exception):
pass
class ConnectorDelayedConnection(ConnectorException):
pass
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/debug.py 0000664 0000000 0000000 00000007712 13465024610 0024241 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2010-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import errno, imp, os, signal, socket, sys, traceback
from functools import wraps
import neo
# kill -RTMIN+2
# Dump information to logs.
# kill -RTMIN+3
# Loads (or reloads) neo.debug module.
# The content is up to you (it's only imported). It can be a breakpoint.
def safe_handler(func):
def wrapper(sig, frame):
try:
func(sig, frame)
except:
# Prevent exception from exiting signal handler, so mistakes in
# "debug" module don't kill process.
traceback.print_exc()
return wraps(func)(wrapper)
@safe_handler
def debugHandler(sig, frame):
file, filename, (suffix, mode, type) = imp.find_module('debug',
neo.__path__)
imp.load_module('neo.debug', file, filename, (suffix, mode, type))
def getPdb(**kw):
try: # try ipython if available
import IPython
shell = IPython.terminal.embed.InteractiveShellEmbed()
return IPython.core.debugger.Pdb(shell.colors, **kw)
except (AttributeError, ImportError):
import pdb
return pdb.Pdb(**kw)
_debugger = None
def winpdb(depth=0):
import rpdb2
depth += 1
if rpdb2.g_debugger is not None:
return rpdb2.setbreak(depth)
script = rpdb2.calc_frame_path(sys._getframe(depth))
pwd = str(os.getpid()) + os.getcwd().replace('/', '_').replace('-', '_')
pid = os.fork()
if pid:
try:
rpdb2.start_embedded_debugger(pwd, depth=depth)
finally:
os.waitpid(pid, 0)
else:
try:
os.execlp('python', 'python', '-c', """import os\nif not os.fork():
import rpdb2, winpdb
rpdb2_raw_input = rpdb2._raw_input
rpdb2._raw_input = lambda s: \
s == rpdb2.STR_PASSWORD_INPUT and %r or rpdb2_raw_input(s)
winpdb.g_ignored_warnings[winpdb.STR_EMBEDDED_WARNING] = True
winpdb.main()
""" % pwd, '-a', script)
finally:
os.abort()
def register(on_log=None):
try:
if on_log is not None:
@safe_handler
def on_log_signal(signum, signal):
on_log()
signal.signal(signal.SIGRTMIN+2, on_log_signal)
signal.signal(signal.SIGRTMIN+3, debugHandler)
except ValueError: # signal only works in main thread
pass
class PdbSocket(object):
def __init__(self, socket):
# In case that the default timeout is not None.
socket.settimeout(None)
self._socket = socket
self._buf = ''
def close(self):
self._socket.close()
def write(self, data):
self._socket.send(data)
def readline(self):
recv = self._socket.recv
data = self._buf
while True:
i = 1 + data.find('\n')
if i:
self._buf = data[i:]
return data[:i]
d = recv(4096)
data += d
if not d:
self._buf = ''
return data
def flush(self):
pass
def closed(self):
self._socket.setblocking(0)
try:
self._socket.recv(0)
return True
except socket.error, (err, _):
if err != errno.EAGAIN:
raise
self._socket.setblocking(1)
return False
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/dispatcher.py 0000664 0000000 0000000 00000010730 13465024610 0025273 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from functools import wraps
from .locking import Lock, Empty
EMPTY = {}
NOBODY = []
@apply
class _ConnectionClosed(object):
handler_method_name = 'connectionClosed'
_args = ()
class getId(object):
def __eq__(self, other):
return True
def giant_lock(func):
def wrapped(self, *args, **kw):
self.lock_acquire()
try:
return func(self, *args, **kw)
finally:
self.lock_release()
return wraps(func)(wrapped)
class Dispatcher:
"""Register a packet, connection pair as expecting a response packet."""
def __init__(self):
self.message_table = {}
self.queue_dict = {}
lock = Lock()
self.lock_acquire = lock.acquire
self.lock_release = lock.release
@giant_lock
def dispatch(self, conn, msg_id, packet, kw):
"""
Retrieve register-time provided queue, and put conn and packet in it.
"""
queue = self.message_table.get(id(conn), EMPTY).pop(msg_id, None)
if queue is None:
return False
elif queue is NOBODY:
return True
self._decrefQueue(queue)
queue.put((conn, packet, kw))
return True
def _decrefQueue(self, queue):
queue_id = id(queue)
queue_dict = self.queue_dict
if queue_dict[queue_id] == 1:
queue_dict.pop(queue_id)
else:
queue_dict[queue_id] -= 1
def _increfQueue(self, queue):
queue_id = id(queue)
queue_dict = self.queue_dict
try:
queue_dict[queue_id] += 1
except KeyError:
queue_dict[queue_id] = 1
@giant_lock
def register(self, conn, msg_id, queue):
"""Register an expectation for a reply."""
self.message_table.setdefault(id(conn), {})[msg_id] = queue
self._increfQueue(queue)
def unregister(self, conn):
""" Unregister a connection and put fake packet in queues to unlock
threads expecting responses from that connection """
notified_set = set()
_decrefQueue = self._decrefQueue
self.lock_acquire()
try:
message_table = self.message_table.pop(id(conn), EMPTY)
for queue in message_table.itervalues():
if queue is NOBODY:
continue
queue_id = id(queue)
if queue_id not in notified_set:
queue.put((conn, _ConnectionClosed, EMPTY))
notified_set.add(queue_id)
_decrefQueue(queue)
finally:
self.lock_release()
@giant_lock
def forget_queue(self, queue, flush_queue=True):
"""
Forget all pending messages for given queue.
Actually makes them "expected by nobody", so we know we can ignore
them, and not detect it as an error.
flush_queue (boolean, default=True)
All packets in queue get flushed.
"""
# XXX: expensive lookup: we iterate over the whole dict
found = 0
for message_table in self.message_table.itervalues():
for msg_id, t_queue in message_table.iteritems():
if queue is t_queue:
found += 1
message_table[msg_id] = NOBODY
refcount = self.queue_dict.pop(id(queue), 0)
assert refcount == found, (refcount, found)
if flush_queue:
get = queue.get
while True:
try:
get(block=False)
except Empty:
break
def registered(self, conn):
"""Check if a connection is registered into message table."""
return len(self.message_table.get(id(conn), EMPTY)) != 0
@giant_lock
def pending(self, queue):
return not queue.empty() or self.queue_dict.get(id(queue), 0) > 0
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/event.py 0000664 0000000 0000000 00000030007 13465024610 0024265 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
from time import time
from select import epoll, EPOLLIN, EPOLLOUT, EPOLLERR, EPOLLHUP
from errno import EAGAIN, EEXIST, EINTR, ENOENT
from . import logging
from .locking import Lock
@apply
def dictionary_changed_size_during_iteration():
d = {}; i = iter(d); d[0] = 0
try:
next(i)
except RuntimeError as e:
return str(e)
raise AssertionError
class EpollEventManager(object):
"""This class manages connections and events based on epoll(5)."""
_timeout = None
def __init__(self):
self.connection_dict = {}
self.reader_set = set()
self.writer_set = set()
self.epoll = epoll()
self._pending_processing = []
self._trigger_list = []
self._trigger_fd, w = os.pipe()
os.close(w)
self._trigger_lock = Lock()
close_list = []
self._closeAppend = close_list.append
l = Lock()
self._closeAcquire = l.acquire
_release = l.release
def release():
try:
while close_list:
close_list.pop()()
finally:
_release()
self._closeRelease = release
def close(self):
os.close(self._trigger_fd)
for c in self.connection_dict.values():
c.close()
del self.__dict__
def getConnectionList(self):
# XXX: use index
while 1:
# See _poll() about the use of self.connection_dict.itervalues()
try:
return [x for x in self.connection_dict.itervalues()
if not x.isAborted()]
except RuntimeError, e:
if str(e) != dictionary_changed_size_during_iteration:
raise
logging.info("%r", e)
def getClientList(self):
# XXX: use index
return [c for c in self.getConnectionList() if c.isClient()]
def getServerList(self):
# XXX: use index
return [c for c in self.getConnectionList() if c.isServer()]
def getConnectionListByUUID(self, uuid):
""" Return the connection associated to the UUID, None if the UUID is
None, invalid or not found"""
# XXX: use index
# XXX: consider remove UUID from connection and thus this method
if uuid is None:
return None
result = []
append = result.append
for conn in self.getConnectionList():
if conn.getUUID() == uuid:
append(conn)
return result
# epoll_wait always waits for EPOLLERR & EPOLLHUP so we're forced
# to unregister when we want to ignore all events for a connection.
def register(self, conn, timeout_only=False):
fd = conn.getConnector().getDescriptor()
self.connection_dict[fd] = conn
if timeout_only:
self.wakeup()
else:
self.epoll.register(fd)
self.addReader(conn)
def unregister(self, conn, close=False):
new_pending_processing = [x for x in self._pending_processing
if x is not conn]
# Check that we removed at most one entry from
# self._pending_processing .
assert len(new_pending_processing) > len(self._pending_processing) - 2
self._pending_processing = new_pending_processing
connector = conn.getConnector()
fd = connector.getDescriptor()
try:
del self.connection_dict[fd]
self.epoll.unregister(fd)
except KeyError:
pass
except IOError, e:
if e.errno != ENOENT:
raise
else:
self.reader_set.discard(fd)
self.writer_set.discard(fd)
if close:
self._closeAppend(connector.shutdown())
if self._closeAcquire(0):
self._closeRelease()
return
if close:
# The connection is not registered, so do not wait for epoll
# to wake up (which may not even happen, and lead to EMFILE).
connector.shutdown()()
def isIdle(self):
return not (self._pending_processing or self.writer_set)
def _addPendingConnection(self, conn):
pending_processing = self._pending_processing
if conn not in pending_processing:
pending_processing.append(conn)
def poll(self, blocking=1):
if not self._pending_processing:
# Fetch messages from polled file descriptors
self._poll(blocking)
if not self._pending_processing:
return
to_process = self._pending_processing.pop(0)
try:
to_process.process()
finally:
# ...and requeue if there are pending messages
if to_process.hasPendingMessages():
self._addPendingConnection(to_process)
# Non-blocking call: as we handled a packet, we should just offer
# poll a chance to fetch & send already-available data, but it must
# not delay us.
self._poll(0)
def _poll(self, blocking):
if blocking:
# self.connection_dict may be changed at any time by another thread,
# which may cause itervalues() to fail. But this happens so rarely,
# that for performance reasons, we prefer to retry, rather than:
# - protect self.connection_dict with a lock
# - or iterate over an atomic copy.
while 1:
try:
timeout = self._timeout
timeout_object = self
for conn in self.connection_dict.itervalues():
t = conn.getTimeout()
if t and (timeout is None or t < timeout):
timeout = t
timeout_object = conn
break
except RuntimeError, e:
if str(e) != dictionary_changed_size_during_iteration:
raise
logging.info("%r", e)
# Make sure epoll_wait does not return too early, because it has a
# granularity of 1ms and Python 2.7 rounds the timeout towards zero.
# See also https://bugs.python.org/issue20452 (fixed in Python 3).
blocking = .001 + max(0, timeout - time()) if timeout else -1
# From this point, and until we have processed all fds returned by
# epoll, we must prevent any fd from being closed, because they could
# be reallocated by new connection, either by this thread or by another.
# Sockets to close are queued, and they're really closed in the
# 'finally' clause.
self._closeAcquire()
try:
event_list = self.epoll.poll(blocking)
except IOError, exc:
if exc.errno in (0, EAGAIN):
logging.info('epoll.poll triggered undocumented error %r',
exc.errno)
elif exc.errno != EINTR:
raise
return
else:
if event_list:
wlist = []
elist = []
for fd, event in event_list:
if event & EPOLLIN:
try:
conn = self.connection_dict[fd]
except KeyError:
continue
if conn.readable():
self._addPendingConnection(conn)
if event & EPOLLOUT:
wlist.append(fd)
if event & (EPOLLERR | EPOLLHUP):
elist.append(fd)
for fd in wlist:
try:
conn = self.connection_dict[fd]
except KeyError:
continue
conn.writable()
for fd in elist:
try:
conn = self.connection_dict[fd]
except KeyError:
if fd == self._trigger_fd:
with self._trigger_lock:
self.epoll.unregister(fd)
action_list = self._trigger_list
try:
while action_list:
action_list.pop(0)()
finally:
del action_list[:]
continue
if conn.readable():
self._addPendingConnection(conn)
return
finally:
self._closeRelease()
if blocking > 0:
logging.debug('timeout triggered for %r', timeout_object)
timeout_object.onTimeout()
def onTimeout(self):
on_timeout = self._on_timeout
del self._on_timeout
self._timeout = None
on_timeout()
def setTimeout(self, *args):
self._timeout, self._on_timeout = args
def wakeup(self, *actions):
with self._trigger_lock:
self._trigger_list += actions
try:
self.epoll.register(self._trigger_fd)
except IOError, e:
# Ignore if 'wakeup' is called several times in a row.
if e.errno != EEXIST:
raise
def addReader(self, conn):
connector = conn.getConnector()
assert connector is not None, conn.whoSetConnector()
fd = connector.getDescriptor()
if fd not in self.reader_set:
self.reader_set.add(fd)
self.epoll.modify(fd, EPOLLIN | (
fd in self.writer_set and EPOLLOUT))
def removeReader(self, conn):
connector = conn.getConnector()
assert connector is not None, conn.whoSetConnector()
fd = connector.getDescriptor()
if fd in self.reader_set:
self.reader_set.remove(fd)
self.epoll.modify(fd, fd in self.writer_set and EPOLLOUT)
def addWriter(self, conn):
connector = conn.getConnector()
assert connector is not None, conn.whoSetConnector()
fd = connector.getDescriptor()
if fd not in self.writer_set:
self.writer_set.add(fd)
self.epoll.modify(fd, EPOLLOUT | (
fd in self.reader_set and EPOLLIN))
def removeWriter(self, conn):
connector = conn.getConnector()
assert connector is not None, conn.whoSetConnector()
fd = connector.getDescriptor()
if fd in self.writer_set:
self.writer_set.remove(fd)
self.epoll.modify(fd, fd in self.reader_set and EPOLLIN)
def log(self):
logging.info('Event Manager:')
logging.info(' Readers: %r', list(self.reader_set))
logging.info(' Writers: %r', list(self.writer_set))
logging.info(' Connections:')
pending_set = set(self._pending_processing)
for fd, conn in self.connection_dict.items():
logging.info(' %r: %r (pending=%r)', fd, conn,
conn in pending_set)
for request_dict, handler in conn._handlers._pending:
handler = handler.__class__.__name__
for msg_id, (klass, kw) in sorted(request_dict.items()):
logging.info(' #0x%04x %s (%s)', msg_id,
klass.__name__, handler)
# Default to EpollEventManager.
EventManager = EpollEventManager
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/exception.py 0000664 0000000 0000000 00000001605 13465024610 0025144 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
class NeoException(Exception):
pass
class PrimaryElected(NeoException):
pass
class PrimaryFailure(NeoException):
pass
class StoppedOperation(NeoException):
pass
class NodeNotReady(NeoException):
pass
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/handler.py 0000664 0000000 0000000 00000031747 13465024610 0024575 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from collections import deque
from operator import itemgetter
from . import logging
from .connection import ConnectionClosed
from .exception import PrimaryElected
from .protocol import (NodeStates, NodeTypes, Packets, uuid_str,
Errors, BackendNotImplemented, NonReadableCell, NotReadyError,
PacketMalformedError, ProtocolError, UnexpectedPacketError)
from .util import cached_property
class AnswerDenied(Exception):
"""Helper exception to stop packet processing and answer a Denied error"""
class DelayEvent(Exception):
pass
class EventHandler(object):
"""This class handles events."""
def __new__(cls, app, *args, **kw):
try:
return app._handlers[cls]
except AttributeError: # for BackupApplication
self = object.__new__(cls)
except KeyError:
self = object.__new__(cls)
if cls.__init__ is object.__init__:
app._handlers[cls] = self
self.app = app
return self
def __repr__(self):
return self.__class__.__name__
def __unexpectedPacket(self, conn, packet, message=None):
"""Handle an unexpected packet."""
if message is None:
message = 'unexpected packet type %s in %s' % (type(packet),
self.__class__.__name__)
else:
message = 'unexpected packet: %s in %s' % (message,
self.__class__.__name__)
logging.error(message)
conn.answer(Errors.ProtocolError(message))
conn.abort()
def dispatch(self, conn, packet, kw={}):
"""This is a helper method to handle various packet types."""
try:
conn.setPeerId(packet.getId())
try:
method = getattr(self, packet.handler_method_name)
except AttributeError:
raise UnexpectedPacketError('no handler found')
args = packet._args
method(conn, *args, **kw)
except DelayEvent, e:
assert not kw, kw
self.getEventQueue().queueEvent(method, conn, args, *e.args)
except UnexpectedPacketError, e:
if not conn.isClosed():
self.__unexpectedPacket(conn, packet, *e.args)
except NotReadyError, message:
if not conn.isClosed():
if not message.args:
message = 'Retry Later'
message = str(message)
conn.answer(Errors.NotReady(message))
conn.abort()
except ProtocolError, message:
if not conn.isClosed():
message = str(message)
conn.answer(Errors.ProtocolError(message))
conn.abort()
except BackendNotImplemented, message:
m = message[0]
conn.answer(Errors.BackendNotImplemented(
"%s.%s does not implement %s"
% (m.im_class.__module__, m.im_class.__name__, m.__name__)))
except NonReadableCell, e:
conn.answer(Errors.NonReadableCell())
except AnswerDenied, e:
conn.answer(Errors.Denied(str(e)))
except AssertionError:
e = sys.exc_info()
try:
try:
conn.close()
except Exception:
logging.exception("")
raise e[0], e[1], e[2]
finally:
del e
def checkClusterName(self, name):
# raise an exception if the given name mismatch the current cluster name
if self.app.name != name:
logging.error('reject an alien cluster')
raise ProtocolError('invalid cluster name')
# Network level handlers
def packetReceived(self, *args):
"""Called when a packet is received."""
self.dispatch(*args)
def connectionStarted(self, conn):
"""Called when a connection is started."""
logging.debug('connection started for %r', conn)
def connectionCompleted(self, conn):
"""Called when a connection is completed."""
logging.debug('connection completed for %r (from %s:%u)',
conn, *conn.getConnector().getAddress())
def connectionFailed(self, conn):
"""Called when a connection failed."""
logging.debug('connection failed for %r', conn)
def connectionClosed(self, conn):
"""Called when a connection is closed by the peer."""
logging.debug('connection closed for %r', conn)
self.connectionLost(conn, NodeStates.DOWN)
def connectionLost(self, conn, new_state):
""" this is a method to override in sub-handlers when there is no need
to make distinction from the kind event that closed the connection """
pass
# Packet handlers.
def notPrimaryMaster(self, conn, primary, known_master_list):
nm = self.app.nm
for address in known_master_list:
nm.createMaster(address=address)
if primary is not None:
primary = known_master_list[primary]
assert primary != self.app.server
raise PrimaryElected(nm.getByAddress(primary))
def _acceptIdentification(*args):
pass
def acceptIdentification(self, conn, node_type, uuid, your_uuid):
app = self.app
node = app.nm.getByAddress(conn.getAddress())
assert node.getConnection() is conn, (node.getConnection(), conn)
if node.getType() == node_type:
if node_type == NodeTypes.MASTER:
other = app.nm.getByUUID(uuid)
if other is not None:
other.setUUID(None)
node.setUUID(uuid)
node.setRunning()
if your_uuid is None:
raise ProtocolError('No UUID supplied')
logging.info('connected to a primary master node')
app.setUUID(your_uuid)
app.id_timestamp = None
elif node.getUUID() != uuid or app.uuid != your_uuid != None:
raise ProtocolError('invalid uuids')
node.setIdentified()
self._acceptIdentification(node)
return
conn.close()
def notifyNodeInformation(self, conn, *args):
app = self.app
app.nm.update(app, *args)
def ping(self, conn):
conn.answer(Packets.Pong())
def pong(self, conn):
pass
def closeClient(self, conn):
conn.server = False
if not conn.client:
conn.close()
def flushLog(self, conn):
logging.flush()
# Error packet handlers.
def error(self, conn, code, message, **kw):
try:
getattr(self, Errors[code])(conn, message)
except (AttributeError, ValueError):
raise UnexpectedPacketError(message)
# XXX: For some errors, the connection should have been closed by the remote
# peer. But what happens if it's not the case because of some bug ?
def protocolError(self, conn, message):
logging.error('protocol error: %s', message)
def notReadyError(self, conn, message):
logging.error('not ready: %s', message)
def timeoutError(self, conn, message):
logging.error('timeout error: %s', message)
def ack(self, conn, message):
logging.debug("no error message: %s", message)
def backendNotImplemented(self, conn, message):
raise NotImplementedError(message)
class MTEventHandler(EventHandler):
"""Base class of handler implementations for MTClientConnection"""
@cached_property
def dispatcher(self):
return self.app.dispatcher
def dispatch(self, conn, packet, kw={}):
assert conn.lock._is_owned() # XXX: see also lockCheckWrapper
super(MTEventHandler, self).dispatch(conn, packet, kw)
def packetReceived(self, conn, packet, kw={}):
"""Redirect all received packet to dispatcher thread."""
if packet.isResponse():
if packet.poll_thread:
self.dispatch(conn, packet, kw)
kw = {}
if not (self.dispatcher.dispatch(conn, packet.getId(), packet, kw)
or type(packet) is Packets.Pong or conn.isClosed()):
raise ProtocolError('Unexpected response packet from %r: %r'
% (conn, packet))
else:
self.dispatch(conn, packet, kw)
def connectionLost(self, conn, new_state):
self.dispatcher.unregister(conn)
def connectionFailed(self, conn):
self.dispatcher.unregister(conn)
def unexpectedInAnswerHandler(*args, **kw):
raise Exception('Unexpected event in an answer handler')
class AnswerBaseHandler(EventHandler):
connectionStarted = unexpectedInAnswerHandler
connectionCompleted = unexpectedInAnswerHandler
connectionFailed = unexpectedInAnswerHandler
timeoutExpired = unexpectedInAnswerHandler
connectionClosed = unexpectedInAnswerHandler
packetReceived = unexpectedInAnswerHandler
protocolError = unexpectedInAnswerHandler
def acceptIdentification(*args):
pass
def connectionClosed(self, conn):
raise ConnectionClosed
class _DelayedConnectionEvent(EventHandler):
# WARNING: This assumes that the connection handler does not change.
handler_method_name = '_func'
__new__ = object.__new__
def __init__(self, func, conn, args):
self._args = args
self._conn = conn
self._func = func
self._msg_id = conn.getPeerId()
def __call__(self):
conn = self._conn
if not conn.isClosed():
msg_id = conn.getPeerId()
try:
self.dispatch(conn, self)
finally:
conn.setPeerId(msg_id)
def __repr__(self):
return '<%s: 0x%x %s>' % (self._func.__name__, self._msg_id, self._conn)
def decode(self):
return self._args
def getEventQueue(self):
raise
def getId(self):
return self._msg_id
class EventQueue(object):
def __init__(self):
self._event_queue = []
self._executing_event = -1
# Stable sort when 2 keys are equal.
# XXX: Is it really useful to keep events with same key ordered
# chronologically ? The caller could use more specific keys. For
# write-locks (by the storage node), the locking tid seems enough.
sortQueuedEvents = (lambda key=itemgetter(0): lambda self:
self._event_queue.sort(key=key))()
def queueEvent(self, func, conn=None, args=(), key=None):
assert self._executing_event < 0, self._executing_event
self._event_queue.append((key, func if conn is None else
_DelayedConnectionEvent(func, conn, args)))
if key is not None:
self.sortQueuedEvents()
def sortAndExecuteQueuedEvents(self):
if self._executing_event < 0:
self.sortQueuedEvents()
self.executeQueuedEvents()
else:
# We can't sort events when they're being processed.
self._executing_event = 1
def executeQueuedEvents(self):
# Not reentrant. When processing a queued event, calling this method
# only tells the caller to retry all events from the beginning, because
# events for the same connection must be processed in chronological
# order.
queue = self._event_queue
if queue: # return quickly if the queue is empty
self._executing_event += 1
if self._executing_event:
return
done = []
while 1:
try:
for i, event in enumerate(queue):
try:
event[1]()
done.append(i)
except DelayEvent:
pass
if self._executing_event:
break
else:
break
finally:
while done:
del queue[done.pop()]
self._executing_event = 0
# What sortAndExecuteQueuedEvents could not do immediately
# is done here:
if event[0] is not None:
self.sortQueuedEvents()
self._executing_event = -1
def logQueuedEvents(self):
if self._event_queue:
logging.info(" Pending events:")
for event in self._event_queue:
logging.info(' %r', event)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/interfaces.py 0000664 0000000 0000000 00000007043 13465024610 0025273 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2015-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import inspect
def check_signature(reference, function):
# args, varargs, varkw, defaults
A, B, C, D = inspect.getargspec(reference)
a, b, c, d = inspect.getargspec(function)
x = len(A) - len(a)
if x < 0: # ignore extra default parameters
if B or x + len(d) < 0:
return False
del a[x:]
d = d[:x] or None
elif x: # different signature
return a == A[:-x] and (b or a and c) and (d or ()) == (D or ())[:-x]
return a == A and (b or not B) and (c or not C) and d == D
def implements(obj, ignore=()):
ignore = set(ignore)
not_implemented = []
wrong_signature = []
if isinstance(obj, type):
tobj = obj
else:
tobj = type(obj)
mro = tobj.mro()
mro.reverse()
base = []
for name in dir(obj):
for x in mro:
try:
x = getattr(x, name)
break
except AttributeError:
pass
if hasattr(x, "__abstract__") or hasattr(x, "__requires__"):
base.append((name, x.__func__))
try:
while 1:
name, func = base.pop()
x = getattr(obj, name)
if type(getattr(x, '__self__', None)) is tobj:
x = x.__func__
if x is func:
try:
x = func.__requires__
except AttributeError:
try:
ignore.remove(name)
except KeyError:
not_implemented.append(name)
else:
base.extend((x.__name__, x) for x in x)
elif not check_signature(func, x):
wrong_signature.append(name)
except IndexError: # base empty
assert not ignore, ignore
assert not not_implemented, not_implemented
assert not wrong_signature, wrong_signature
return obj
def _stub(func):
args, varargs, varkw, _ = inspect.getargspec(func)
if varargs:
args.append("*" + varargs)
if varkw:
args.append("**" + varkw)
exec "def %s(%s): raise NotImplementedError\nf = %s" % (
func.__name__, ",".join(args), func.__name__)
return f
def abstract(func):
f = _stub(func)
f.__abstract__ = 1
f.__defaults__ = func.__defaults__
f.__doc__ = func.__doc__
return f
def requires(*args):
for func in args:
# Tolerate useless abstract decoration on required method (e.g. it
# simplifies the implementation of a fallback decorator), but remove
# marker since it does not need to be implemented if it's required
# by a method that is overridden.
try:
del func.__abstract__
except AttributeError:
func.__code__ = _stub(func).__code__
def decorator(func):
func.__requires__ = args
return func
return decorator
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/locking.py 0000664 0000000 0000000 00000015276 13465024610 0024605 0 ustar 00root root 0000000 0000000 import os
import sys
import threading
import traceback
from collections import deque
from time import time
from Queue import Empty
"""
Verbose locking classes.
Python threading module contains a simple logging mechanism, but:
- It's limitted to RLock class
- It's enabled instance by instance
- Choice to log or not is done at instantiation
- It does not emit any log before trying to acquire lock
This file defines a VerboseLock class implementing basic lock API and
logging in appropriate places with extensive details.
It can be toggled globally by setting NEO_VERBOSE_LOCKING environment
variable to a non-empty value before this module is loaded.
There is no overhead at all when disabled (passthrough to threading
classes).
"""
class LockUser(object):
def __init__(self, message=None, level=0):
t = threading.currentThread()
ident = getattr(t, 'node_name', t.name)
# This class is instantiated from a place desiring to known what
# called it.
# limit=1 would return execution position in this method
# limit=2 would return execution position in caller
# limit=3 returns execution position in caller's caller
# Additionnal level value (should be positive only) can be used when
# more intermediate calls are involved
self.stack = stack = traceback.extract_stack()[:-2-level]
path, line_number, func_name, line = stack[-1]
# Simplify path. Only keep 3 last path elements. It is enough for
# current Neo directory structure.
path = os.path.join('...', *path.split(os.path.sep)[-3:])
self.time = time()
if message is not None:
self.ident = "%s@%r %s:%s %s" % (
ident, self.time, path, line_number, line)
self.note(message)
self.ident = ident
def __eq__(self, other):
return isinstance(other, self.__class__) and self.ident == other.ident
def __repr__(self):
return "%s@%r" % (self.ident, self.time)
def formatStack(self):
return ''.join(traceback.format_list(self.stack))
def note():
write = sys.stderr.write
flush = sys.stderr.flush
def note(self, message):
write("[%s] %s\n" % (self.ident, message))
flush()
return note
note = note()
class VerboseLockBase(object):
_error_class = threading.ThreadError
_release_error = 'release unlocked lock'
def __init__(self, check_owner, name=None, verbose=None):
self._check_owner = check_owner
self._name = name or '<%s@%X>' % (self.__class__.__name__, id(self))
self.owner = None
self.waiting = []
LockUser(repr(self) + " created", 1)
def acquire(self, blocking=1):
owner = self.owner if self._locked() else None
me = LockUser("%s.acquire(%s). Owned by %r. Waiting: %r"
% (self, blocking, owner, self.waiting))
if blocking:
if self._check_owner and me == owner:
me.note("I already own this lock: %r" % owner)
me.note("Owner traceback:\n%s" % owner.formatStack())
me.note("My traceback:\n%s" % me.formatStack())
self.waiting.append(me)
try:
locked = self.lock.acquire(blocking)
finally:
if blocking:
self.waiting.remove(me)
if locked:
self.owner = me
me.note("Lock granted. Waiting: " + repr(self.waiting))
return locked
__enter__ = acquire
def release(self):
me = LockUser("%s.release(). Waiting: %r" % (self, self.waiting))
try:
return self.lock.release()
except self._error_class:
t, v, tb = sys.exc_info()
if str(v) == self._release_error:
raise t, "%s %s (%s)" % (v, self, me), tb
raise
def __exit__(self, t, v, tb):
self.release()
def _locked(self):
raise NotImplementedError
def __repr__(self):
return self._name
class VerboseRLock(VerboseLockBase):
_error_class = RuntimeError
_release_error = 'cannot release un-acquired lock'
def __init__(self, **kw):
super(VerboseRLock, self).__init__(check_owner=False, **kw)
self.lock = threading.RLock()
def _locked(self):
return self.lock._RLock__block.locked()
def _is_owned(self):
return self.lock._is_owned()
class VerboseLock(VerboseLockBase):
def __init__(self, check_owner=True, **kw):
super(VerboseLock, self).__init__(check_owner, **kw)
self.lock = threading.Lock()
def locked(self):
return self.lock.locked()
_locked = locked
class VerboseSemaphore(VerboseLockBase):
def __init__(self, value=1, check_owner=True, **kw):
super(VerboseSemaphore, self).__init__(check_owner, **kw)
self.lock = threading.Semaphore(value)
def _locked(self):
return not self.lock._Semaphore__value
if os.getenv('NEO_VERBOSE_LOCKING'):
Lock = VerboseLock
RLock = VerboseRLock
Semaphore = VerboseSemaphore
else:
Lock = threading.Lock
RLock = threading.RLock
Semaphore = threading.Semaphore
class SimpleQueue(object):
"""
Similar to Queue.Queue but with simpler locking scheme, reducing lock
contention on "put" (benchmark shows 60% less time spent in "put").
As a result:
- only a single consumer possible ("get" vs. "get" race condition)
- only a single producer possible ("put" vs. "put" race condition)
- no blocking size limit possible
- no consumer -> producer notifications (task_done/join API)
Queue is on the critical path: any moment spent here increases client
application wait for object data, transaction completion, etc.
As we have a single consumer (client application's thread) and a single
producer (lib.dispatcher, which can be called from several threads but
serialises calls internally) for each queue, Queue.Queue's locking scheme
can be relaxed to reduce latency.
"""
__slots__ = ('_lock', '_unlock', '_popleft', '_append', '_queue')
def __init__(self):
lock = Lock()
self._lock = lock.acquire
self._unlock = lock.release
self._queue = queue = deque()
self._popleft = queue.popleft
self._append = queue.append
def get(self, block):
if block:
self._lock(False)
while True:
try:
return self._popleft()
except IndexError:
if not block:
raise Empty
self._lock()
def put(self, item):
self._append(item)
self._lock(False)
self._unlock()
def empty(self):
return not self._queue
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/logger.py 0000664 0000000 0000000 00000027345 13465024610 0024436 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import deque
from functools import wraps
from logging import getLogger, Formatter, Logger, StreamHandler, \
DEBUG, WARNING
from time import time
from traceback import format_exception
import bz2, inspect, neo, os, signal, sqlite3, sys, threading
from .util import nextafter
INF = float('inf')
# Stats for storage node of matrix test (py2.7:SQLite)
RECORD_SIZE = ( 234360832 # extra memory used
- 16777264 # sum of raw data ('msg' attribute)
) // 187509 # number of records
FMT = ('%(asctime)s %(levelname)-9s %(name)-10s'
' [%(module)14s:%(lineno)3d] \n%(message)s')
class _Formatter(Formatter):
def formatTime(self, record, datefmt=None):
return Formatter.formatTime(self, record,
'%Y-%m-%d %H:%M:%S') + '.%04d' % (record.msecs * 10)
def format(self, record):
lines = iter(Formatter.format(self, record).splitlines())
prefix = lines.next()
return '\n'.join(prefix + line for line in lines)
class PacketRecord(object):
args = None
levelno = DEBUG
__init__ = property(lambda self: self.__dict__.update)
class NEOLogger(Logger):
default_root_handler = StreamHandler()
default_root_handler.setFormatter(_Formatter(FMT))
def __init__(self):
Logger.__init__(self, None)
self.parent = root = getLogger()
if not root.handlers:
root.addHandler(self.default_root_handler)
self.__reset()
self._nid_dict = {}
self._async = set()
l = threading.Lock()
self._acquire = l.acquire
release = l.release
def _release():
try:
while self._async:
self._async.pop()(self)
finally:
release()
self._release = _release
self.backlog()
def __reset(self):
self._db = None
self._node = {}
self._record_queue = deque()
self._record_size = 0
def __enter__(self):
self._acquire()
return self._db
def __exit__(self, t, v, tb):
self._release()
def __async(wrapped):
def wrapper(self):
self._async.add(wrapped)
if self._acquire(0):
self._release()
return wraps(wrapped)(wrapper)
@__async
def reopen(self):
if self._db is None:
return
q = self._db.execute
if not q("SELECT 1 FROM packet LIMIT 1").fetchone():
q("DROP TABLE protocol")
# DROP TABLE already replaced previous data with zeros,
# so VACUUM is not really useful. But here, it should be free.
q("VACUUM")
self._setup(q("PRAGMA database_list").fetchone()[2])
@__async
def flush(self):
if self._db is None:
return
try:
for r in self._record_queue:
self._emit(r)
finally:
# Always commit, to not lose any record that we could emit.
self.commit()
self._record_queue.clear()
self._record_size = 0
def commit(self):
try:
self._db.commit()
except sqlite3.OperationalError as e:
x = e.args[0]
if x != 'database is locked':
raise
sys.stderr.write('%s: retrying to emit log...' % x)
while 1:
try:
self._db.commit()
break
except sqlite3.OperationalError as e:
if e.args[0] != x:
raise
sys.stderr.write(' ok\n')
def backlog(self, max_size=1<<24, max_packet=None):
with self:
self._max_packet = max_packet
self._max_size = max_size
if max_size is None:
self.flush()
else:
q = self._record_queue
while max_size < self._record_size:
self._record_size -= RECORD_SIZE + len(q.popleft().msg)
def _setup(self, filename=None, reset=False):
from . import protocol as p
global packb, uuid_str
packb = p.packb
uuid_str = p.uuid_str
if self._db is not None:
self._db.close()
if not filename:
self.__reset()
return
if filename:
self._db = sqlite3.connect(filename, check_same_thread=False)
q = self._db.execute
if self._max_size is None:
q("PRAGMA synchronous = OFF")
if 1: # Not only when logging everything,
# but also for interoperability with logrotate.
q("PRAGMA journal_mode = MEMORY")
for t, columns in (('log', (
"level INTEGER NOT NULL",
"pathname TEXT",
"lineno INTEGER",
"msg TEXT",
)),
('packet', (
"msg_id INTEGER NOT NULL",
"code INTEGER NOT NULL",
"peer TEXT NOT NULL",
"body BLOB",
))):
if reset:
q('DROP TABLE IF EXISTS ' + t)
q('DROP TABLE IF EXISTS %s1' % t)
elif (2, 'name', 'TEXT', 0, None, 0) in q(
"PRAGMA table_info(%s)" % t):
q("ALTER TABLE %s RENAME TO %s1" % (t, t))
columns = (
"date REAL PRIMARY KEY",
"node INTEGER",
) + columns
q("CREATE TABLE IF NOT EXISTS %s (\n %s) WITHOUT ROWID"
% (t, ',\n '.join(columns)))
q("""CREATE TABLE IF NOT EXISTS protocol (
date REAL PRIMARY KEY,
text BLOB NOT NULL) WITHOUT ROWID
""")
q("""CREATE TABLE IF NOT EXISTS node (
id INTEGER PRIMARY KEY,
name TEXT,
cluster TEXT,
nid INTEGER)
""")
with open(inspect.getsourcefile(p)) as p:
p = buffer(bz2.compress(p.read()))
x = q("SELECT text FROM protocol ORDER BY date DESC LIMIT 1"
).fetchone()
if (x and x[0]) != p:
# In case of multithreading, we can have locally unsorted
# records so we can't find the oldest one (it may not be
# pushed to queue): let's use 0 on log rotation.
x = time() if x else 0
q("INSERT INTO protocol VALUES (?,?)", (x, p))
self._db.commit()
self._node = {x[1:]: x[0] for x in q("SELECT * FROM node")}
def setup(self, filename=None, reset=False):
with self:
self._setup(filename, reset)
__del__ = setup
def fork(self):
with self:
pid = os.fork()
if pid:
return pid
self._setup()
def isEnabledFor(self, level):
return True
def _emit(self, r):
try:
nid = self._node[r._node]
except KeyError:
if r._node == (None, None, None):
nid = None
else:
try:
nid = 1 + max(x for x in self._node.itervalues()
if x is not None)
except ValueError:
nid = 0
self._db.execute("INSERT INTO node VALUES (?,?,?,?)",
(nid,) + r._node)
self._node[r._node] = nid
if type(r) is PacketRecord:
ip, port = r.addr
peer = ('%s %s ([%s]:%s)' if ':' in ip else '%s %s (%s:%s)') % (
'>' if r.outgoing else '<', uuid_str(r.uuid), ip, port)
msg = r.msg
if msg is not None:
msg = buffer(msg if type(msg) is bytes else packb(msg))
q = "INSERT INTO packet VALUES (?,?,?,?,?,?)"
x = [r.created, nid, r.msg_id, r.code, peer, msg]
else:
pathname = os.path.relpath(r.pathname, *neo.__path__)
q = "INSERT INTO log VALUES (?,?,?,?,?,?)"
x = [r.created, nid, r.levelno, pathname, r.lineno, r.msg]
while 1:
try:
self._db.execute(q, x)
break
except sqlite3.IntegrityError:
x[0] = nextafter(x[0], INF)
def _queue(self, record):
name = self.name and str(self.name)
record._node = (name,) + self._nid_dict.get(name, (None, None))
self._acquire()
try:
if self._max_size is None:
self._emit(record)
self.commit()
else:
self._record_size += RECORD_SIZE + len(record.msg or '')
q = self._record_queue
q.append(record)
if record.levelno < WARNING:
while self._max_size < self._record_size:
self._record_size -= RECORD_SIZE + len(q.popleft().msg)
else:
self.flush()
finally:
self._release()
def callHandlers(self, record):
if self._db is not None:
record.msg = record.getMessage()
record.args = None
if record.exc_info:
record.msg = (record.msg and record.msg + '\n') + ''.join(
format_exception(*record.exc_info)).strip()
record.exc_info = None
self._queue(record)
if Logger.isEnabledFor(self, record.levelno):
record.name = self.name or 'NEO'
self.parent.callHandlers(record)
def packet(self, connection, packet, outgoing):
if self._db is not None:
if self._max_packet and self._max_packet < packet.size:
args = None
else:
args = packet._args
try:
hash(args)
except TypeError:
args = packb(args)
self._queue(PacketRecord(
created=time(),
msg_id=packet._id,
code=packet._code,
outgoing=outgoing,
uuid=connection.getUUID(),
addr=connection.getAddress(),
msg=args))
def node(self, *cluster_nid):
name = self.name and str(self.name)
prev = self._nid_dict.get(name)
if prev != cluster_nid:
from .protocol import uuid_str
self.info('Node ID: %s', uuid_str(cluster_nid[1]))
self._nid_dict[name] = cluster_nid
@property
def resetNids(self):
return self._nid_dict.clear
logging = NEOLogger()
signal.signal(signal.SIGRTMIN, lambda signum, frame: logging.flush())
signal.signal(signal.SIGRTMIN+1, lambda signum, frame: logging.reopen())
def patch():
def fork():
with logging:
pid = os_fork()
if not pid:
logging._setup()
return pid
os_fork = os.fork
os.fork = fork
patch()
del patch
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/node.py 0000664 0000000 0000000 00000050060 13465024610 0024072 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import errno, json, os
from time import time
from . import attributeTracker, logging
from .handler import DelayEvent, EventQueue
from .protocol import formatNodeList, uuid_str, \
NodeTypes, NodeStates, NotReadyError, ProtocolError
class Node(object):
"""This class represents a node."""
_connection = None
_identified = False
devpath = ()
id_timestamp = None
def __init__(self, manager, address=None, uuid=None, state=NodeStates.DOWN):
self._state = state
self._address = address
self._uuid = uuid
self._manager = manager
self._last_state_change = time()
manager.add(self)
@property
def send(self):
assert self.isConnected(), 'Not connected'
return self._connection.send
@property
def ask(self):
assert self.isConnected(), 'Not connected'
return self._connection.ask
@property
def answer(self):
assert self.isConnected(), 'Not connected'
return self._connection.answer
def getLastStateChange(self):
return self._last_state_change
def getState(self):
return self._state
def setState(self, new_state):
if self._state == new_state:
return
if new_state == NodeStates.UNKNOWN:
self._manager.remove(self)
self._state = new_state
else:
old_state = self._state
self._state = new_state
self._manager._updateState(self, old_state)
self._last_state_change = time()
def setAddress(self, address):
if self._address == address:
return
old_address = self._address
self._address = address
self._manager._updateAddress(self, old_address)
def getAddress(self):
return self._address
def setUUID(self, uuid):
if self._uuid == uuid:
return
old_uuid = self._uuid
self._uuid = uuid
self._manager._updateUUID(self, old_uuid)
if self._connection is not None:
self._connection.setUUID(uuid)
def getUUID(self):
return self._uuid
def onConnectionClosed(self):
"""
Callback from node's connection when closed
"""
assert self._connection is not None
del self._connection
self._identified = False
def setConnection(self, connection, force=None):
"""
Define the connection that is currently available to this node.
If there is already a connection set, 'force' must be given:
the new connection replaces the old one if it is true. In any case,
the node must be managed by the same handler for the client and
server parts.
"""
assert connection.getUUID() in (None, self._uuid), connection
connection.setUUID(self._uuid)
conn = self._connection
if conn is None:
self._connection = connection
if connection.isServer():
self.setIdentified()
else:
assert force is not None, (conn,
attributeTracker.whoSet(self, '_connection'))
# The test on peer_id is there to protect against buggy nodes.
# XXX: handler comparison does not cover all cases: there may
# be a pending handler change, which won't be detected, or a future
# handler change which is not prevented. Complete implementation
# should allow different handlers for each connection direction,
# with in-packets client/server indicators to decide which handler
# (server-ish or client-ish) to use. There is currently no need for
# the full-fledged functionality, and it is simpler this way.
if not force or conn.getPeerId() is not None or \
type(conn.getHandler()) is not type(connection.getHandler()):
# It may also happen in case of a network failure that is only
# noticed by the peer. We'd like to accept the new connection
# immediately but it's quite complicated. At worst (keepalive
# packets dropped), 'conn' will be closed in ~ 1 minute.
raise ProtocolError("already connected")
def on_closed():
self._connection = connection
assert connection.isServer()
self.setIdentified()
conn.setOnClose(on_closed)
conn.close()
connection.setOnClose(self.onConnectionClosed)
def getConnection(self):
"""
Returns the connection to the node if available
"""
assert self._connection is not None
return self._connection
def isConnected(self, connecting=False):
"""
Returns True is a connection is established with the node
"""
return self._connection is not None and (connecting or
not self._connection.connecting)
def setIdentified(self):
assert self._connection is not None
self._identified = True
def isIdentified(self):
"""
Returns True if identification packets have been exchanged
"""
return self._identified
def __repr__(self):
addr = self._address
return '<%s(uuid=%s%s, state=%s, connection=%r%s) at %x>' % (
self.__class__.__name__,
uuid_str(self._uuid),
', address=' + ('[%s]:%s' if ':' in addr[0] else '%s:%s') % addr
if addr else '',
self._state,
self._connection,
'' if self._identified else ', not identified',
id(self),
)
def asTuple(self):
""" Returned tuple is intended to be used in protocol encoders """
return (self.getType(), self._address, self._uuid, self._state,
self.id_timestamp)
def __gt__(self, node):
# sort per UUID if defined
if self._uuid is not None:
return self._uuid > node._uuid
return self._address > node._address
def whoSetState(self):
"""
Debugging method: call this method to know who set the current
state value.
"""
return attributeTracker.whoSet(self, '_state')
attributeTracker.track(Node)
class MasterDB(object):
"""
Manages accesses to master's address database.
"""
def __init__(self, path):
self._path = path
try:
with open(path) as db:
self._set = set(map(tuple, json.load(db)))
except IOError, e:
if e.errno != errno.ENOENT:
raise
self._set = set()
self._save(True)
def _save(self, raise_on_error=False):
tmp = self._path + '#neo#'
try:
with open(tmp, 'w') as db:
json.dump(list(self._set), db)
os.rename(tmp, self._path)
except EnvironmentError:
if raise_on_error:
raise
logging.exception('failed saving list of master nodes to %r',
self._path)
finally:
try:
os.remove(tmp)
except OSError:
pass
def remove(self, addr):
if addr in self._set:
self._set.remove(addr)
self._save()
def addremove(self, old, new):
assert old != new
if None is not new not in self._set:
self._set.add(new)
elif old not in self._set:
return
self._set.discard(old)
self._save()
def __repr__(self):
return '<%s@%s: %s>' % (self.__class__.__name__, self._path,
', '.join(sorted(('[%s]:%s' if ':' in x[0] else '%s:%s') % x
for x in self._set)))
def __iter__(self):
return iter(self._set)
class NodeManager(EventQueue):
"""This class manages node status."""
_master_db = None
# TODO: rework getXXXList() methods, filter first by node type
# - getStorageList(identified=True, connected=True, )
# - getList(...)
def __init__(self, master_db=None):
"""
master_db (string)
Path to a file containing master nodes' addresses. Used to automate
master list updates. If not provided, no automation will happen.
"""
self._node_set = set()
self._address_dict = {}
self._uuid_dict = {}
self._type_dict = {}
self._state_dict = {}
if master_db is not None:
self._master_db = db = MasterDB(master_db)
for addr in db:
self.createMaster(address=addr)
self.reset()
close = __init__
def reset(self):
EventQueue.__init__(self)
self._timestamp = 0
def add(self, node):
if node in self._node_set:
logging.warning('adding a known node %r, ignoring', node)
return
assert not node.isUnknown(), node
self._node_set.add(node)
self._updateAddress(node, None)
self._updateUUID(node, None)
self.__updateSet(self._type_dict, None, node.getType(), node)
self.__updateSet(self._state_dict, None, node.getState(), node)
def remove(self, node):
self._node_set.remove(node)
# a node may have not be indexed by uuid or address, eg.:
# - a client or admin node that don't have listening address
self._address_dict.pop(node.getAddress(), None)
# - a master known by address but without UUID
self._uuid_dict.pop(node.getUUID(), None)
self._state_dict[node.getState()].remove(node)
self._type_dict[node.getType()].remove(node)
if node.isMaster() and self._master_db is not None:
self._master_db.remove(node.getAddress())
def __update(self, index_dict, old_key, new_key, node):
""" Update an index from old to new key """
if old_key is not None:
assert index_dict[old_key] is node, '%r is stored as %s, ' \
'moving %r to %s' % (index_dict[old_key], old_key, node,
new_key)
del index_dict[old_key]
if new_key is not None:
assert index_dict.get(new_key, node) is node, 'Adding %r at %r ' \
'would overwrite %r' % (node, new_key, index_dict[new_key])
index_dict[new_key] = node
def _updateAddress(self, node, old_address):
address = node.getAddress()
self.__update(self._address_dict, old_address, address, node)
if node.isMaster() and self._master_db is not None:
self._master_db.addremove(old_address, address)
def _updateUUID(self, node, old_uuid):
self.__update(self._uuid_dict, old_uuid, node.getUUID(), node)
def __updateSet(self, set_dict, old_key, new_key, node):
""" Update a set index from old to new key """
if old_key in set_dict:
set_dict[old_key].remove(node)
set_dict.setdefault(new_key, set()).add(node)
def _updateState(self, node, old_state):
assert not node.isUnknown(), node
self.__updateSet(self._state_dict, old_state, node.getState(), node)
def getList(self, node_filter=None):
if node_filter is None:
return list(self._node_set)
return filter(node_filter, self._node_set)
def getIdentifiedList(self, pool_set=None):
"""
Returns a generator to iterate over identified nodes
pool_set is an iterable of UUIDs allowed
"""
return [x for x in self._node_set if x.isIdentified() and (
pool_set is None or x.getUUID() in pool_set)]
def getConnectedList(self):
"""
Returns a generator to iterate over connected nodes
"""
# TODO: use an index
return [x for x in self._node_set if x.isConnected()]
def getByStateList(self, state):
""" Get a node list filtered per the node state """
return list(self._state_dict.get(state, ()))
def _getTypeList(self, node_type, only_identified=False):
node_set = self._type_dict.get(node_type, ())
if only_identified:
return [x for x in node_set if x.isIdentified()]
return list(node_set)
def getByAddress(self, address):
""" Return the node that match with a given address """
return self._address_dict.get(address, None)
def getByUUID(self, uuid, *id_timestamp):
"""Return the node that matches with a given UUID
If an id timestamp is passed, DelayEvent is raised if identification
must be delayed. This is because we rely only on the notifications from
the master to recognize nodes (otherwise, we could get id conflicts)
and such notifications may be late in some cases, even when the master
expects us to not reject the connection.
"""
node = self._uuid_dict.get(uuid)
if id_timestamp:
id_timestamp, = id_timestamp
if not node or node.id_timestamp != id_timestamp:
if self._timestamp < id_timestamp:
raise DelayEvent
# The peer got disconnected from the master.
raise NotReadyError('unknown by master')
return node
def _createNode(self, klass, address=None, uuid=None, **kw):
by_address = self.getByAddress(address)
by_uuid = self.getByUUID(uuid)
if by_address is None and by_uuid is None:
node = klass(self, address=address, uuid=uuid, **kw)
else:
if by_uuid is None or by_address is by_uuid:
node = by_address
elif by_address is None:
node = by_uuid
else:
raise ValueError('Got different nodes for uuid %s: %r and '
'address %r: %r.' % (uuid_str(uuid), by_uuid, address,
by_address))
if uuid is not None:
node_uuid = node.getUUID()
if node_uuid is None:
node.setUUID(uuid)
elif node_uuid != uuid:
raise ValueError('Expected uuid %s on node %r' % (
uuid_str(uuid), node))
if address is not None:
node_address = node.getAddress()
if node_address is None:
node.setAddress(address)
elif node_address != address:
raise ValueError('Expected address %r on node %r' % (
address, node))
assert node.__class__ is klass, (node.__class__, klass)
return node
def createFromNodeType(self, node_type, **kw):
return self._createNode(NODE_TYPE_MAPPING[node_type], **kw)
def update(self, app, timestamp, node_list):
assert self._timestamp < timestamp, (self._timestamp, timestamp)
self._timestamp = timestamp
added_list = [] if app.id_timestamp is None else None
for node_type, addr, uuid, state, id_timestamp in node_list:
# This should be done here (although klass might not be used in this
# iteration), as it raises if type is not valid.
klass = NODE_TYPE_MAPPING[node_type]
# lookup in current table
node_by_uuid = self.getByUUID(uuid)
node_by_addr = self.getByAddress(addr)
node = node_by_addr or node_by_uuid
log_args = node_type, uuid_str(uuid), addr, state, id_timestamp
if node is None:
assert state != NodeStates.UNKNOWN, (self._node_set,) + log_args
node = self._createNode(klass, address=addr, uuid=uuid,
state=state)
logging.debug('creating node %r', node)
else:
assert isinstance(node, klass), 'node %r is not ' \
'of expected type: %r' % (node, klass)
if None is not node_by_uuid is not node_by_addr is not None:
assert added_list is not None, \
'Discrepancy between node_by_uuid (%r) and ' \
'node_by_addr (%r)' % (node_by_uuid, node_by_addr)
node_by_uuid.setUUID(None)
if state == NodeStates.UNKNOWN:
logging.debug('dropping node %r (%r), found with %s '
'%s %s %s %s', node, node.isConnected(), *log_args)
if node.isConnected():
# Cut this connection, node removed by handler.
# It's important for a storage to disconnect nodes that
# aren't connected to the primary master, in order to
# avoid conflict of node id. The clients will first
# reconnect to the master because they cleared their
# partition table upon disconnection.
node.getConnection().close()
if app.uuid != uuid: # XXX
dropped = app.pt.dropNode(node)
assert dropped, node
self.remove(node)
continue
logging.debug('updating node %r to %s %s %s %s %s',
node, *log_args)
node.setUUID(uuid)
node.setAddress(addr)
node.setState(state)
node.id_timestamp = id_timestamp
if app.uuid == uuid:
app.id_timestamp = id_timestamp
if added_list is not None:
added_list.append(node)
if added_list is not None:
assert app.id_timestamp is not None
# For the first notification, we receive a full list of nodes from
# the master. Remove all unknown nodes from a previous connection.
for node in self._node_set.difference(added_list):
if not node.isStorage() or app.pt.dropNode(node):
self.remove(node)
self.log()
self.executeQueuedEvents()
def log(self):
logging.info('Node manager : %u nodes', len(self._node_set))
if self._node_set:
logging.info('\n'.join(formatNodeList(
map(Node.asTuple, self._node_set), ' * ')))
self.logQueuedEvents()
@apply
def NODE_TYPE_MAPPING():
def setmethod(cls, attr, value):
assert not hasattr(cls, attr), (cls, attr)
setattr(cls, attr, value)
def setfullmethod(cls, attr, value):
value.__name__ = attr
setmethod(cls, attr, value)
def camel_case(enum):
return str(enum).replace('_', ' ').title().replace(' ', '')
def setStateAccessors(state):
name = camel_case(state)
setfullmethod(Node, 'set' + name, lambda self: self.setState(state))
setfullmethod(Node, 'is' + name, lambda self: self._state == state)
map(setStateAccessors, NodeStates)
node_type_dict = {}
getType = lambda node_type: staticmethod(lambda: node_type)
true = staticmethod(lambda: True)
createNode = lambda cls: lambda self, **kw: self._createNode(cls, **kw)
getList = lambda node_type: lambda self, only_identified=False: \
self._getTypeList(node_type, only_identified)
bases = Node,
for node_type in NodeTypes:
name = camel_case(node_type)
is_name = 'is' + name
setmethod(Node, is_name, bool)
node_type_dict[node_type] = cls = type(name + 'Node', bases, {
'getType': getType(node_type),
is_name: true,
})
setfullmethod(NodeManager, 'create' + name, createNode(cls))
setfullmethod(NodeManager, 'get%sList' % name, getList(node_type))
return node_type_dict
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/patch.py 0000664 0000000 0000000 00000005517 13465024610 0024253 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2015-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
def speedupFileStorageTxnLookup():
"""Speed up lookup of start position when instantiating an iterator
FileStorage does not index the file positions of transactions.
With this patch, we use the existing {oid->file_pos} index to bisect the
the closest file position to start iterating.
"""
from array import array
from bisect import bisect
from collections import defaultdict
from neo.lib import logging
from ZODB.FileStorage.FileStorage import FileStorage, FileIterator
typecode = 'L' if array('I').itemsize < 4 else 'I'
class Start(object):
def __init__(self, read_data_header, h, tid):
self.read_data_header = read_data_header
self.h = h << 32
self.tid = tid
def __lt__(self, l):
return self.tid < self.read_data_header(self.h | l).tid
def iterator(self, start=None, stop=None):
if start:
try:
index = self._tidindex
except AttributeError:
logging.info("Building index for faster lookup of"
" transactions in the FileStorage DB.")
# Cache a sorted list of all the file pos from oid index.
# To reduce memory usage, the list is splitted in arrays of
# low order 32-bit words.
tindex = defaultdict(lambda: array(typecode))
for x in self._index.itervalues():
tindex[x >> 32].append(x & 0xffffffff)
index = self._tidindex = []
for h, l in sorted(tindex.iteritems()):
l = array(typecode, sorted(l))
x = self._read_data_header(h << 32 | l[0])
index.append((x.tid, h, l))
logging.info("... index built")
x = bisect(index, (start,)) - 1
if x >= 0:
x, h, index = index[x]
x = self._read_data_header
h = x(h << 32 | index[bisect(index, Start(x, h, start)) - 1])
return FileIterator(self._file_name, start, stop, h.tloc)
return FileIterator(self._file_name, start, stop)
FileStorage.iterator = iterator
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/protocol.py 0000664 0000000 0000000 00000064366 13465024610 0025024 0 ustar 00root root 0000000 0000000
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import threading
from functools import partial
from msgpack import packb
# The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes.
PROTOCOL_VERSION = 0
# By encoding the handshake packet with msgpack, the whole NEO stream can be
# decoded with msgpack. The first byte is 0x92, which is different from TLS
# Handshake (0x16).
HANDSHAKE_PACKET = packb(('NEO', PROTOCOL_VERSION))
# Used to distinguish non-NEO stream from version mismatch.
MAGIC_SIZE = len(HANDSHAKE_PACKET) - len(packb(PROTOCOL_VERSION))
RESPONSE_MASK = 0x8000
# Avoid some memory errors on corrupted data.
# Before we use msgpack, we limited the size of a whole packet. That's not
# possible anymore because the size is not known in advance. Packets bigger
# than the buffer size are possible (e.g. a huge list of small items) and for
# that we could compare the stream position (Unpacker.tell); it's not worth it.
UNPACK_BUFFER_SIZE = 0x4000000
@apply
def Unpacker():
global registerExtType, packb
from msgpack import ExtType, unpackb, Packer, Unpacker
ext_type_dict = []
kw = dict(use_bin_type=True)
pack_ext = Packer(**kw).pack
def registerExtType(getstate, make):
code = len(ext_type_dict)
ext_type_dict.append(lambda data: make(unpackb(data, use_list=False)))
return lambda obj: ExtType(code, pack_ext(getstate(obj)))
iterable_types = set, tuple
def default(obj):
try:
pack = obj._pack
except AttributeError:
assert type(obj) in iterable_types, type(obj)
return list(obj)
return pack()
lock = threading.Lock()
pack = Packer(default, strict_types=True, **kw).pack
def packb(obj):
with lock: # in case that 'default' is called
return pack(obj)
return partial(Unpacker, use_list=False, max_buffer_size=UNPACK_BUFFER_SIZE,
ext_hook=lambda code, data: ext_type_dict[code](data))
class Enum(tuple):
class Item(int):
__slots__ = '_name', '_enum', '_pack'
def __str__(self):
return self._name
def __repr__(self):
return "" % (self._name, self)
def __eq__(self, other):
if type(other) is self.__class__:
assert other._enum is self._enum
return other is self
return other == int(self)
def __new__(cls, func):
names = func.func_code.co_names
self = tuple.__new__(cls, map(cls.Item, xrange(len(names))))
self._name = func.__name__
pack = registerExtType(int, self.__getitem__)
for item, name in zip(self, names):
setattr(self, name, item)
item._name = name
item._enum = self
item._pack = (lambda x: lambda: x)(pack(item))
return self
def __repr__(self):
return "" % self._name
# The order of extension type is important.
# Enum types first, sorted alphabetically.
@Enum
def CellStates():
# Write-only cell. Last transactions are missing because storage is/was down
# for a while, or because it is new for the partition. It usually becomes
# UP_TO_DATE when replication is done.
OUT_OF_DATE
# Normal state: cell is writable/readable, and it isn't planned to drop it.
UP_TO_DATE
# Same as UP_TO_DATE, except that it will be discarded as soon as another
# node finishes to replicate it. It means a partition is moved from 1 node
# to another. It is also discarded immediately if out-of-date.
FEEDING
# A check revealed that data differs from other replicas. Cell is neither
# readable nor writable.
CORRUPTED
# Not really a state: only used in network packets to tell storages to drop
# partitions.
DISCARDED
@Enum
def ClusterStates():
# The cluster is initially in the RECOVERING state, and it goes back to
# this state whenever the partition table becomes non-operational again.
# An election of the primary master always happens, in case of a network
# cut between a primary master and all other nodes. The primary master:
# - first recovers its own data by reading it from storage nodes;
# - waits for the partition table be operational;
# - automatically switch to VERIFYING if the cluster can be safely started.
RECOVERING
# Transient state, used to:
# - replay the transaction log, in case of unclean shutdown;
# - and actually truncate the DB if the user asked to do so.
# Then, the cluster either goes to RUNNING or STARTING_BACKUP state.
VERIFYING
# Normal operation. The DB is read-writable by clients.
RUNNING
# Transient state to shutdown the whole cluster.
STOPPING
# Transient state, during which the master (re)connect to the upstream
# master.
STARTING_BACKUP
# Backup operation. The master is notified of new transactions thanks to
# invalidations and orders storage nodes to fetch them from upstream.
# Because cells are synchronized independently, the DB is often
# inconsistent.
BACKINGUP
# Transient state, when the user decides to go back to RUNNING state.
# The master stays in this state until the DB is consistent again.
# In case of failure, the cluster will go back to backup mode.
STOPPING_BACKUP
@Enum
def ErrorCodes():
ACK
DENIED
NOT_READY
OID_NOT_FOUND
TID_NOT_FOUND
OID_DOES_NOT_EXIST
PROTOCOL_ERROR
REPLICATION_ERROR
CHECKING_ERROR
BACKEND_NOT_IMPLEMENTED
NON_READABLE_CELL
READ_ONLY_ACCESS
INCOMPLETE_TRANSACTION
@Enum
def NodeStates():
UNKNOWN
DOWN
RUNNING
PENDING
@Enum
def NodeTypes():
MASTER
STORAGE
CLIENT
ADMIN
# used for logging
node_state_prefix_dict = {
NodeStates.RUNNING: 'R',
NodeStates.DOWN: 'D',
NodeStates.UNKNOWN: 'U',
NodeStates.PENDING: 'P',
}
# used for logging
cell_state_prefix_dict = {
CellStates.UP_TO_DATE: 'U',
CellStates.OUT_OF_DATE: 'O',
CellStates.FEEDING: 'F',
CellStates.DISCARDED: 'D',
CellStates.CORRUPTED: 'C',
}
# Other constants.
INVALID_TID = \
INVALID_OID = '\xff' * 8
INVALID_PARTITION = 0xffffffff
ZERO_HASH = '\0' * 20
ZERO_TID = \
ZERO_OID = '\0' * 8
MAX_TID = '\x7f' + '\xff' * 7 # SQLite does not accept numbers above 2^63-1
# High-order byte:
# 7 6 5 4 3 2 1 0
# | | | | +-+-+-+-- reserved (0)
# | +-+-+---------- node type
# +---------------- temporary if negative
# UUID namespaces are required to prevent conflicts when the master generate
# new uuid before it knows uuid of existing storage nodes. So only the high
# order bit is really important and the 31 other bits could be random.
# Extra namespace information and non-randomness of 3 LOB help to read logs.
UUID_NAMESPACES = {
NodeTypes.STORAGE: 0x00,
NodeTypes.MASTER: -0x10,
NodeTypes.CLIENT: -0x20,
NodeTypes.ADMIN: -0x30,
}
uuid_str = (lambda ns: lambda uuid:
ns[uuid >> 24] + str(uuid & 0xffffff) if uuid else str(uuid)
)({v: str(k)[0] for k, v in UUID_NAMESPACES.iteritems()})
class ProtocolError(Exception):
""" Base class for protocol errors, close the connection """
class PacketMalformedError(ProtocolError):
"""Close the connection"""
class UnexpectedPacketError(ProtocolError):
"""Close the connection"""
class NotReadyError(ProtocolError):
""" Just close the connection """
class BackendNotImplemented(Exception):
""" Method not implemented by backend storage """
class NonReadableCell(Exception):
"""Read-access to a cell that is actually non-readable
This happens in case of race condition at processing partition table
updates: client's PT is older or newer than storage's. The latter case is
possible because the master must validate any end of replication, which
means that the storage node can't anticipate the PT update (concurrently,
there may be a first tweaks that moves the replicated cell to another node,
and a second one that moves it back).
On such event, the client must retry, preferably another cell.
"""
class Packet(object):
"""
Base class for any packet definition.
"""
_ignore_when_closed = False
_request = None
_answer = None
_code = None
_id = None
nodelay = True
poll_thread = False
def __init__(self, *args):
assert self._code is not None, "Packet class not registered"
self._args = args
def setId(self, value):
self._id = value
def getId(self):
assert self._id is not None, "No identifier applied on the packet"
return self._id
def encode(self, packb=packb):
""" Encode a packet as a string to send it over the network """
r = packb((self._id, self._code, self._args))
self.size = len(r)
return r
def __repr__(self):
return '%s[%r]' % (self.__class__.__name__, self._id)
def __eq__(self, other):
""" Compare packets with their code instead of content """
if other is None:
return False
assert isinstance(other, Packet)
return self._code == other._code
def isError(self):
return self._code == RESPONSE_MASK
def isResponse(self):
return self._code & RESPONSE_MASK
def getAnswerClass(self):
return self._answer
def ignoreOnClosedConnection(self):
"""
Tells if this packet must be ignored when its connection is closed
when it is handled.
"""
return self._ignore_when_closed
class PacketRegistryFactory(dict):
def __call__(self, name, base, d):
for k, v in d.items():
if isinstance(v, type) and issubclass(v, Packet):
v.__name__ = k
v.handler_method_name = k[0].lower() + k[1:]
# this builds a "singleton"
return type('PacketRegistry', base, d)(self)
def register(self, doc, ignore_when_closed=None, request=False, error=False,
_base=(Packet,), **kw):
""" Register a packet in the packet registry """
code = len(self)
if doc is None:
self[code] = None
return # None registered only to skip a code number (for compatibility)
if error and not request:
assert not code
code = RESPONSE_MASK
kw.update(__doc__=doc, _code=code)
packet = type('', _base, kw)
# register the request
self[code] = packet
if request:
if ignore_when_closed is None:
# By default, on a closed connection:
# - request: ignore
# - answer: keep
# - notification: keep
packet._ignore_when_closed = True
else:
assert ignore_when_closed is False
if error:
packet._answer = self[RESPONSE_MASK]
else:
# build a class for the answer
code |= RESPONSE_MASK
kw['_code'] = code
answer = packet._answer = self[code] = type('', _base, kw)
return packet, answer
else:
assert ignore_when_closed is None
return packet
class Packets(dict):
"""
Packet registry that checks packet code uniqueness and provides an index
"""
__metaclass__ = PacketRegistryFactory()
notify = __metaclass__.register
request = partial(notify, request=True)
Error = notify("""
Error is a special type of message, because this can be sent against
any other message, even if such a message does not expect a reply
usually.
:nodes: * -> *
""", error=True)
RequestIdentification, AcceptIdentification = request("""
Request a node identification. This must be the first packet for any
connection.
:nodes: * -> *
""", poll_thread=True)
Ping, Pong = request("""
Empty request used as network barrier.
:nodes: * -> *
""")
CloseClient = notify("""
Tell peer that it can close the connection if it has finished with us.
:nodes: * -> *
""")
AskPrimary, AnswerPrimary = request("""
Ask node identier of the current primary master.
:nodes: ctl -> A
""")
NotPrimaryMaster = notify("""
Notify peer that I'm not the primary master. Attach any extra
information to help the peer joining the cluster.
:nodes: SM -> *
""")
NotifyNodeInformation = notify("""
Notify information about one or more nodes.
:nodes: M -> *
""")
AskRecovery, AnswerRecovery = request("""
Ask storage nodes data needed by master to recover.
Reused by `neoctl print ids`.
:nodes: M -> S; ctl -> A -> M
""")
AskLastIDs, AnswerLastIDs = request("""
Ask the last OID/TID so that a master can initialize its
TransactionManager. Reused by `neoctl print ids`.
:nodes: M -> S; ctl -> A -> M
""")
AskPartitionTable, AnswerPartitionTable = request("""
Ask storage node the remaining data needed by master to recover.
:nodes: M -> S
""")
SendPartitionTable = notify("""
Send the full partition table to admin/client/storage nodes on
connection.
:nodes: M -> A, C, S
""")
NotifyPartitionChanges = notify("""
Notify about changes in the partition table.
:nodes: M -> *
""")
StartOperation = notify("""
Tell a storage node to start operation. Before this message,
it must only communicate with the primary master.
:nodes: M -> S
""")
StopOperation = notify("""
Notify that the cluster is not operational anymore.
Any operation between nodes must be aborted.
:nodes: M -> S, C
""")
AskUnfinishedTransactions, AnswerUnfinishedTransactions = request("""
Ask unfinished transactions, which will be replicated
when they're finished.
:nodes: S -> M
""")
AskLockedTransactions, AnswerLockedTransactions = request("""
Ask locked transactions to replay committed transactions
that haven't been unlocked.
:nodes: M -> S
""")
AskFinalTID, AnswerFinalTID = request("""
Return final tid if ttid has been committed, to recover from certain
failures during tpc_finish.
:nodes: M -> S; C -> M, S
""")
ValidateTransaction = notify("""
Do replay a committed transaction that was not unlocked.
:nodes: M -> S
""")
AskBeginTransaction, AnswerBeginTransaction = request("""
Ask to begin a new transaction. This maps to `tpc_begin`.
:nodes: C -> M
""")
FailedVote = request("""
Report storage nodes for which vote failed.
True is returned if it's still possible to finish the transaction.
:nodes: C -> M
""", error=True)
AskFinishTransaction, AnswerTransactionFinished = request("""
Finish a transaction. Return the TID of the committed transaction.
This maps to `tpc_finish`.
:nodes: C -> M
""", ignore_when_closed=False, poll_thread=True)
AskLockInformation, AnswerInformationLocked = request("""
Commit a transaction. The new data is read-locked.
:nodes: M -> S
""", ignore_when_closed=False)
InvalidateObjects = notify("""
Notify about a new transaction modifying objects,
invalidating client caches.
:nodes: M -> C
""")
NotifyUnlockInformation = notify("""
Notify about a successfully committed transaction. The new data can be
unlocked.
:nodes: M -> S
""")
AskNewOIDs, AnswerNewOIDs = request("""
Ask new OIDs to create objects.
:nodes: C -> M
""")
NotifyDeadlock = notify("""
Ask master to generate a new TTID that will be used by the client to
solve a deadlock by rebasing the transaction on top of concurrent
changes.
:nodes: S -> M -> C
""")
AskRebaseTransaction, AnswerRebaseTransaction = request("""
Rebase a transaction to solve a deadlock.
:nodes: C -> S
""")
AskRebaseObject, AnswerRebaseObject = request("""
Rebase an object change to solve a deadlock.
:nodes: C -> S
XXX: It is a request packet to simplify the implementation. For more
efficiency, this should be turned into a notification, and the
RebaseTransaction should answered once all objects are rebased
(so that the client can still wait on something).
""", data_path=(1, 0, 2, 0))
AskStoreObject, AnswerStoreObject = request("""
Ask to create/modify an object. This maps to `store`.
As for IStorage, 'serial' is ZERO_TID for new objects.
:nodes: C -> S
""", data_path=(0, 2))
AbortTransaction = notify("""
Abort a transaction. This maps to `tpc_abort`.
:nodes: C -> S; C -> M -> S
""")
AskStoreTransaction, AnswerStoreTransaction = request("""
Ask to store a transaction. Implies vote.
:nodes: C -> S
""")
AskVoteTransaction, AnswerVoteTransaction = request("""
Ask to vote a transaction.
:nodes: C -> S
""")
AskObject, AnswerObject = request("""
Ask a stored object by its OID, optionally at/before a specific tid.
This maps to `load/loadBefore/loadSerial`.
:nodes: C -> S
""", data_path=(1, 3))
AskTIDs, AnswerTIDs = request("""
Ask for TIDs between a range of offsets. The order of TIDs is
descending, and the range is [first, last). This maps to `undoLog`.
:nodes: C -> S
""")
AskTransactionInformation, AnswerTransactionInformation = request("""
Ask for transaction metadata.
:nodes: C -> S
""")
AskObjectHistory, AnswerObjectHistory = request("""
Ask history information for a given object. The order of serials is
descending, and the range is [first, last]. This maps to `history`.
:nodes: C -> S
""")
AskPartitionList, AnswerPartitionList = request("""
Ask information about partitions.
:nodes: ctl -> A
""")
AskNodeList, AnswerNodeList = request("""
Ask information about nodes.
:nodes: ctl -> A
""")
SetNodeState = request("""
Change the state of a node.
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
AddPendingNodes = request("""
Mark given pending nodes as running, for future inclusion when tweaking
the partition table.
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
TweakPartitionTable, AnswerTweakPartitionTable = request("""
Ask the master to balance the partition table, optionally excluding
specific nodes in anticipation of removing them.
:nodes: ctl -> A -> M
""")
SetNumReplicas = request("""
Set the number of replicas.
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
SetClusterState = request("""
Set the cluster state.
:nodes: ctl -> A -> M
""", error=True, ignore_when_closed=False)
Repair = request("""
Ask storage nodes to repair their databases.
:nodes: ctl -> A -> M
""", error=True)
NotifyRepair = notify("""
Repair is translated to this message, asking a specific storage node to
repair its database.
:nodes: M -> S
""")
NotifyClusterInformation = notify("""
Notify about a cluster state change.
:nodes: M -> *
""")
AskClusterState, AnswerClusterState = request("""
Ask the state of the cluster
:nodes: ctl -> A; A -> M
""")
AskObjectUndoSerial, AnswerObjectUndoSerial = request("""
Ask storage the serial where object data is when undoing given
transaction, for a list of OIDs.
Answer a dict mapping oids to 3-tuples:
current_serial (TID)
The latest serial visible to the undoing transaction.
undo_serial (TID)
Where undone data is (tid at which data is before given undo).
is_current (bool)
If current_serial's data is current on storage.
:nodes: C -> S
""")
AskTIDsFrom, AnswerTIDsFrom = request("""
Ask for length TIDs starting at min_tid. The order of TIDs is ascending.
Used by `iterator`.
:nodes: C -> S
""")
AskPack, AnswerPack = request("""
Request a pack at given TID.
:nodes: C -> M -> S
""", ignore_when_closed=False)
CheckReplicas = request("""
Ask the cluster to search for mismatches between replicas, metadata
only, and optionally within a specific range. Reference nodes can be
specified.
:nodes: ctl -> A -> M
""", error=True)
CheckPartition = notify("""
Ask a storage node to compare a partition with all other nodes.
Like for CheckReplicas, only metadata are checked, optionally within a
specific range. A reference node can be specified.
:nodes: M -> S
""")
AskCheckTIDRange, AnswerCheckTIDRange = request("""
Ask some stats about a range of transactions.
Used to know if there are differences between a replicating node and
reference node.
:nodes: S -> S
""")
AskCheckSerialRange, AnswerCheckSerialRange = request("""
Ask some stats about a range of object history.
Used to know if there are differences between a replicating node and
reference node.
:nodes: S -> S
""")
NotifyPartitionCorrupted = notify("""
Notify that mismatches were found while check replicas for a partition.
:nodes: S -> M
""")
NotifyReady = notify("""
Notify that we're ready to serve requests.
:nodes: S -> M
""")
AskLastTransaction, AnswerLastTransaction = request("""
Ask last committed TID.
:nodes: C -> M; ctl -> A -> M
""", poll_thread=True)
AskCheckCurrentSerial, AnswerCheckCurrentSerial = request("""
Check if given serial is current for the given oid, and lock it so that
this state is not altered until transaction ends.
This maps to `checkCurrentSerialInTransaction`.
:nodes: C -> S
""")
NotifyTransactionFinished = notify("""
Notify that a transaction blocking a replication is now finished.
:nodes: M -> S
""")
Replicate = notify("""
Notify a storage node to replicate partitions up to given 'tid'
and from given sources.
args: tid, upstream_name, {partition: address}
- upstream_name: replicate from an upstream cluster
- address: address of the source storage node, or None if there's
no new data up to 'tid' for the given partition
:nodes: M -> S
""")
NotifyReplicationDone = notify("""
Notify the master node that a partition has been successfully
replicated from a storage to another.
:nodes: S -> M
""")
AskFetchTransactions, AnswerFetchTransactions = request("""
Ask a storage node to send all transaction data we don't have,
and reply with the list of transactions we should not have.
:nodes: S -> S
""")
AskFetchObjects, AnswerFetchObjects = request("""
Ask a storage node to send object records we don't have,
and reply with the list of records we should not have.
:nodes: S -> S
""")
AddTransaction = notify("""
Send metadata of a transaction to a node that does not have them.
:nodes: S -> S
""", nodelay=False)
AddObject = notify("""
Send an object record to a node that does not have it.
:nodes: S -> S
""", nodelay=False, data_path=(0, 2))
Truncate = request("""
Request DB to be truncated. Also used to leave backup mode.
:nodes: ctl -> A -> M; M -> S
""", error=True)
FlushLog = notify("""
Request all nodes to flush their logs.
:nodes: ctl -> A -> M -> *
""")
del notify, request
def Errors():
registry_dict = {}
handler_method_name_dict = {}
Error = Packets.Error
def register_error(code):
return lambda self, message='': Error(code, message)
for error in ErrorCodes:
name = ''.join(part.capitalize() for part in str(error).split('_'))
registry_dict[name] = register_error(int(error))
handler_method_name_dict[int(error)] = name[0].lower() + name[1:]
return type('ErrorRegistry', (dict,),
registry_dict)(handler_method_name_dict)
Errors = Errors()
# Common helpers between the 'neo' module and 'neolog'.
from datetime import datetime
from operator import itemgetter
def formatNodeList(node_list, prefix='', _sort_key=itemgetter(2)):
if node_list:
node_list = [(
uuid_str(uuid), str(node_type),
('[%s]:%s' if ':' in addr[0] else '%s:%s')
% addr if addr else '', str(state),
str(id_timestamp and datetime.utcfromtimestamp(id_timestamp)))
for node_type, addr, uuid, state, id_timestamp
in sorted(node_list, key=_sort_key)]
t = ''.join('%%-%us | ' % max(len(x[i]) for x in node_list)
for i in xrange(len(node_list[0]) - 1))
return map((prefix + t + '%s').__mod__, node_list)
return ()
Packets.NotifyNodeInformation._neolog = staticmethod(
lambda timestamp, node_list:
((timestamp,), formatNodeList(node_list, ' ! ')))
Packets.Error._neolog = staticmethod(lambda *args: ((), ("%s (%s)" % args,)))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/pt.py 0000664 0000000 0000000 00000026434 13465024610 0023600 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import math
from collections import defaultdict
from functools import partial
from . import logging, protocol
from .locking import Lock
from .protocol import uuid_str, CellStates
from .util import u64
class PartitionTableException(Exception):
"""
Base class for partition table exceptions
"""
class Cell(object):
"""This class represents a cell in a partition table."""
def __init__(self, node, state = CellStates.UP_TO_DATE):
self.node = node
self.state = state
def __repr__(self):
return "" % (
uuid_str(self.getUUID()),
self.getAddress(),
self.getState(),
)
def getState(self):
return self.state
def setState(self, state):
assert state != CellStates.DISCARDED
self.state = state
def isUpToDate(self):
return self.state == CellStates.UP_TO_DATE
def isOutOfDate(self):
return self.state == CellStates.OUT_OF_DATE
def isFeeding(self):
return self.state == CellStates.FEEDING
def isCorrupted(self):
return self.state == CellStates.CORRUPTED
def isReadable(self):
return self.state == CellStates.UP_TO_DATE or \
self.state == CellStates.FEEDING
def getNode(self):
return self.node
def getNodeState(self):
"""This is a short hand."""
return self.node.getState()
def getUUID(self):
return self.node.getUUID()
def getAddress(self):
return self.node.getAddress()
class PartitionTable(object):
"""This class manages a partition table."""
# Flushing logs whenever a cell becomes out-of-date would flood them.
_first_outdated_message = \
'a cell became non-readable whereas all cells were readable'
def __init__(self, num_partitions, num_replicas):
self.np = num_partitions
self.nr = num_replicas
self.clear()
def getID(self):
return self._id
def getPartitions(self):
return self.np
def getReplicas(self):
return self.nr
def clear(self):
"""Forget an existing partition table."""
self._id = None
self.num_filled_rows = 0
# Note: don't use [[]] * self.np construct, as it duplicates
# instance *references*, so the outer list contains really just one
# inner list instance.
self.partition_list = [[] for _ in xrange(self.np)]
self.count_dict = {}
def addNodeList(self, node_list):
"""Add nodes"""
added_list = []
for node in node_list:
if node not in self.count_dict:
self.count_dict[node] = 0
added_list.append(node)
return added_list
def getAssignedPartitionList(self, uuid):
""" Return the partition assigned to the specified UUID """
assigned_partitions = []
for offset in xrange(self.np):
for cell in self.getCellList(offset, readable=True):
if cell.getUUID() == uuid:
assigned_partitions.append(offset)
break
return assigned_partitions
def hasOffset(self, offset):
try:
return len(self.partition_list[offset]) > 0
except IndexError:
return False
def getNodeSet(self, readable=False):
if readable:
return {x.getNode() for row in self.partition_list for x in row
if x.isReadable()}
return {x.getNode() for row in self.partition_list for x in row}
def getConnectedNodeList(self):
return [node for node in self.getNodeSet() if node.isConnected()]
def getCellList(self, offset, readable=False):
if readable:
return filter(Cell.isReadable, self.partition_list[offset])
return list(self.partition_list[offset])
def getPartition(self, oid_or_tid):
return u64(oid_or_tid) % self.getPartitions()
def getOutdatedOffsetListFor(self, uuid):
return [
offset for offset in xrange(self.np)
for c in self.partition_list[offset]
if c.getUUID() == uuid and c.getState() == CellStates.OUT_OF_DATE
]
def isAssigned(self, oid, uuid):
""" Check if the oid is assigned to the given node """
for cell in self.partition_list[u64(oid) % self.np]:
if cell.getUUID() == uuid:
return True
return False
def getCell(self, offset, uuid):
for cell in self.partition_list[offset]:
if cell.getUUID() == uuid:
return cell
def _setCell(self, offset, node, state):
if state == CellStates.DISCARDED:
return self.removeCell(offset, node)
if node.isUnknown():
raise PartitionTableException('Invalid node state')
self.count_dict.setdefault(node, 0)
for cell in self.partition_list[offset]:
if cell.getNode() is node:
if not cell.isFeeding():
self.count_dict[node] -= 1
cell.setState(state)
break
else:
row = self.partition_list[offset]
self.num_filled_rows += not row
row.append(Cell(node, state))
if state != CellStates.FEEDING:
self.count_dict[node] += 1
def removeCell(self, offset, node):
row = self.partition_list[offset]
for cell in row:
if cell.getNode() == node:
row.remove(cell)
self.num_filled_rows -= not row
if not cell.isFeeding():
self.count_dict[node] -= 1
break
def dropNode(self, node):
count = self.count_dict.get(node)
if count == 0:
del self.count_dict[node]
return not count
def _load(self, ptid, num_replicas, row_list, getByUUID):
self.__init__(len(row_list), num_replicas)
self._id = ptid
for offset, row in enumerate(row_list):
for uuid, state in row:
node = getByUUID(uuid)
self._setCell(offset, node, state)
def load(self, ptid, num_replicas, row_list, nm):
"""
Load the partition table with the specified PTID, discard all previous
content.
"""
self._load(ptid, num_replicas, row_list, nm.getByUUID)
logging.debug('partition table loaded (ptid=%s)', ptid)
self.log()
def update(self, ptid, num_replicas, cell_list, nm):
"""
Update the partition with the cell list supplied. If a node
is not known, it is created in the node manager and set as unavailable
"""
assert self._id < ptid, (self._id, ptid)
self._id = ptid
self.nr = num_replicas
readable_list = []
for row in self.partition_list:
if not all(cell.isReadable() for cell in row):
del readable_list[:]
break
readable_list += row
for offset, uuid, state in cell_list:
node = nm.getByUUID(uuid)
assert node is not None, 'No node found for uuid ' + uuid_str(uuid)
self._setCell(offset, node, state)
self.logUpdated()
if not all(cell.isReadable() for cell in readable_list):
logging.warning(self._first_outdated_message)
def filled(self):
return self.num_filled_rows == self.np
def logUpdated(self):
logging.debug('partition table updated (ptid=%s)', self._id)
self.log()
def log(self):
logging.debug(self.format())
def format(self):
return '\n'.join(self._format())
def _format(self):
"""Help debugging partition table management.
Output sample (np=48, nr=0, just after a 3rd node is added):
pt: 10v 20v 30v 40v
pt: S1 R U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.
pt: S2 R .U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF.U.FOF
pt: S3 R ..O..O..O..O..O..O..O..O..O..O..O..O..O..O..O..O
The first line helps to locate a nth partition ('v' is an bottom arrow)
and it is omitted when the table has less than 10 partitions.
"""
node_list = sorted(self.count_dict)
if not node_list:
return ()
cell_state_dict = protocol.cell_state_prefix_dict
node_dict = defaultdict(partial(bytearray, '.' * self.np))
for offset, row in enumerate(self.partition_list):
for cell in row:
node_dict[cell.getNode()][offset] = \
cell_state_dict[cell.getState()]
n = len(uuid_str(node_list[-1].getUUID()))
result = [''.join('%9sv' % x if x else 'pt:' + ' ' * (5 + n)
for x in xrange(0, self.np, 10))
] if self.np > 10 else []
result.extend('pt: %-*s %s %s' % (n, uuid_str(node.getUUID()),
protocol.node_state_prefix_dict[node.getState()],
node_dict[node])
for node in node_list)
return result
def _formatRows(self, node_list):
cell_state_dict = protocol.cell_state_prefix_dict
for row in self.partition_list:
if row is None:
yield 'X' * len(node_list)
else:
cell_dict = {x.getNode(): cell_state_dict[x.getState()]
for x in row}
yield ''.join(cell_dict.get(x, '.') for x in node_list)
def operational(self, exclude_list=()):
if not self.filled():
return False
for row in self.partition_list:
for cell in row:
if cell.isReadable():
node = cell.getNode()
if node.isRunning() and node.getUUID() not in exclude_list:
break
else:
return False
return True
def getRow(self, offset):
return [(cell.getUUID(), cell.getState())
for cell in self.partition_list[offset]]
def getRowList(self):
return map(self.getRow, xrange(self.np))
class MTPartitionTable(PartitionTable):
""" Thread-safe aware version of the partition table, override only methods
used in the client """
def __init__(self, *args, **kw):
self._lock = Lock()
PartitionTable.__init__(self, *args, **kw)
def update(self, *args, **kw):
with self._lock:
return PartitionTable.update(self, *args, **kw)
def clear(self, *args, **kw):
with self._lock:
return PartitionTable.clear(self, *args, **kw)
def operational(self, *args, **kw):
with self._lock:
return PartitionTable.operational(self, *args, **kw)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/threaded_app.py 0000664 0000000 0000000 00000011305 13465024610 0025564 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import thread, threading, weakref
from . import debug, logging
from .app import BaseApplication
from .dispatcher import Dispatcher
from .locking import SimpleQueue
class app_set(weakref.WeakSet):
def on_log(self):
for app in self:
app.log()
app_set = app_set()
def registerLiveDebugger():
debug.register(app_set.on_log)
registerLiveDebugger()
class ThreadContainer(threading.local):
def __init__(self):
self.queue = SimpleQueue()
self.answer = None
class ThreadedApplication(BaseApplication):
"""The client node application."""
uuid = None
def __init__(self, master_nodes, name, **kw):
super(ThreadedApplication, self).__init__(**kw)
self.poll_thread = threading.Thread(target=self.run, name=name)
self.poll_thread.daemon = True
# Internal Attributes common to all thread
self.name = name
self.dispatcher = Dispatcher()
self.master_conn = None
# load master node list
for address in master_nodes:
self.nm.createMaster(address=address)
# Internal attribute distinct between thread
self._thread_container = ThreadContainer()
app_set.add(self) # to register self.on_log
def close(self):
# Clear all connection
self.master_conn = None
if self.poll_thread.is_alive():
for conn in self.em.getConnectionList():
conn.close()
# Stop polling thread
logging.debug('Stopping %s', self.poll_thread)
self.em.wakeup(thread.exit)
else:
super(ThreadedApplication, self).close()
def start(self):
self.poll_thread.is_alive() or self.poll_thread.start()
def run(self):
logging.debug("Started %s", self.poll_thread)
try:
self._run()
finally:
super(ThreadedApplication, self).close()
logging.debug("Poll thread stopped")
def _run(self):
poll = self.em.poll
while 1:
try:
while 1:
poll(1)
except Exception:
self.log()
logging.error("poll raised, retrying", exc_info=1)
def getHandlerData(self):
return self._thread_container.answer
def setHandlerData(self, data):
self._thread_container.answer = data
def log(self):
self.em.log()
self.nm.log()
pt = self.__dict__.get('pt')
if pt is not None:
pt.log()
def _handlePacket(self, conn, packet, kw={}, handler=None):
"""
conn
The connection which received the packet (forwarded to handler).
packet
The packet to handle.
handler
The handler to use to handle packet.
If not given, it will be guessed from connection's not type.
"""
if handler is None:
# Guess the handler to use based on the type of node on the
# connection
node = self.nm.getByAddress(conn.getAddress())
if node is None:
raise ValueError, 'Expecting an answer from a node ' \
'which type is not known... Is this right ?'
if node.isStorage():
handler = self.storage_handler
elif node.isMaster():
handler = self.primary_handler
else:
raise ValueError, 'Unknown node type: %r' % (node.__class__, )
with conn.lock:
handler.dispatch(conn, packet, kw)
def _ask(self, conn, packet, handler=None, **kw):
self.setHandlerData(None)
queue = self._thread_container.queue
msg_id = conn.ask(packet, queue=queue, **kw)
get = queue.get
_handlePacket = self._handlePacket
while True:
qconn, qpacket, kw = get(True)
if conn is qconn and msg_id == qpacket.getId():
_handlePacket(qconn, qpacket, kw, handler)
break
_handlePacket(qconn, qpacket, kw)
return self.getHandlerData()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/lib/util.py 0000664 0000000 0000000 00000015032 13465024610 0024122 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os, socket
from binascii import a2b_hex, b2a_hex
from datetime import timedelta, datetime
from hashlib import sha1
from Queue import deque
from struct import pack, unpack, Struct
from time import gmtime
# https://stackoverflow.com/a/6163157
def nextafter():
global nextafter
from ctypes import CDLL, util as ctypes_util, c_double
from time import time
_libm = CDLL(ctypes_util.find_library('m'))
nextafter = _libm.nextafter
nextafter.restype = c_double
nextafter.argtypes = c_double, c_double
x = time()
y = nextafter(x, float('inf'))
assert x < y and (x+y)/2 in (x,y), (x, y)
nextafter()
TID_LOW_OVERFLOW = 2**32
TID_LOW_MAX = TID_LOW_OVERFLOW - 1
SECOND_PER_TID_LOW = 60.0 / TID_LOW_OVERFLOW
TID_CHUNK_RULES = (
(-1900, 0),
(-1, 12),
(-1, 31),
(0, 24),
(0, 60),
)
def tidFromTime(tm):
gmt = gmtime(tm)
return packTID(
(gmt.tm_year, gmt.tm_mon, gmt.tm_mday, gmt.tm_hour, gmt.tm_min),
int((gmt.tm_sec + (tm - int(tm))) / SECOND_PER_TID_LOW))
def packTID(higher, lower):
"""
higher: a 5-tuple containing year, month, day, hour and minute
lower: seconds scaled to 60:2**32 into a 64 bits TID
"""
assert len(higher) == len(TID_CHUNK_RULES), higher
packed_higher = 0
for value, (offset, multiplicator) in zip(higher, TID_CHUNK_RULES):
assert isinstance(value, (int, long)), value
value += offset
assert 0 <= value, (value, offset, multiplicator)
assert multiplicator == 0 or value < multiplicator, (value,
offset, multiplicator)
packed_higher *= multiplicator
packed_higher += value
# If the machine is configured in such way that gmtime() returns leap
# seconds (e.g. TZ=right/UTC), then the best we can do is to use
# TID_LOW_MAX, because TID format was not designed to support them.
# For more information about leap seconds on Unix, see:
# https://en.wikipedia.org/wiki/Unix_time
# http://www.madore.org/~david/computers/unix-leap-seconds.html
return pack('!LL', packed_higher, min(lower, TID_LOW_MAX))
def unpackTID(ptid):
"""
Unpack given 64 bits TID in to a 2-tuple containing:
- a 5-tuple containing year, month, day, hour and minute
- seconds scaled to 60:2**32
"""
packed_higher, lower = unpack('!LL', ptid)
higher = []
append = higher.append
for offset, multiplicator in reversed(TID_CHUNK_RULES):
if multiplicator:
packed_higher, value = divmod(packed_higher, multiplicator)
else:
packed_higher, value = 0, packed_higher
append(value - offset)
higher.reverse()
return (tuple(higher), lower)
def timeStringFromTID(ptid):
"""
Return a string in the format "yyyy-mm-dd hh:mm:ss.ssssss" from a TID
"""
higher, lower = unpackTID(ptid)
seconds = lower * SECOND_PER_TID_LOW
return '%04d-%02d-%02d %02d:%02d:%09.6f' % (higher[0], higher[1], higher[2],
higher[3], higher[4], seconds)
def addTID(ptid, offset):
"""
Offset given packed TID.
"""
higher, lower = unpackTID(ptid)
high_offset, lower = divmod(lower + offset, TID_LOW_OVERFLOW)
if high_offset:
d = datetime(*higher) + timedelta(0, 60 * high_offset)
higher = (d.year, d.month, d.day, d.hour, d.minute)
return packTID(higher, lower)
p64, u64 = (lambda unpack: (
unpack.__self__.pack,
lambda s: unpack(s)[0]
))(Struct('!Q').unpack)
def add64(packed, offset):
"""Add a python number to a 64-bits packed value"""
return p64(u64(packed) + offset)
def dump(s):
"""Dump a binary string in hex."""
if s is not None:
if isinstance(s, bytes):
return b2a_hex(s)
return repr(s)
def bin(s):
"""Inverse of dump method."""
if s is not None:
return a2b_hex(s)
def makeChecksum(s):
"""Return a 20-byte checksum against a string."""
return sha1(s).digest()
def parseNodeAddress(address, port_opt=None):
if address[:1] == '[':
(host, port) = address[1:].split(']')
if port[:1] == ':':
port = port[1:]
else:
port = port_opt
elif address.count(':') == 1:
(host, port) = address.split(':')
else:
host = address
port = port_opt
# Resolve (maybe) and cast to canonical form
# XXX: Always pick the first result. This might not be what is desired, and
# if so this function should either take a hint on the desired address type
# or return either raw host & port or getaddrinfo return value.
return socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)[0][4][:2]
def parseMasterList(masters):
assert masters, 'At least one master must be defined'
return map(parseNodeAddress, masters.split())
class cached_property(object):
"""
A property that is only computed once per instance and then replaces itself
with an ordinary attribute. Deleting the attribute resets the property.
"""
def __init__(self, func):
self.__doc__ = func.__doc__
self.func = func
def __get__(self, obj, cls):
if obj is None: return self
value = obj.__dict__[self.func.__name__] = self.func(obj)
return value
# This module is always imported before multiprocessing is used, and the
# main process does not want to change name when task are run in threads.
spt_pid = os.getpid()
def setproctitle(title):
global spt_pid
pid = os.getpid()
if spt_pid == pid:
return
spt_pid = pid
# Try using https://pypi.org/project/setproctitle/
try:
# On Linux, this is done by clobbering argv, and the main process
# usually has a longer command line than the title of subprocesses.
os.environ['SPT_NOENV'] = '1'
from setproctitle import setproctitle
except ImportError:
return
finally:
del os.environ['SPT_NOENV']
setproctitle(title)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/ 0000775 0000000 0000000 00000000000 13465024610 0023317 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0025416 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/app.py 0000664 0000000 0000000 00000061255 13465024610 0024462 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from collections import defaultdict
from functools import partial
from time import time
from neo.lib import logging, util
from neo.lib.app import BaseApplication, buildOptionParser
from neo.lib.debug import register as registerLiveDebugger
from neo.lib.protocol import UUID_NAMESPACES, ZERO_TID
from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, Packets
from neo.lib.handler import EventHandler
from neo.lib.connection import ListeningConnection, ClientConnection
from neo.lib.exception import PrimaryElected, PrimaryFailure, StoppedOperation
class StateChangedException(Exception): pass
_previous_time = 0
def monotonic_time():
global _previous_time
now = time()
if _previous_time < now:
_previous_time = now
else:
_previous_time = now = _previous_time + 1e-3
return now
from .backup_app import BackupApplication
from .handlers import identification, administration, client, master, storage
from .pt import PartitionTable
from .recovery import RecoveryManager
from .transactions import TransactionManager
from .verification import VerificationManager
@buildOptionParser
class Application(BaseApplication):
"""The master node application."""
packing = None
storage_readiness = 0
# Latest completely committed TID
last_transaction = ZERO_TID
backup_tid = None
backup_app = None
truncate_tid = None
def setUUID(self, uuid):
node = self.nm.getByUUID(uuid)
if node is not self._node:
if node:
node.setUUID(None)
if node.isConnected(True):
node.getConnection().close()
self._node.setUUID(uuid)
logging.node(self.name, uuid)
uuid = property(lambda self: self._node.getUUID(), setUUID)
@property
def election(self):
if self.primary and self.cluster_state == ClusterStates.RECOVERING:
return self.primary
@classmethod
def _buildOptionParser(cls):
parser = cls.option_parser
parser.description = "NEO Master node"
cls.addCommonServerOptions('master', '127.0.0.1:10000', '')
_ = parser.group('master')
_.int('A', 'autostart',
help="minimum number of pending storage nodes to automatically"
" start new cluster (to avoid unwanted recreation of the"
" cluster, this should be the total number of storage nodes)")
_('C', 'upstream-cluster',
help='the name of cluster to backup')
_('M', 'upstream-masters', parse=util.parseMasterList,
help='list of master nodes in the cluster to backup')
_.int('i', 'nid',
help="specify an NID to use for this process (testing purpose)")
_ = parser.group('database creation')
_.int('r', 'replicas', default=0, help="replicas number")
_.int('p', 'partitions', default=100, help="partitions number")
def __init__(self, config):
super(Application, self).__init__(
config.get('ssl'), config.get('dynamic_master_list'))
self.tm = TransactionManager(self.onTransactionCommitted)
self.name = config['cluster']
self.server = config['bind']
self.autostart = config.get('autostart')
self.storage_ready_dict = {}
self.storage_starting_set = set()
for master_address in config['masters']:
self.nm.createMaster(address=master_address)
self._node = self.nm.createMaster(address=self.server,
uuid=config.get('nid'))
logging.node(self.name, self.uuid)
logging.debug('IP address is %s, port is %d', *self.server)
# Partition table
replicas = config['replicas']
partitions = config['partitions']
if replicas < 0:
sys.exit('replicas must be a positive integer')
if partitions <= 0:
sys.exit('partitions must be more than zero')
logging.info('Configuration:')
logging.info('Partitions: %d', partitions)
logging.info('Replicas : %d', replicas)
logging.info('Name : %s', self.name)
self.newPartitionTable = partial(PartitionTable, partitions, replicas)
self.listening_conn = None
self.cluster_state = None
self._current_manager = None
# backup
upstream_cluster = config.get('upstream_cluster')
if upstream_cluster:
if upstream_cluster == self.name:
raise ValueError("upstream cluster name must be"
" different from cluster name")
self.backup_app = BackupApplication(self, upstream_cluster,
config['upstream_masters'])
self.administration_handler = administration.AdministrationHandler(
self)
self.election_handler = master.ElectionHandler(self)
self.secondary_handler = master.SecondaryHandler(self)
self.client_service_handler = client.ClientServiceHandler(self)
self.client_ro_service_handler = client.ClientReadOnlyServiceHandler(self)
self.storage_service_handler = storage.StorageServiceHandler(self)
registerLiveDebugger(on_log=self.log)
def close(self):
self.listening_conn = None
if self.backup_app is not None:
self.backup_app.close()
super(Application, self).close()
def log(self):
self.em.log()
if self.backup_app is not None:
self.backup_app.log()
self.nm.log()
self.tm.log()
if self.pt is not None:
self.pt.log()
def run(self):
try:
self._run()
except Exception:
logging.exception('Pre-mortem data:')
self.log()
logging.flush()
raise
def _run(self):
self.listening_conn = ListeningConnection(self, None, self.server)
while True:
self.playPrimaryRole()
self.playSecondaryRole()
def getNodeInformationDict(self, node_list):
node_dict = defaultdict(list)
# group modified nodes by destination node type
for node in node_list:
node_info = node.asTuple()
if node.isAdmin():
continue
node_dict[NodeTypes.ADMIN].append(node_info)
node_dict[NodeTypes.STORAGE].append(node_info)
if node.isClient():
continue
node_dict[NodeTypes.CLIENT].append(node_info)
if node.isStorage():
continue
node_dict[NodeTypes.MASTER].append(node_info)
return node_dict
def broadcastNodesInformation(self, node_list):
"""
Broadcast changes for a set a nodes
Send only one packet per connection to reduce bandwidth
"""
node_dict = self.getNodeInformationDict(node_list)
now = monotonic_time()
# send at most one non-empty notification packet per node
for node in self.nm.getIdentifiedList():
node_list = node_dict.get(node.getType())
# We don't skip pending storage nodes because we don't send them
# the full list of nodes when they're added, and it's also quite
# useful to notify them about new masters.
if node_list:
node.send(Packets.NotifyNodeInformation(now, node_list))
def broadcastPartitionChanges(self, cell_list, num_replicas=None):
"""Broadcast a Notify Partition Changes packet."""
pt = self.pt
if num_replicas is not None:
pt.setReplicas(num_replicas)
elif cell_list:
num_replicas = pt.getReplicas()
else:
return
packet = Packets.NotifyPartitionChanges(
pt.setNextID(), num_replicas, cell_list)
pt.logUpdated()
for node in self.nm.getIdentifiedList():
# As for broadcastNodesInformation, we don't send the full PT
# when pending storage nodes are added, so keep them notified.
if not node.isMaster():
node.send(packet)
def provideService(self):
"""
This is the normal mode for a primary master node. Handle transactions
and stop the service only if a catastrophe happens or the user commits
a shutdown.
"""
logging.info('provide service')
poll = self.em.poll
self.changeClusterState(ClusterStates.RUNNING)
# Now everything is passive.
try:
while True:
poll(1)
except StateChangedException, e:
if e.args[0] != ClusterStates.STARTING_BACKUP:
raise
self.backup_tid = tid = self.getLastTransaction()
packet = Packets.StartOperation(True)
tid_dict = {}
for node in self.nm.getStorageList(only_identified=True):
tid_dict[node.getUUID()] = tid
if node.isRunning():
node.send(packet)
self.pt.setBackupTidDict(tid_dict)
def playPrimaryRole(self):
logging.info('play the primary role with %r', self.listening_conn)
self.primary_master = None
for conn in self.em.getConnectionList():
if conn.isListening():
conn.setHandler(identification.IdentificationHandler(self))
else:
conn.close()
# If I know any storage node, make sure that they are not in the
# running state, because they are not connected at this stage.
for node in self.nm.getStorageList():
assert node.isDown(), node
if self.uuid is None:
self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
self._node.setRunning()
self._node.id_timestamp = None
self.primary = monotonic_time()
# Do not restart automatically if an election happens, in order
# to avoid a split of the database. For example, with 2 machines with
# a master and a storage on each one and replicas=1, the secondary
# master becomes primary in case of network failure between the 2
# machines but must not start automatically: otherwise, each storage
# node would diverge.
self._startup_allowed = False
try:
while True:
self.runManager(RecoveryManager)
try:
self.runManager(VerificationManager)
if not self.backup_tid:
self.provideService()
# self.provideService only returns without raising
# when switching to backup mode.
if self.backup_app is None:
raise RuntimeError("No upstream cluster to backup"
" defined in configuration")
truncate = Packets.Truncate(
self.backup_app.provideService())
except StoppedOperation, e:
logging.critical('No longer operational')
truncate = Packets.Truncate(*e.args) if e.args else None
# Automatic restart except if we truncate or retry to.
self._startup_allowed = not (self.truncate_tid or truncate)
self.storage_readiness = 0
self.storage_ready_dict.clear()
self.storage_starting_set.clear()
node_list = []
for node in self.nm.getIdentifiedList():
if node.isStorage() or node.isClient():
conn = node.getConnection()
conn.send(Packets.StopOperation())
if node.isClient():
conn.abort()
continue
if truncate:
conn.send(truncate)
if node.isRunning():
node.setPending()
node_list.append(node)
self.broadcastNodesInformation(node_list)
except StateChangedException, e:
assert e.args[0] == ClusterStates.STOPPING
self.shutdown()
except PrimaryElected, e:
self.primary_master, = e.args
def playSecondaryRole(self):
"""
A master play the secondary role when it is unlikely to win the
election (it lost against against another master during identification
or it was notified that another is the primary master).
Its only task is to try again to become the primary master when the
later fail. When connected to the cluster, the only communication is
with the primary master, to stay informed about removed/added master
nodes, and exit if requested.
"""
logging.info('play the secondary role with %r', self.listening_conn)
self.primary = None
handler = master.PrimaryHandler(self)
# The connection to the probably-primary master can be in any state
# depending on how we were informed. The only case in which it can not
# be reused in when we have pending requests.
if self.primary_master.isConnected(True):
master_conn = self.primary_master.getConnection()
# When we find the primary during identification, we don't attach
# the connection (a server one) to any node, and it will be closed
# in the below 'for' loop.
assert master_conn.isClient(), master_conn
try:
# We want the handler to be effective immediately.
# If it's not possible, let's just reconnect.
if not master_conn.setHandler(handler):
master_conn.close()
assert False
except PrimaryFailure:
master_conn = None
else:
master_conn = None
for conn in self.em.getConnectionList():
if conn.isListening():
conn.setHandler(
identification.SecondaryIdentificationHandler(self))
elif conn is not master_conn:
conn.close()
failed = {self.server}
poll = self.em.poll
while True:
try:
if master_conn is None:
for node in self.nm.getMasterList():
node.setDown()
node = self.primary_master
failed.add(node.getAddress())
if not node.isConnected(True):
# On immediate connection failure,
# PrimaryFailure is raised.
ClientConnection(self, handler, node)
else:
master_conn = None
while True:
poll(1)
except PrimaryFailure:
if self.primary_master.isRunning():
# XXX: What's the best to do here ? Another option is to
# choose the RUNNING master node with the lowest
# election key (i.e. (id_timestamp, address) as
# defined in IdentificationHandler), and return if we
# have the lowest one.
failed = {self.server}
else:
# Since the last primary failure (or since we play the
# secondary role), do not try any node more than once.
for self.primary_master in self.nm.getMasterList():
if self.primary_master.getAddress() not in failed:
break
else:
# All known master nodes are either down or secondary.
# Let's play the primary role again.
break
except PrimaryElected, e:
node = self.primary_master
self.primary_master, = e.args
assert node is not self.primary_master, node
try:
node.getConnection().close()
except PrimaryFailure:
pass
def runManager(self, manager_klass):
self._current_manager = manager_klass(self)
try:
self._current_manager.run()
finally:
self._current_manager = None
def changeClusterState(self, state):
"""
Change the cluster state and apply right handler on each connections
"""
if self.cluster_state == state:
return
# select the storage handler
if state in (ClusterStates.RUNNING, ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP, ClusterStates.STOPPING_BACKUP):
storage_handler = self.storage_service_handler
elif self._current_manager is not None:
storage_handler = self._current_manager.getHandler()
elif state == ClusterStates.STOPPING:
storage_handler = None
else:
raise RuntimeError('Unexpected cluster state')
# change handlers
notification_packet = Packets.NotifyClusterInformation(state)
for node in self.nm.getList():
if not node.isConnected(True):
continue
conn = node.getConnection()
if node.isIdentified():
conn.send(notification_packet)
elif conn.isServer():
continue
if node.isMaster():
if state == ClusterStates.RECOVERING:
handler = self.election_handler
else:
handler = self.secondary_handler
elif node.isStorage() and storage_handler:
handler = storage_handler
else:
# There's a single handler type for admins.
# Client can't change handler without being first disconnected.
assert state in (
ClusterStates.STOPPING,
ClusterStates.STOPPING_BACKUP,
) or not node.isClient(), (state, node)
continue # keep handler
if type(handler) is not type(conn.getLastHandler()):
conn.setHandler(handler)
handler.handlerSwitched(conn, new=False)
self.cluster_state = state
def getNewUUID(self, uuid, address, node_type):
getByUUID = self.nm.getByUUID
if None != uuid != self.uuid:
node = getByUUID(uuid)
if node is None or node.getAddress() == address:
return uuid
hob = UUID_NAMESPACES[node_type]
for uuid in xrange((hob << 24) + 1, hob + 0x10 << 24):
node = getByUUID(uuid)
if node is None or None is not address == node.getAddress():
assert uuid != self.uuid
return uuid
raise RuntimeError
def getClusterState(self):
return self.cluster_state
def shutdown(self):
"""Close all connections and exit"""
self.changeClusterState(ClusterStates.STOPPING)
# Marking a fictional storage node as starting operation blocks any
# request to start a new transaction. Do this way has 2 advantages:
# - It's simpler than changing the handler of all clients,
# which is anyway not supported by EventQueue.
# - Returning an error code would cause activity on client side for
# nothing.
# What's important is to not abort during the second phase of commits
# and for this, clients must even be able to reconnect, in case of
# failure during tpc_finish.
# We're rarely involved in vote, so we have to trust clients that they
# abort any transaction that is still in the first phase.
self.storage_starting_set.add(None)
try:
# wait for all transaction to be finished
while self.tm.hasPending():
self.em.poll(1)
except StoppedOperation:
logging.critical('No longer operational')
logging.info("asking remaining nodes to shutdown")
self.listening_conn.close()
handler = EventHandler(self)
for node in self.nm.getList():
if not node.isConnected(True):
continue
conn = node.getConnection()
conn.setHandler(handler)
if not conn.connecting:
if node.isStorage():
conn.send(Packets.NotifyNodeInformation(monotonic_time(), ((
node.getType(), node.getAddress(), node.getUUID(),
NodeStates.DOWN, None),)))
if conn.pending():
conn.abort()
continue
conn.close()
while self.em.connection_dict:
self.em.poll(1)
# then shutdown
sys.exit()
def identifyStorageNode(self, known):
if known:
state = NodeStates.RUNNING
else:
# same as for verification
state = NodeStates.PENDING
return state, self.storage_service_handler
def onTransactionCommitted(self, txn):
# I have received all the lock answers now:
# - send a Notify Transaction Finished to the initiated client node
# - Invalidate Objects to the other client nodes
ttid = txn.getTTID()
tid = txn.getTID()
transaction_node = txn.getNode()
invalidate_objects = Packets.InvalidateObjects(tid, txn.getOIDList())
for client_node in self.nm.getClientList(only_identified=True):
if client_node is transaction_node:
client_node.send(Packets.AnswerTransactionFinished(ttid, tid),
msg_id=txn.getMessageId())
else:
client_node.send(invalidate_objects)
# Unlock Information to relevant storage nodes.
notify_unlock = Packets.NotifyUnlockInformation(ttid)
getByUUID = self.nm.getByUUID
for storage_uuid in txn.getUUIDList():
getByUUID(storage_uuid).send(notify_unlock)
# Notify storage that have replications blocked by this transaction,
# and clients that try to recover from a failure during tpc_finish.
notify_finished = Packets.NotifyTransactionFinished(ttid, tid)
for uuid in txn.getNotificationUUIDList():
node = getByUUID(uuid)
if node.isClient():
# There should be only 1 client interested.
node.answer(Packets.AnswerFinalTID(tid))
else:
node.send(notify_finished)
assert self.last_transaction < tid, (self.last_transaction, tid)
self.setLastTransaction(tid)
def getLastTransaction(self):
return self.last_transaction
def setLastTransaction(self, tid):
self.last_transaction = tid
def setStorageNotReady(self, uuid):
self.storage_starting_set.discard(uuid)
self.storage_ready_dict.pop(uuid, None)
self.tm.executeQueuedEvents()
def startStorage(self, node):
# XXX: Is this boolean 'backup' field needed ?
# Maybe this can be deduced from cluster state.
node.send(Packets.StartOperation(bool(self.backup_tid)))
uuid = node.getUUID()
assert uuid not in self.storage_starting_set
if uuid not in self.storage_ready_dict:
self.storage_starting_set.add(uuid)
def setStorageReady(self, uuid):
self.storage_starting_set.remove(uuid)
assert uuid not in self.storage_ready_dict, self.storage_ready_dict
self.storage_readiness = self.storage_ready_dict[uuid] = \
self.storage_readiness + 1
self.tm.executeQueuedEvents()
def isStorageReady(self, uuid):
return uuid in self.storage_ready_dict
def getStorageReadySet(self, readiness=float('inf')):
return {k for k, v in self.storage_ready_dict.iteritems()
if v <= readiness}
def notifyTransactionAborted(self, ttid, uuids):
uuid_set = self.getStorageReadySet()
uuid_set.intersection_update(uuids)
if uuid_set:
p = Packets.AbortTransaction(ttid, ())
getByUUID = self.nm.getByUUID
for uuid in uuid_set:
getByUUID(uuid).send(p)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/backup_app.py 0000664 0000000 0000000 00000041251 13465024610 0026001 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2012-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import random, weakref
from bisect import bisect
from collections import defaultdict
from neo.lib import logging
from neo.lib.bootstrap import BootstrapManager
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.node import NodeManager
from neo.lib.protocol import ClusterStates, \
NodeTypes, Packets, uuid_str, ZERO_TID
from neo.lib.util import add64, dump
from .app import StateChangedException
from .pt import PartitionTable
from .handlers.backup import BackupHandler
"""
Backup algorithm
This implementation relies on normal storage replication.
Storage nodes that are specialised for backup are not in the same NEO cluster,
but are managed by another master in a different cluster.
When the cluster is in BACKINGUP state, its master acts like a client to the
master of the main cluster. It gets notified of new data thanks to invalidation,
and notifies in turn its storage nodes what/when to replicate.
Storages stay in UP_TO_DATE state, even if partitions are synchronized up to
different tids. Storage nodes remember they are in such state and when
switching into RUNNING state, the cluster cuts the DB at the "backup TID", which
is the last TID for which we have all data. This TID can't be guessed from
'trans' and 'obj' tables, like it is done in normal mode, so:
- The master must even notify storages of transactions that don't modify their
partitions: see Replicate packets without any source.
- 'backup_tid' properties exist in many places, on the master and the storages,
so that the DB can be made consistent again at any moment, without losing
any (or little) data.
Out of backup storage nodes assigned to a partition, one is chosen as primary
for that partition. It means only this node will fetch data from the upstream
cluster, to minimize bandwidth between clusters. Other replicas will
synchronize from the primary node.
There is no conflict of node id between the 2 clusters:
- Storage nodes connect anonymously to upstream.
- The master node gets an id from the upstream master and uses it only when
communicating with it.
"""
class BackupApplication(object):
pt = None
server = None # like in BaseApplication
uuid = None
def __init__(self, app, name, master_addresses):
self.app = weakref.proxy(app)
self.name = name
self.nm = NodeManager()
for master_address in master_addresses:
self.nm.createMaster(address=master_address)
em = property(lambda self: self.app.em)
ssl = property(lambda self: self.app.ssl)
def close(self):
self.nm.close()
del self.__dict__
def setUUID(self, uuid):
if self.uuid != uuid:
self.uuid = uuid
logging.info('Upstream Node ID: %s', uuid_str(uuid))
def log(self):
self.nm.log()
if self.pt is not None:
self.pt.log()
def provideService(self):
logging.info('provide backup')
poll = self.em.poll
app = self.app
pt = app.pt
while True:
app.changeClusterState(ClusterStates.STARTING_BACKUP)
bootstrap = BootstrapManager(self, NodeTypes.CLIENT)
# {offset -> node}
self.primary_partition_dict = {}
# [[tid]]
self.tid_list = tuple([] for _ in xrange(pt.getPartitions()))
try:
while True:
for node in pt.getNodeSet(readable=True):
if not app.isStorageReady(node.getUUID()):
break
else:
break
poll(1)
node, conn = bootstrap.getPrimaryConnection()
try:
app.changeClusterState(ClusterStates.BACKINGUP)
del bootstrap, node
self.ignore_invalidations = True
conn.setHandler(BackupHandler(self))
conn.ask(Packets.AskLastTransaction())
# debug variable to log how big 'tid_list' can be.
self.debug_tid_count = 0
while True:
poll(1)
except PrimaryFailure, msg:
logging.error('upstream master is down: %s', msg)
finally:
app.backup_tid = pt.getBackupTid()
try:
conn.close()
except PrimaryFailure:
pass
try:
del self.pt
except AttributeError:
pass
except StateChangedException, e:
if e.args[0] != ClusterStates.STOPPING_BACKUP:
raise
app.changeClusterState(*e.args)
tid = app.backup_tid
# Wait for non-primary partitions to catch up,
# so that all UP_TO_DATE cells are really UP_TO_DATE.
# XXX: Another possibility could be to outdate such cells, and
# they would be quickly updated at the beginning of the
# RUNNING phase. This may simplify code.
# Any unfinished replication from upstream will be truncated.
while pt.getBackupTid(min) < tid:
poll(1)
last_tid = app.getLastTransaction()
handler = EventHandler(app)
if tid < last_tid:
assert tid != ZERO_TID
logging.warning("Truncating at %s (last_tid was %s)",
dump(app.backup_tid), dump(last_tid))
else:
# We will do a dummy truncation, just to leave backup mode,
# so it's fine to start automatically if there's any
# missing storage.
# XXX: Consider using another method to leave backup mode,
# at least when there's nothing to truncate. Because
# in case of StoppedOperation during VERIFYING state,
# this flag will be wrongly set to False.
app._startup_allowed = True
# If any error happened before reaching this line, we'd go back
# to backup mode, which is the right mode to recover.
del app.backup_tid
# Now back to RECOVERY...
return tid
finally:
del self.primary_partition_dict, self.tid_list
pt.clearReplicating()
def nodeLost(self, node):
getCellList = self.app.pt.getCellList
trigger_set = set()
for offset, primary_node in self.primary_partition_dict.items():
if primary_node is not node:
continue
cell_list = getCellList(offset, readable=True)
cell = max(cell_list, key=lambda cell: cell.backup_tid)
tid = cell.backup_tid
self.primary_partition_dict[offset] = primary_node = cell.getNode()
p = Packets.Replicate(tid, '', {offset: primary_node.getAddress()})
for cell in cell_list:
cell.replicating = tid
if cell.backup_tid < tid:
logging.debug(
"ask %s to replicate partition %u up to %s from %s",
uuid_str(cell.getUUID()), offset, dump(tid),
uuid_str(primary_node.getUUID()))
cell.getNode().send(p)
trigger_set.add(primary_node)
for node in trigger_set:
self.triggerBackup(node)
def invalidatePartitions(self, tid, prev_tid, partition_set):
app = self.app
app.setLastTransaction(tid)
pt = app.pt
trigger_set = set()
untouched_dict = defaultdict(dict)
for offset in xrange(pt.getPartitions()):
try:
last_max_tid = self.tid_list[offset][-1]
except IndexError:
last_max_tid = prev_tid
if offset in partition_set:
primary_list = []
node_list = []
cell_list = pt.getCellList(offset, readable=True)
for cell in cell_list:
node = cell.getNode()
assert node.isConnected(), node
if cell.backup_tid == prev_tid:
if prev_tid == tid:
# Connecting to upstream: any node is that is
# up-to-date wrt upstream is candidate for being
# primary.
assert self.ignore_invalidations
if app.isStorageReady(node.getUUID()):
primary_list.append(node)
continue
# Let's given 4 TID t0,t1,t2,t3: if a cell is only
# modified by t0 & t3 and has all data for t0, 4 values
# are possible for its 'backup_tid' until it replicates
# up to t3: t0, t1, t2 or t3 - 1
# Choosing the smallest one (t0) is easier to implement
# but when leaving backup mode, we would always lose
# data if the last full transaction does not modify
# all partitions. t1 is wrong for the same reason.
# So we have chosen the highest one (t3 - 1).
# t2 should also work but maybe harder to implement.
cell.backup_tid = add64(tid, -1)
logging.debug(
"partition %u: updating backup_tid of %r to %s",
offset, cell, dump(cell.backup_tid))
else:
assert cell.backup_tid < last_max_tid, (
cell.backup_tid, last_max_tid, prev_tid, tid)
if app.isStorageReady(node.getUUID()):
node_list.append(node)
# Make sure we have a primary storage for this partition.
if offset not in self.primary_partition_dict:
self.primary_partition_dict[offset] = \
random.choice(primary_list or node_list)
if node_list:
self.tid_list[offset].append(tid)
if primary_list:
# Resume replication to secondary cells.
self._triggerSecondary(
self.primary_partition_dict[offset],
offset, tid, cell_list)
else:
trigger_set.update(node_list)
else:
# Partition not touched, so increase 'backup_tid' of all
# "up-to-date" replicas, without having to replicate.
for cell in pt.getCellList(offset, readable=True):
if last_max_tid <= cell.backup_tid:
cell.backup_tid = tid
untouched_dict[cell.getNode()][offset] = None
elif last_max_tid <= cell.replicating:
# Same for 'replicating' to avoid useless orders.
logging.debug("silently update replicating order"
" of %s for partition %u, up to %s",
uuid_str(cell.getUUID()), offset, dump(tid))
cell.replicating = tid
for node, untouched_dict in untouched_dict.iteritems():
if app.isStorageReady(node.getUUID()):
node.send(Packets.Replicate(tid, '', untouched_dict))
for node in trigger_set:
self.triggerBackup(node)
count = sum(map(len, self.tid_list))
if self.debug_tid_count < count:
logging.debug("Maximum number of tracked tids: %u", count)
self.debug_tid_count = count
def triggerBackup(self, node):
tid_list = self.tid_list
tid = self.app.getLastTransaction()
replicate_list = []
for offset, cell in self.app.pt.iterNodeCell(node):
max_tid = tid_list[offset]
if max_tid and self.primary_partition_dict[offset] is node and \
max(cell.backup_tid, cell.replicating) < max_tid[-1]:
cell.replicating = tid
replicate_list.append(offset)
if not replicate_list:
return
getCellList = self.pt.getCellList
source_dict = {}
address_set = set()
for offset in replicate_list:
cell_list = getCellList(offset, readable=True)
random.shuffle(cell_list)
assert cell_list, offset
for cell in cell_list:
addr = cell.getAddress()
if addr in address_set:
break
else:
address_set.add(addr)
source_dict[offset] = addr
logging.debug("ask %s to replicate partition %u up to %s from %r",
uuid_str(node.getUUID()), offset, dump(tid), addr)
node.send(Packets.Replicate(tid, self.name, source_dict))
def notifyReplicationDone(self, node, offset, tid):
app = self.app
cell = app.pt.getCell(offset, node.getUUID())
tid_list = self.tid_list[offset]
if tid_list: # may be empty if the cell is out-of-date
# or if we're not fully initialized
if tid < tid_list[0]:
cell.replicating = tid
else:
try:
tid = add64(tid_list[bisect(tid_list, tid)], -1)
except IndexError:
last_tid = app.getLastTransaction()
if tid < last_tid:
tid = last_tid
node.send(Packets.Replicate(tid, '', {offset: None}))
logging.debug("partition %u: updating backup_tid of %r to %s",
offset, cell, dump(tid))
cell.backup_tid = tid
# TODO: Provide invalidation feedback about new txns to read-only
# clients connected to backup cluster. Not only here but also
# hooked to in-progress feedback from fetchObjects (storage).
# Forget tids we won't need anymore.
cell_list = app.pt.getCellList(offset, readable=True)
del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]
primary_node = self.primary_partition_dict.get(offset)
primary = primary_node is node
result = None if primary else app.pt.setUpToDate(node, offset)
assert cell.isReadable()
if result: # was out-of-date
if primary_node is not None:
max_tid, = [x.backup_tid for x in cell_list
if x.getNode() is primary_node]
if tid < max_tid:
cell.replicating = max_tid
logging.debug(
"ask %s to replicate partition %u up to %s from %s",
uuid_str(node.getUUID()), offset, dump(max_tid),
uuid_str(primary_node.getUUID()))
node.send(Packets.Replicate(max_tid, '',
{offset: primary_node.getAddress()}))
else:
if app.getClusterState() == ClusterStates.BACKINGUP:
self.triggerBackup(node)
if primary:
self._triggerSecondary(node, offset, tid, cell_list)
return result
def _triggerSecondary(self, node, offset, tid, cell_list):
# Notify secondary storages that they can replicate from
# primary ones, even if they are already replicating.
p = Packets.Replicate(tid, '', {offset: node.getAddress()})
for cell in cell_list:
if max(cell.backup_tid, cell.replicating) < tid:
cell.replicating = tid
logging.debug(
"ask %s to replicate partition %u up to %s from %s",
uuid_str(cell.getUUID()), offset,
dump(tid), uuid_str(node.getUUID()))
cell.getNode().send(p)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/ 0000775 0000000 0000000 00000000000 13465024610 0025117 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/__init__.py 0000664 0000000 0000000 00000007273 13465024610 0027241 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from ..app import monotonic_time
from neo.lib import logging
from neo.lib.exception import StoppedOperation
from neo.lib.handler import EventHandler
from neo.lib.protocol import Packets
class MasterHandler(EventHandler):
"""This class implements a generic part of the event handlers."""
def connectionLost(self, conn, new_state=None):
if self.app.listening_conn: # if running
self._connectionLost(conn)
def askClusterState(self, conn):
state = self.app.getClusterState()
conn.answer(Packets.AnswerClusterState(state))
def askRecovery(self, conn):
app = self.app
conn.answer(Packets.AnswerRecovery(
app.pt.getID(),
app.backup_tid and app.pt.getBackupTid(),
app.truncate_tid))
def askLastIDs(self, conn):
tm = self.app.tm
conn.answer(Packets.AnswerLastIDs(tm.getLastOID(), tm.getLastTID()))
def askLastTransaction(self, conn):
conn.answer(Packets.AnswerLastTransaction(
self.app.getLastTransaction()))
def _notifyNodeInformation(self, conn):
app = self.app
node = app.nm.getByUUID(conn.getUUID())
node_list = app.nm.getList()
node_list.remove(node)
node_list = ([node.asTuple()] # for id_timestamp
+ app.getNodeInformationDict(node_list)[node.getType()])
conn.send(Packets.NotifyNodeInformation(monotonic_time(), node_list))
def handlerSwitched(self, conn, new):
pt = self.app.pt
# Except storages during recovery and secondary masters, all nodes
# receives the full partition table as soon as they're identified.
# It is also sent in 2 other cases:
# - to admins during recovery, whenever a newer PT is loaded;
# - to storage when switching from recovery to verification.
# After that, non-master nodes only receive incremental updates.
conn.send(Packets.SendPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
class BaseServiceHandler(MasterHandler):
"""Common handler class for storage nodes."""
def connectionLost(self, conn, new_state):
app = self.app
node = app.nm.getByUUID(conn.getUUID())
if node is None:
return # for example, when a storage is removed by an admin
assert node.isStorage(), node
logging.info('storage node lost')
if node.isPending():
# was in pending state, so drop it from the node manager to forget
# it and do not set in running state when it comes back
logging.info('drop a pending node from the node manager')
node.setUnknown()
elif node.isDown():
# Already put in DOWN state by AdministrationHandler.setNodeState
return
else:
node.setDown()
app.broadcastNodesInformation([node])
if app.truncate_tid:
raise StoppedOperation
app.broadcastPartitionChanges(app.pt.outdate(node))
if not app.pt.operational():
raise StoppedOperation
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/administration.py 0000664 0000000 0000000 00000026166 13465024610 0030531 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import random
from functools import wraps
from . import MasterHandler
from ..app import monotonic_time, StateChangedException
from neo.lib import logging
from neo.lib.exception import StoppedOperation
from neo.lib.handler import AnswerDenied
from neo.lib.pt import PartitionTableException
from neo.lib.protocol import ClusterStates, Errors, \
NodeStates, NodeTypes, Packets, uuid_str
from neo.lib.util import dump
CLUSTER_STATE_WORKFLOW = {
# destination: sources
ClusterStates.VERIFYING: (ClusterStates.RECOVERING,),
ClusterStates.STARTING_BACKUP: (ClusterStates.RUNNING,
ClusterStates.STOPPING_BACKUP),
ClusterStates.STOPPING_BACKUP: (ClusterStates.BACKINGUP,
ClusterStates.STARTING_BACKUP),
}
NODE_STATE_WORKFLOW = {
NodeTypes.MASTER: (NodeStates.DOWN,),
NodeTypes.STORAGE: (NodeStates.DOWN, NodeStates.UNKNOWN),
}
def check_state(*states):
def decorator(wrapped):
def wrapper(self, *args):
state = self.app.getClusterState()
if state not in states:
raise AnswerDenied('%s RPC can not be used in %s state'
% (wrapped.__name__, state))
wrapped(self, *args)
return wraps(wrapped)(wrapper)
return decorator
class AdministrationHandler(MasterHandler):
"""This class deals with messages from the admin node only"""
def handlerSwitched(self, conn, new):
assert new
super(AdministrationHandler, self).handlerSwitched(conn, new)
def connectionLost(self, conn, new_state):
node = self.app.nm.getByUUID(conn.getUUID())
if node is not None:
self.app.nm.remove(node)
def flushLog(self, conn):
p = Packets.FlushLog()
for node in self.app.nm.getConnectedList():
c = node.getConnection()
c is conn or c.send(p)
super(AdministrationHandler, self).flushLog(conn)
def setClusterState(self, conn, state):
app = self.app
# check request
try:
if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]:
raise AnswerDenied('Can not switch to this state')
except KeyError:
if state != ClusterStates.STOPPING:
raise AnswerDenied('Invalid state requested')
# change state
if state == ClusterStates.VERIFYING:
storage_list = app.nm.getStorageList(only_identified=True)
if not storage_list:
raise AnswerDenied(
'Cannot exit recovery without any storage node')
for node in storage_list:
assert node.isPending(), node
if node.getConnection().isPending():
raise AnswerDenied(
'Cannot exit recovery now: node %r is entering cluster'
% node,)
app._startup_allowed = True
state = app.cluster_state
elif state == ClusterStates.STARTING_BACKUP:
if app.tm.hasPending() or app.nm.getClientList(True):
raise AnswerDenied("Can not switch to %s state with pending"
" transactions or connected clients" % state)
conn.answer(Errors.Ack('Cluster state changed'))
if state != app.cluster_state:
raise StateChangedException(state)
def setNodeState(self, conn, uuid, state):
logging.info("set node state for %s: %s", uuid_str(uuid), state)
app = self.app
node = app.nm.getByUUID(uuid)
if node is None:
raise AnswerDenied('unknown node')
if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
raise AnswerDenied('can not switch node to %s state' % state)
if uuid == app.uuid:
raise AnswerDenied('can not kill primary master node')
state_changed = state != node.getState()
message = ('state changed' if state_changed else
'node already in %s state' % state)
if node.isStorage():
keep = state == NodeStates.DOWN
if node.isRunning() and not keep:
raise AnswerDenied(
"a running node must be stopped before removal")
try:
cell_list = app.pt.dropNodeList([node], keep)
except PartitionTableException, e:
raise AnswerDenied(str(e))
node.setState(state)
if node.isConnected():
# notify itself so it can shutdown
node.send(Packets.NotifyNodeInformation(
monotonic_time(), [node.asTuple()]))
# close to avoid handle the closure as a connection lost
node.getConnection().abort()
if keep:
cell_list = app.pt.outdate()
elif cell_list:
message = 'node permanently removed'
app.broadcastPartitionChanges(cell_list)
else:
node.setState(state)
# /!\ send the node information *after* the partition table change
conn.answer(Errors.Ack(message))
if state_changed:
# notify node explicitly because broadcastNodesInformation()
# ignores non-running nodes
assert not node.isRunning()
if node.isConnected():
node.send(Packets.NotifyNodeInformation(
monotonic_time(), [node.asTuple()]))
app.broadcastNodesInformation([node])
# XXX: Would it be safe to allow more states ?
__change_pt_rpc = check_state(
ClusterStates.RUNNING,
ClusterStates.STARTING_BACKUP,
ClusterStates.BACKINGUP)
@__change_pt_rpc
def addPendingNodes(self, conn, uuid_list):
uuids = ', '.join(map(uuid_str, uuid_list))
logging.debug('Add nodes %s', uuids)
app = self.app
# take all pending nodes
node_list = list(app.pt.addNodeList(node
for node in app.nm.getStorageList()
if node.isPending() and node.getUUID() in uuid_list))
if node_list:
for node in node_list:
node.setRunning()
app.startStorage(node)
app.broadcastNodesInformation(node_list)
conn.answer(Errors.Ack('Nodes added: %s' %
', '.join(uuid_str(x.getUUID()) for x in node_list)))
else:
logging.warning('No node added')
conn.answer(Errors.Ack('No node added'))
def repair(self, conn, uuid_list, *args):
getByUUID = self.app.nm.getByUUID
node_list = []
for uuid in uuid_list:
node = getByUUID(uuid)
if node is None or not (node.isStorage() and node.isIdentified()):
raise AnswerDenied("invalid storage node %s" % uuid_str(uuid))
node_list.append(node)
repair = Packets.NotifyRepair(*args)
for node in node_list:
node.send(repair)
conn.answer(Errors.Ack(''))
@__change_pt_rpc
def setNumReplicas(self, conn, num_replicas):
self.app.broadcastPartitionChanges((), num_replicas)
conn.answer(Errors.Ack(''))
@__change_pt_rpc
def tweakPartitionTable(self, conn, dry_run, uuid_list):
app = self.app
drop_list = []
for node in app.nm.getStorageList():
if node.getUUID() in uuid_list or node.isPending():
drop_list.append(node)
elif not node.isRunning():
drop_list.append(node)
raise AnswerDenied(
'tweak: down nodes must be listed explicitly')
if dry_run:
pt = object.__new__(app.pt.__class__)
new_nodes = pt.load(app.pt.getID(), app.pt.getReplicas(),
app.pt.getRowList(), app.nm)
assert not new_nodes
pt.addNodeList(node
for node, count in app.pt.count_dict.iteritems()
if not count)
else:
pt = app.pt
try:
changed_list = pt.tweak(drop_list)
except PartitionTableException, e:
raise AnswerDenied(str(e))
if not dry_run:
app.broadcastPartitionChanges(changed_list)
conn.answer(Packets.AnswerTweakPartitionTable(
bool(changed_list), pt.getRowList()))
@check_state(ClusterStates.RUNNING)
def truncate(self, conn, tid):
conn.answer(Errors.Ack(''))
raise StoppedOperation(tid)
def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
app = self.app
pt = app.pt
backingup = bool(app.backup_tid)
if not max_tid:
max_tid = pt.getCheckTid(partition_dict) if backingup else \
app.getLastTransaction()
if min_tid > max_tid:
logging.warning("nothing to check: min_tid=%s > max_tid=%s",
dump(min_tid), dump(max_tid))
else:
getByUUID = app.nm.getByUUID
node_set = set()
for offset, source in partition_dict.iteritems():
# XXX: For the moment, code checking replicas is unable to fix
# corrupted partitions (when a good cell is known)
# so only check readable ones.
# (see also Checker._nextPartition of storage)
cell_list = pt.getCellList(offset, True)
#cell_list = [cell for cell in pt.getCellList(offset)
# if not cell.isOutOfDate()]
if len(cell_list) + (backingup and not source) <= 1:
continue
for cell in cell_list:
node = cell.getNode()
if node in node_set:
break
else:
node_set.add(node)
if source:
source = '', getByUUID(source).getAddress()
else:
readable = [cell for cell in cell_list if cell.isReadable()]
if 1 == len(readable) < len(cell_list):
source = '', readable[0].getAddress()
elif backingup:
source = app.backup_app.name, random.choice(
app.backup_app.pt.getCellList(offset, readable=True)
).getAddress()
else:
source = '', None
node.send(Packets.CheckPartition(
offset, source, min_tid, max_tid))
conn.answer(Errors.Ack(''))
del __change_pt_rpc
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/backup.py 0000664 0000000 0000000 00000005753 13465024610 0026750 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2012-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.protocol import ZERO_TID
from neo.lib.pt import PartitionTable
class BackupHandler(EventHandler):
"""Handler dedicated to upstream master during BACKINGUP state"""
def connectionLost(self, conn, new_state):
if self.app.app.listening_conn: # if running
raise PrimaryFailure('connection lost')
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
app = self.app
pt = app.pt = object.__new__(PartitionTable)
pt.load(ptid, num_replicas, row_list, self.app.nm)
if pt.getPartitions() != app.app.pt.getPartitions():
raise RuntimeError("inconsistent number of partitions")
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
self.app.pt.update(ptid, num_replicas, cell_list, self.app.nm)
def answerLastTransaction(self, conn, tid):
app = self.app
prev_tid = app.app.getLastTransaction()
if prev_tid <= tid:
# Since we don't know which partitions were modified during our
# absence, we must force replication on all storages. As long as
# they haven't done this first check, our backup tid will remain
# inferior to this 'tid'. We don't know the real prev_tid, which is:
# >= app.app.getLastTransaction()
# < tid
# but passing 'tid' is good enough.
# A special case is when prev_tid == tid: even in this case, we
# must restore the state of the backup app so that any interrupted
# replication (internal or not) is resumed, otherwise the global
# backup_tid could remain stuck to an old tid if upstream is idle.
app.invalidatePartitions(tid, tid, xrange(app.pt.getPartitions()))
else:
raise RuntimeError("upstream DB truncated")
app.ignore_invalidations = False
def invalidateObjects(self, conn, tid, oid_list):
app = self.app
if app.ignore_invalidations:
return
getPartition = app.app.pt.getPartition
partition_set = set(map(getPartition, oid_list))
partition_set.add(getPartition(tid))
prev_tid = app.app.getLastTransaction()
app.invalidatePartitions(tid, prev_tid, partition_set)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/client.py 0000664 0000000 0000000 00000014002 13465024610 0026744 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib.handler import DelayEvent
from neo.lib.protocol import Packets, ProtocolError, MAX_TID, Errors
from ..app import monotonic_time
from . import MasterHandler
class ClientServiceHandler(MasterHandler):
""" Handler dedicated to client during service state """
def handlerSwitched(self, conn, new):
assert new
super(ClientServiceHandler, self).handlerSwitched(conn, new)
def _connectionLost(self, conn):
# cancel its transactions and forgot the node
app = self.app
node = app.nm.getByUUID(conn.getUUID())
assert node is not None, conn
for x in app.tm.clientLost(node):
app.notifyTransactionAborted(*x)
node.setUnknown()
app.broadcastNodesInformation([node])
def askBeginTransaction(self, conn, tid):
"""
A client request a TID, nothing is kept about it until the finish.
"""
app = self.app
# Delay new transaction as long as we are waiting for NotifyReady
# answers, otherwise we can't know if the client is expected to commit
# the transaction in full to all these storage nodes.
if app.storage_starting_set:
raise DelayEvent
node = app.nm.getByUUID(conn.getUUID())
tid = app.tm.begin(node, app.storage_readiness, tid)
conn.answer(Packets.AnswerBeginTransaction(tid))
def askNewOIDs(self, conn, num_oids):
conn.answer(Packets.AnswerNewOIDs(self.app.tm.getNextOIDList(num_oids)))
def getEventQueue(self):
# for askBeginTransaction & failedVote
return self.app.tm
def failedVote(self, conn, *args):
app = self.app
conn.answer((Errors.Ack if app.tm.vote(app, *args) else
Errors.IncompleteTransaction)())
def askFinishTransaction(self, conn, ttid, oid_list, checked_list):
app = self.app
tid, node_list = app.tm.prepare(
app,
ttid,
oid_list,
checked_list,
conn.getPeerId(),
)
if tid:
p = Packets.AskLockInformation(ttid, tid)
for node in node_list:
node.ask(p)
else:
conn.answer(Errors.IncompleteTransaction())
# It's simpler to abort automatically rather than asking the client
# to send a notification on tpc_abort, since it would have keep the
# transaction longer in list of transactions.
# This should happen so rarely that we don't try to minimize the
# number of abort notifications by looking the modified partitions.
self.abortTransaction(conn, ttid, app.getStorageReadySet())
def askFinalTID(self, conn, ttid):
tm = self.app.tm
if tm.getLastTID() < ttid:
# Invalid ttid, or aborted transaction.
tid = None
elif ttid in tm:
# Transaction is being finished.
# We'll answer when it is unlocked.
tm[ttid].registerForNotification(conn.getUUID())
return
else:
# Transaction committed ? Tell client to ask storages.
tid = MAX_TID
conn.answer(Packets.AnswerFinalTID(tid))
def askPack(self, conn, tid):
app = self.app
if app.packing is None:
storage_list = app.nm.getStorageList(only_identified=True)
app.packing = (conn, conn.getPeerId(),
{x.getUUID() for x in storage_list})
p = Packets.AskPack(tid)
for storage in storage_list:
storage.getConnection().ask(p)
else:
conn.answer(Packets.AnswerPack(False))
def abortTransaction(self, conn, tid, uuid_list):
# Consider a failure when the connection between the storage and the
# client breaks while the answer to the first write is sent back.
# In other words, the client can not know the exact set of nodes that
# know this transaction, and it sends us all nodes it considered for
# writing.
# We must also add those that are waiting for this transaction to be
# finished (returned by tm.abort), because they may have join the
# cluster after that the client started to abort.
app = self.app
involved = app.tm.abort(tid, conn.getUUID())
involved.update(uuid_list)
app.notifyTransactionAborted(tid, involved)
# like ClientServiceHandler but read-only & only for tid <= backup_tid
class ClientReadOnlyServiceHandler(ClientServiceHandler):
def _readOnly(self, conn, *args, **kw):
conn.answer(Errors.ReadOnlyAccess(
'read-only access because cluster is in backuping mode'))
askBeginTransaction = _readOnly
askNewOIDs = _readOnly
askFinishTransaction = _readOnly
askFinalTID = _readOnly
askPack = _readOnly
abortTransaction = _readOnly
# XXX LastIDs is not used by client at all, and it requires work to determine
# last_oid up to backup_tid, so just make it non-functional for client.
askLastIDs = _readOnly
# like in MasterHandler but returns backup_tid instead of last_tid
def askLastTransaction(self, conn):
assert self.app.backup_tid is not None # we are in BACKUPING mode
backup_tid = self.app.pt.getBackupTid(min)
conn.answer(Packets.AnswerLastTransaction(backup_tid))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/identification.py 0000664 0000000 0000000 00000015067 13465024610 0030473 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.exception import PrimaryElected
from neo.lib.handler import EventHandler
from neo.lib.protocol import CellStates, ClusterStates, NodeStates, \
NodeTypes, NotReadyError, Packets, ProtocolError, uuid_str
from ..app import monotonic_time
class IdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid,
address, name, id_timestamp, devpath, new_nid):
app = self.app
self.checkClusterName(name)
if address == app.server:
raise ProtocolError('address conflict')
node = app.nm.getByUUID(uuid)
by_addr = address and app.nm.getByAddress(address)
while 1:
if by_addr:
if not by_addr.isIdentified():
if node is by_addr:
break
if not node or uuid < 0:
# In case of address conflict for a peer with temporary
# ids, we'll generate a new id.
node = by_addr
break
elif node:
if node.isIdentified():
if uuid < 0:
# The peer wants a temporary id that's already assigned.
# Let's give it another one.
node = uuid = None
break
else:
if node is app._node:
node = None
else:
node.setAddress(address)
break
# Id conflict for a storage node.
else:
break
# cloned/evil/buggy node connecting to us
raise ProtocolError('already connected')
state = NodeStates.RUNNING
if node_type == NodeTypes.CLIENT:
if app.cluster_state == ClusterStates.RUNNING:
handler = app.client_service_handler
elif app.cluster_state == ClusterStates.BACKINGUP:
handler = app.client_ro_service_handler
else:
raise NotReadyError
human_readable_node_type = ' client '
elif node_type == NodeTypes.STORAGE:
if app.cluster_state == ClusterStates.STOPPING_BACKUP:
raise NotReadyError
manager = app._current_manager
if manager is None:
manager = app
state, handler = manager.identifyStorageNode(
uuid is not None and node is not None)
if not address:
if app.cluster_state == ClusterStates.RECOVERING:
raise NotReadyError
if uuid or not new_nid:
raise ProtocolError
state = NodeStates.DOWN
# We'll let the storage node close the connection. If we
# aborted it at the end of the method, BootstrapManager
# (which is used by storage nodes) could see the closure
# and try to reconnect to a master.
human_readable_node_type = ' storage (%s) ' % (state, )
elif node_type == NodeTypes.MASTER:
if app.election:
if id_timestamp and \
(id_timestamp, address) < (app.election, app.server):
raise PrimaryElected(by_addr or
app.nm.createMaster(address=address))
handler = app.election_handler
else:
handler = app.secondary_handler
human_readable_node_type = ' master '
elif node_type == NodeTypes.ADMIN:
handler = app.administration_handler
human_readable_node_type = 'n admin '
else:
raise ProtocolError
uuid = app.getNewUUID(uuid, address, node_type)
logging.info('Accept a' + human_readable_node_type + uuid_str(uuid))
if node is None:
node = app.nm.createFromNodeType(node_type,
uuid=uuid, address=address)
else:
node.setUUID(uuid)
if devpath:
node.devpath = tuple(devpath)
node.id_timestamp = monotonic_time()
node.setState(state)
app.broadcastNodesInformation([node])
if new_nid:
changed_list = []
for offset in new_nid:
changed_list.append((offset, uuid, CellStates.OUT_OF_DATE))
app.pt._setCell(offset, node, CellStates.OUT_OF_DATE)
app.broadcastPartitionChanges(changed_list)
conn.setHandler(handler)
node.setConnection(conn, not node.isIdentified())
conn.answer(Packets.AcceptIdentification(
NodeTypes.MASTER,
app.uuid,
uuid))
handler._notifyNodeInformation(conn)
handler.handlerSwitched(conn, True)
class SecondaryIdentificationHandler(EventHandler):
def requestIdentification(self, conn, node_type, uuid,
address, name, id_timestamp, devpath, new_nid):
app = self.app
self.checkClusterName(name)
if address == app.server:
raise ProtocolError('address conflict')
primary = app.primary_master.getAddress()
if primary == address:
primary = None
elif not app.primary_master.isIdentified():
if node_type == NodeTypes.MASTER:
node = app.nm.createMaster(address=address)
if id_timestamp:
conn.close()
raise PrimaryElected(node)
primary = None
# For some cases, we rely on the fact that the remote will not retry
# immediately (see SocketConnector.CONNECT_LIMIT).
known_master_list = [node.getAddress()
for node in app.nm.getMasterList()]
conn.send(Packets.NotPrimaryMaster(
primary and known_master_list.index(primary),
known_master_list))
conn.abort()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/master.py 0000664 0000000 0000000 00000007027 13465024610 0026772 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from . import MasterHandler
from neo.lib.exception import PrimaryElected, PrimaryFailure
from neo.lib.protocol import ClusterStates, NodeStates, NodeTypes, Packets
class SecondaryHandler(MasterHandler):
"""Handler used by primary to handle secondary masters"""
def handlerSwitched(self, conn, new):
pass
def _connectionLost(self, conn):
app = self.app
node = app.nm.getByUUID(conn.getUUID())
node.setDown()
app.broadcastNodesInformation([node])
class ElectionHandler(SecondaryHandler):
"""Handler used by primary to handle secondary masters during election"""
def connectionCompleted(self, conn):
super(ElectionHandler, self).connectionCompleted(conn)
app = self.app
conn.ask(Packets.RequestIdentification(NodeTypes.MASTER,
app.uuid, app.server, app.name, app.election, (), ()))
def connectionFailed(self, conn):
super(ElectionHandler, self).connectionFailed(conn)
self.connectionLost(conn)
def _acceptIdentification(self, node):
raise PrimaryElected(node)
def _connectionLost(self, *args):
if self.app.primary: # not switching to secondary role
self.app._current_manager.try_secondary = True
def notPrimaryMaster(self, *args):
try:
super(ElectionHandler, self).notPrimaryMaster(*args)
except PrimaryElected, e:
# We keep playing the primary role when the peer does not
# know yet that we won election against the returned node.
if not e.args[0].isIdentified():
raise
# There may be new master nodes. Connect to them.
self.app._current_manager.try_secondary = True
class PrimaryHandler(ElectionHandler):
"""Handler used by secondaries to handle primary master"""
def _acceptIdentification(self, node):
assert self.app.primary_master is node, (self.app.primary_master, node)
def _connectionLost(self, conn):
node = self.app.primary_master
# node is None when switching to primary role
if node and not node.isConnected(True):
raise PrimaryFailure('primary master is dead')
def notPrimaryMaster(self, *args):
try:
super(ElectionHandler, self).notPrimaryMaster(*args)
except PrimaryElected, e:
if e.args[0] is not self.app.primary_master:
raise
def notifyClusterInformation(self, conn, state):
if state == ClusterStates.STOPPING:
sys.exit()
def notifyNodeInformation(self, conn, timestamp, node_list):
super(PrimaryHandler, self).notifyNodeInformation(
conn, timestamp, node_list)
for node_type, _, uuid, state, _ in node_list:
assert node_type == NodeTypes.MASTER, node_type
if uuid == self.app.uuid and state == NodeStates.DOWN:
sys.exit()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/handlers/storage.py 0000664 0000000 0000000 00000011331 13465024610 0027134 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.protocol import (CellStates, ClusterStates, Packets, ProtocolError,
uuid_str)
from neo.lib.exception import StoppedOperation
from neo.lib.pt import PartitionTableException
from neo.lib.util import dump
from . import BaseServiceHandler
class StorageServiceHandler(BaseServiceHandler):
""" Handler dedicated to storages during service state """
def handlerSwitched(self, conn, new):
app = self.app
if new:
super(StorageServiceHandler, self).handlerSwitched(conn, new)
node = app.nm.getByUUID(conn.getUUID())
if node.isRunning(): # node may be PENDING
app.startStorage(node)
def notifyReady(self, conn):
self.app.setStorageReady(conn.getUUID())
def connectionLost(self, conn, new_state):
app = self.app
uuid = conn.getUUID()
node = app.nm.getByUUID(uuid)
super(StorageServiceHandler, self).connectionLost(conn, new_state)
app.setStorageNotReady(uuid)
app.tm.storageLost(uuid)
if (app.getClusterState() == ClusterStates.BACKINGUP
# Also check if we're exiting, because backup_app is not usable
# in this case. Maybe cluster state should be set to something
# else, like STOPPING, during cleanup (__del__/close).
and app.listening_conn):
app.backup_app.nodeLost(node)
if app.packing is not None:
self.answerPack(conn, False)
def askUnfinishedTransactions(self, conn, offset_list):
app = self.app
if app.backup_tid:
last_tid = app.pt.getBackupTid(min)
pending_list = ()
else:
# This can't be app.tm.getLastTID() for imported transactions,
# because outdated cells must at least wait that they're locked
# at source side. For normal transactions, it would not matter.
last_tid = app.getLastTransaction()
pending_list = app.tm.registerForNotification(conn.getUUID())
p = Packets.AnswerUnfinishedTransactions(last_tid, pending_list)
conn.answer(p)
app.pt.updatable(conn.getUUID(), offset_list)
def notifyDeadlock(self, conn, *args):
self.app.tm.deadlock(conn.getUUID(), *args)
def answerInformationLocked(self, conn, ttid):
self.app.tm.lock(ttid, conn.getUUID())
def notifyPartitionCorrupted(self, conn, partition, cell_list):
change_list = []
for cell in self.app.pt.getCellList(partition):
if cell.getUUID() in cell_list:
cell.setState(CellStates.CORRUPTED)
change_list.append((partition, cell.getUUID(),
CellStates.CORRUPTED))
self.app.broadcastPartitionChanges(change_list)
if not self.app.pt.operational():
raise StoppedOperation
def notifyReplicationDone(self, conn, offset, tid):
app = self.app
uuid = conn.getUUID()
node = app.nm.getByUUID(uuid)
if app.backup_tid:
cell_list = app.backup_app.notifyReplicationDone(node, offset, tid)
if not cell_list:
return
else:
try:
cell_list = self.app.pt.setUpToDate(node, offset)
except PartitionTableException, e:
raise ProtocolError(str(e))
if not cell_list:
logging.info("ignored late notification that"
" %s has replicated partition %s up to %s",
uuid_str(uuid), offset, dump(tid))
return
logging.debug("%s is up for partition %s (tid=%s)",
uuid_str(uuid), offset, dump(tid))
self.app.broadcastPartitionChanges(cell_list)
def answerPack(self, conn, status):
app = self.app
if app.packing is not None:
client, msg_id, uid_set = app.packing
uid_set.remove(conn.getUUID())
if not uid_set:
app.packing = None
if not client.isClosed():
client.send(Packets.AnswerPack(True), msg_id)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/pt.py 0000664 0000000 0000000 00000055022 13465024610 0024320 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import Counter, defaultdict
import neo.lib.pt
from neo.lib import logging
from neo.lib.protocol import CellStates, ZERO_TID
class Cell(neo.lib.pt.Cell):
replicating = ZERO_TID
updatable = False
def setState(self, state):
readable = self.isReadable()
super(Cell, self).setState(state)
if self.isReadable():
return
try:
del self.updatable
except AttributeError:
pass
if readable:
try:
del self.backup_tid, self.replicating
except AttributeError:
pass
neo.lib.pt.Cell = Cell
class PartitionTable(neo.lib.pt.PartitionTable):
"""This class manages a partition table for the primary master node"""
def setID(self, id):
assert isinstance(id, (int, long)) or id is None, id
self._id = id
def setNextID(self):
if self._id is None:
raise RuntimeError, 'I do not know the last Partition Table ID'
self._id += 1
return self._id
def setReplicas(self, num_replicas):
assert num_replicas >= 0, num_replicas
self.nr = num_replicas
def make(self, node_list):
"""Make a new partition table from scratch."""
assert self._id is None and node_list, (self._id, node_list)
for node in node_list:
assert node.isRunning() and node.getUUID() is not None, node
self.addNodeList(node_list)
self.tweak()
for node, count in self.count_dict.items():
if not count:
del self.count_dict[node]
def dropNodeList(self, node_list, simulate=False):
partition_list = []
change_list = []
feeding_list = []
for offset, row in enumerate(self.partition_list):
new_row = []
partition_list.append(new_row)
feeding = None
drop_readable = uptodate = False
for cell in row:
node = cell.getNode()
if node in node_list:
change_list.append((offset, node.getUUID(),
CellStates.DISCARDED))
if cell.isReadable():
drop_readable = True
else:
new_row.append(cell)
if cell.isFeeding():
feeding = cell
elif cell.isUpToDate():
uptodate = True
if feeding is not None:
if len(new_row) < len(row):
change_list.append((offset, feeding.getUUID(),
CellStates.UP_TO_DATE))
feeding_list.append(feeding)
elif drop_readable and not uptodate:
raise neo.lib.pt.PartitionTableException(
"Refuse to drop nodes that contain the only readable"
" copies of partition %u" % offset)
if not simulate:
self.partition_list = partition_list
for cell in feeding_list:
cell.setState(CellStates.UP_TO_DATE)
self.count_dict[cell.getNode()] += 1
for node in node_list:
self.count_dict.pop(node, None)
self.num_filled_rows = len(filter(None, self.partition_list))
return change_list
def load(self, ptid, num_replicas, row_list, nm):
"""
Load a partition table from a storage node during the recovery.
Return the new storage nodes registered
"""
new_nodes = []
def getByUUID(nid):
node = nm.getByUUID(nid)
if node is None:
node = nm.createStorage(uuid=nid)
new_nodes.append(node.asTuple())
return node
self._load(ptid, num_replicas, row_list, getByUUID)
return new_nodes
def setUpToDate(self, node, offset):
"""Set a cell as up-to-date"""
uuid = node.getUUID()
# Check the partition is assigned and known as outdated.
row = self.partition_list[offset]
for cell in row:
if cell.getUUID() == uuid:
if cell.isOutOfDate() and cell.updatable:
break
return
else:
raise neo.lib.pt.PartitionTableException('Non-assigned partition')
# Update the partition table.
self._setCell(offset, node, CellStates.UP_TO_DATE)
cell_list = [(offset, uuid, CellStates.UP_TO_DATE)]
# Do no keep too many feeding cells.
readable_list = filter(Cell.isReadable, row)
iter_feeding = (cell.getNode() for cell in readable_list
if cell.isFeeding())
# If all cells are readable, we can now drop all feeding cells.
if len(readable_list) != len(row):
# Else we normally discard at most 1 cell. In the case that cells
# became non-readable since the last tweak, we want to avoid going
# below the wanted number of replicas. Also first try to discard
# feeding cells from nodes that it was decided to drop.
iter_feeding = sorted(iter_feeding, key=lambda node: not all(
cell.isFeeding() for _, cell in self.iterNodeCell(node)
))[:max(0, len(readable_list) - self.nr)]
for node in iter_feeding:
self.removeCell(offset, node)
cell_list.append((offset, node.getUUID(), CellStates.DISCARDED))
return cell_list
def tweak(self, drop_list=()):
"""Optimize partition table
This reassigns cells in 4 ways:
- Discard cells of nodes listed in 'drop_list'. For partitions with too
few readable cells, some cells are instead marked as FEEDING. This is
a preliminary step to drop these nodes, otherwise the partition table
could become non-operational.
In fact, the code touching these cells is disabled (see NOTE below).
- Other nodes must have the same number of non-feeding cells, off by 1.
- When a transaction creates new objects (oids are roughly allocated
sequentially), we expect better performance by maximizing the number
of involved nodes (i.e. parallelizing writes).
- For maximum resiliency, cells of each partition are assigned as far
as possible from each other, by checking the topology path of nodes.
Examples of optimal partition tables with np=10, nr=1 and 5 nodes:
UU... ..UU.
..UU. U...U
U...U .UU..
.UU.. ...UU
...UU UU...
UU... ..UU.
..UU. U...U
U...U .UU..
.UU.. ...UU
...UU UU...
The above 2 PT only differ by permutation of nodes, and this method
plays on it to minimize the resulting amount of replication.
For performance reasons, this algorithm uses a heuristic.
When (np * nr) is not a multiple of the number of nodes, some nodes
have 1 extra cell compared to other. In such case, other optimal PT
could be considered by rotation of the partitions. Actually np times
more, but it's not worth it since they don't differ enough (if np is
big enough) and we don't already do an exhaustive search.
Example with np=3, nr=1 and 2 nodes:
U. .U U.
.U U. U.
U. U. .U
For the topology, let's consider an example with paths of the form
(room, machine, disk):
- if there are more rooms than the number of replicas, 2 cells of the
same partition must not be assigned in the same room;
- otherwise, topology paths are checked at a deeper depth,
e.g. not on the same machine and distributed evenly
(off by 1) among rooms.
But the topology is expected to be optimal, otherwise it is ignored.
In some cases, we could fall back to a non-optimal topology but
that would cause extra replication if the user wants to fix it.
"""
# Collect some data in a usable form for the rest of the method.
node_list = {node: {} for node in self.count_dict
if node not in drop_list}
if not node_list:
raise neo.lib.pt.PartitionTableException("Can't remove all nodes.")
drop_list = defaultdict(list)
for offset, row in enumerate(self.partition_list):
for cell in row:
cell_dict = node_list.get(cell.getNode())
if cell_dict is None:
drop_list[offset].append(cell)
else:
cell_dict[offset] = cell
# The sort by node id is cosmetic, to prefer result like the first one
# in __doc__.
node_list = sorted(node_list.iteritems(), key=lambda x: x[0].getUUID())
# Generate an optimal PT.
node_count = len(node_list)
repeats = min(self.nr + 1, node_count)
x = [[] for _ in xrange(node_count)]
i = 0
for offset in xrange(self.np):
for _ in xrange(repeats):
x[i % node_count].append(offset)
i += 1
option_dict = Counter(map(tuple, x))
# Initialize variables/functions to optimize the topology.
devpath_max = []
devpaths = [()] * node_count
if repeats > 1:
_devpaths = [x[0].devpath for x in node_list]
max_depth = min(map(len, _devpaths))
depth = 0
while 1:
if depth < max_depth:
depth += 1
x = Counter(x[:depth] for x in _devpaths)
n = len(x)
x = set(x.itervalues())
# TODO: Prove it works. If the code turns out to be:
# - too pessimistic, the topology is ignored when
# resiliency could be maximized;
# - or worse too optimistic, in which case this
# method raises, possibly after a very long time.
if len(x) == 1 or max(x) * repeats <= node_count:
i, x = divmod(repeats, n)
devpath_max.append((i + 1, x) if x else (i, n))
if n < repeats:
continue
devpaths = [x[:depth] for x in _devpaths]
break
logging.warning("Can't maximize resiliency: fix the topology"
" of your storage nodes and make sure they're all running."
" %s storage device failure(s) may be enough to lose all"
" the database." % (repeats - 1))
break
topology = [{} for _ in xrange(self.np)]
def update_topology():
for offset in option:
n = topology[offset]
for i, (j, k) in zip(devpath, devpath_max):
try:
i, x = n[i]
except KeyError:
n[i] = i, x = [0, {}]
if i == j or i + 1 == j and k == sum(
1 for i in n.itervalues() if i[0] == j):
# Too many cells would be assigned at this topology
# node.
return False
n = x
# The topology may be optimal with this option. Apply it.
for offset in option:
n = topology[offset]
for i in devpath:
n = n[i]
n[0] += 1
n = n[1]
return True
def revert_topology():
for offset in option:
n = topology[offset]
for i in devpath:
n = n[i]
n[0] -= 1
n = n[1]
# Strategies to find the "best" permutation of nodes.
def node_options():
# The second part of the key goes with the above cosmetic sort.
option_list = sorted(option_dict, key=lambda x: (-len(x), x))
# 1. Search for solution that does not cause extra replication.
# This is important because tweak() must does nothing if it's
# called a second time whereas the list of nodes hasn't changed.
result = []
for i, (_, cell_dict) in enumerate(node_list):
option = {offset for offset, cell in cell_dict.iteritems()
if not cell.isFeeding()}
x = filter(option.issubset, option_list)
if not x:
break
result.append((i, x))
else:
yield result
# 2. We have to move cells. Evaluating all options would have
# a complexity of O(node_count!), which is clearly too slow,
# so we use a heuristic.
# For each node, we compare the resulting amount of replication
# in the best (min_cost) and worst (max_cost) case, and we first
# iterate over nodes with the biggest difference. This minimizes
# the impact of bad allocation patterns for the last nodes.
result = []
np_complement = frozenset(xrange(self.np)).difference
for i, (_, cell_dict) in enumerate(node_list):
cost_list = []
for x, option in enumerate(option_list):
discard = [0, 0]
for offset in np_complement(option):
cell = cell_dict.get(offset)
if cell:
discard[cell.isReadable()] += 1
cost_list.append(((discard[1], discard[0]), x))
cost_list.sort()
min_cost = cost_list[0][0]
max_cost = cost_list[-1][0]
result.append((
min_cost[0] - max_cost[0],
min_cost[1] - max_cost[1],
i, [option_list[x[1]] for x in cost_list]))
result.sort()
yield result
# The main loop, which is where we evaluate options.
new = [] # the solution
stack = [] # data recursion
def options():
x = node_options[len(new)]
return devpaths[x[-2]], iter(x[-1])
for node_options in node_options(): # for each strategy
devpath, iter_option = options()
while 1:
try:
option = next(iter_option)
except StopIteration:
if new:
devpath, iter_option = stack.pop()
option = new.pop()
revert_topology()
option_dict[option] += 1
continue
break
if option_dict[option] and update_topology():
new.append(option)
if len(new) == node_count:
break
stack.append((devpath, iter_option))
devpath, iter_option = options()
option_dict[option] -= 1
if new:
break
else:
raise AssertionError
# Apply the solution.
if self._id is None:
self._id = 1
self.num_filled_rows = self.np
new_state = CellStates.UP_TO_DATE
else:
new_state = CellStates.OUT_OF_DATE
changed_list = []
outdated_list = [repeats] * self.np
discard_list = defaultdict(list)
for i, offset_list in enumerate(new):
node, cell_dict = node_list[node_options[i][-2]]
for offset in offset_list:
cell = cell_dict.pop(offset, None)
if cell is None:
self.count_dict[node] += 1
self.partition_list[offset].append(Cell(node, new_state))
changed_list.append((offset, node.getUUID(), new_state))
elif cell.isReadable():
if cell.isFeeding():
cell.setState(CellStates.UP_TO_DATE)
changed_list.append((offset, node.getUUID(),
CellStates.UP_TO_DATE))
outdated_list[offset] -= 1
for offset, cell in cell_dict.iteritems():
discard_list[offset].append(cell)
# NOTE: The following line disables the next 2 lines, which actually
# causes cells in drop_list to be discarded, now or later;
# drop_list could be renamed into ignore_list.
# 1. Deleting data partition per partition is a lot of work, so
# why ask nodes in drop_list to do that when the goal is
# simply to trash the whole underlying database?
# 2. By excluding nodes from a tweak, it becomes possible to have
# parts of the partition table that are tweaked differently.
# This may require to temporarily change the number of
# replicas for the part being tweaked. In the future, this
# number may be specified in the 'tweak' command, to avoid
# race conditions with setUpToDate().
# Overall, a common use case is when importing a ZODB to NEO,
# to keep the initial importing node up until the database is
# split and replicated to the final nodes.
drop_list = {}
for offset, drop_list in drop_list.iteritems():
discard_list[offset] += drop_list
# We have sorted cells to discard in order to first deallocate nodes
# in drop_list, and have feeding cells in other nodes.
# The following loop also makes sure not to discard cells too quickly,
# by keeping a minimum of 'repeats' readable cells.
for offset, outdated in enumerate(outdated_list):
row = self.partition_list[offset]
for cell in discard_list[offset]:
if outdated and cell.isReadable():
outdated -= 1
if cell.isFeeding():
continue
state = CellStates.FEEDING
cell.setState(state)
else:
self.count_dict[cell.getNode()] -= 1
state = CellStates.DISCARDED
row.remove(cell)
changed_list.append((offset, cell.getUUID(), state))
assert self.operational(), changed_list
return changed_list
def outdate(self, lost_node=None):
"""Outdate all non-working nodes
Do not outdate cells of 'lost_node' for partitions it was the last node
to serve. This allows a cluster restart.
"""
change_list = []
fully_readable = all(cell.isReadable()
for row in self.partition_list
for cell in row)
for offset, row in enumerate(self.partition_list):
lost = lost_node
cell_list = []
for cell in row:
if cell.isReadable():
if cell.getNode().isRunning():
lost = None
else:
cell_list.append(cell)
for cell in cell_list:
node = cell.getNode()
if node is not lost:
if cell.isFeeding():
self.removeCell(offset, node)
state = CellStates.DISCARDED
else:
state = CellStates.OUT_OF_DATE
cell.setState(state)
change_list.append((offset, node.getUUID(), state))
if fully_readable and change_list:
logging.warning(self._first_outdated_message)
return change_list
def updatable(self, uuid, offset_list):
for offset in offset_list:
for cell in self.partition_list[offset]:
if cell.getUUID() == uuid and not cell.isReadable():
cell.updatable = True
def iterNodeCell(self, node):
for offset, row in enumerate(self.partition_list):
for cell in row:
if cell.getNode() is node:
yield offset, cell
break
def getOperationalNodeSet(self):
"""
Return a set of all nodes which are part of at least one UP TO DATE
partition. An empty list is returned if these nodes aren't enough to
become operational.
"""
node_set = set()
for row in self.partition_list:
if not any(cell.isReadable() and cell.getNode().isPending()
for cell in row):
return () # not operational
node_set.update(cell.getNode() for cell in row if cell.isReadable())
return node_set
def clearReplicating(self):
for row in self.partition_list:
for cell in row:
try:
del cell.replicating
except AttributeError:
pass
def setBackupTidDict(self, backup_tid_dict):
for row in self.partition_list:
for cell in row:
if cell.isReadable():
cell.backup_tid = backup_tid_dict.get(cell.getUUID(),
ZERO_TID)
def getBackupTid(self, mean=max):
try:
return min(mean(x.backup_tid for x in row if x.isReadable())
for row in self.partition_list)
except ValueError:
return ZERO_TID
def getCheckTid(self, partition_list):
try:
return min(min(cell.backup_tid
for cell in self.partition_list[offset]
if cell.isReadable())
for offset in partition_list)
except ValueError:
return ZERO_TID
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/recovery.py 0000664 0000000 0000000 00000021414 13465024610 0025531 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.connection import ClientConnection
from neo.lib.protocol import Packets, ProtocolError, ClusterStates, NodeStates
from .app import monotonic_time
from .handlers import MasterHandler
class RecoveryManager(MasterHandler):
"""
Manage the cluster recovery
"""
def __init__(self, app):
# The target node's uuid to request next.
self.target_ptid = 0
self.ask_pt = []
self.backup_tid_dict = {}
self.truncate_dict = {}
def getHandler(self):
return self
def identifyStorageNode(self, _):
"""
Returns the handler for storage nodes
"""
return NodeStates.PENDING, self
def run(self):
"""
Recover the status about the cluster. Obtain the last OID, the last
TID, and the last Partition Table ID from storage nodes, then get
back the latest partition table or make a new table from scratch,
if this is the first time.
A new primary master may also arise during this phase.
"""
logging.info('begin the recovery of the status')
app = self.app
pt = app.pt = app.newPartitionTable()
app.changeClusterState(ClusterStates.RECOVERING)
self.try_secondary = True
# collect the last partition table available
poll = app.em.poll
while 1:
if self.try_secondary:
# Keep trying to connect to all other known masters,
# to make sure there is a challege between each pair
# of masters in the cluster. If we win, all connections
# opened here will be closed.
self.try_secondary = False
node_list = []
for node in app.nm.getMasterList():
if not (node is app._node or node.isConnected(True)):
# During recovery, master nodes are not put back in
# DOWN state by handlers. This is done
# entirely in this method (here and after this poll
# loop), to minimize the notification packets.
if not node.isDown():
node.setDown()
node_list.append(node)
ClientConnection(app, app.election_handler, node)
if node_list:
app.broadcastNodesInformation(node_list)
poll(1)
if pt.filled():
# A partition table exists, we are starting an existing
# cluster.
node_list = pt.getOperationalNodeSet()
if app._startup_allowed:
node_list = [node for node in node_list if node.isPending()]
elif node_list:
# we want all nodes to be there if we're going to truncate
if app.truncate_tid:
node_list = pt.getNodeSet()
if not all(node.isPending() for node in node_list):
continue
elif app._startup_allowed or app.autostart:
# No partition table and admin allowed startup, we are
# creating a new cluster out of all pending nodes.
node_list = app.nm.getStorageList(only_identified=True)
if not app._startup_allowed and len(node_list) < app.autostart:
continue
else:
continue
if node_list and not any(node.getConnection().isPending()
for node in node_list):
if pt.filled():
if app.truncate_tid:
node_list = app.nm.getIdentifiedList(pool_set={uuid
for uuid, tid in self.truncate_dict.iteritems()
if not tid or app.truncate_tid < tid})
if node_list:
truncate = Packets.Truncate(app.truncate_tid)
for node in node_list:
conn = node.getConnection()
conn.send(truncate)
self.handlerSwitched(conn, False)
continue
node_list = pt.getConnectedNodeList()
break
logging.info('startup allowed')
for node in node_list:
assert node.isPending(), node
node.setRunning()
for node in app.nm.getMasterList():
if not (node is app._node or node.isIdentified()):
if node.isConnected(True):
node.getConnection().close()
assert node.isDown(), node
elif not node.isDown():
assert self.try_secondary, node
node.setDown()
node_list.append(node)
app.broadcastNodesInformation(node_list)
if pt.getID() is None:
logging.info('creating a new partition table')
pt.make(node_list)
self._notifyAdmins(Packets.SendPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
else:
cell_list = pt.outdate()
if cell_list:
self._notifyAdmins(Packets.NotifyPartitionChanges(
pt.setNextID(), pt.getReplicas(), cell_list))
if app.backup_tid:
pt.setBackupTidDict(self.backup_tid_dict)
app.backup_tid = pt.getBackupTid()
logging.debug('cluster starts this partition table:')
pt.log()
def connectionLost(self, conn, new_state):
uuid = conn.getUUID()
self.backup_tid_dict.pop(uuid, None)
self.truncate_dict.pop(uuid, None)
node = self.app.nm.getByUUID(uuid)
try:
i = self.ask_pt.index(uuid)
except ValueError:
pass
else:
del self.ask_pt[i]
if not i:
if self.ask_pt:
self.app.nm.getByUUID(self.ask_pt[0]) \
.ask(Packets.AskPartitionTable())
else:
logging.warning("Waiting for %r to come back."
" No other node has version %s of the partition table.",
node, self.target_ptid)
if node is None or node.getState() == new_state:
return
node.setState(new_state)
self.app.broadcastNodesInformation([node])
def handlerSwitched(self, conn, new):
# ask the last IDs to perform the recovery
conn.ask(Packets.AskRecovery())
def answerRecovery(self, conn, ptid, backup_tid, truncate_tid):
uuid = conn.getUUID()
# ptid is None if the node has an empty partition table.
if ptid and self.target_ptid <= ptid:
# Maybe a newer partition table.
if self.target_ptid == ptid and self.ask_pt:
# Another node is already asked.
self.ask_pt.append(uuid)
elif self.target_ptid < ptid or self.ask_pt is not ():
# No node asked yet for the newest partition table.
self.target_ptid = ptid
self.ask_pt = [uuid]
conn.ask(Packets.AskPartitionTable())
self.backup_tid_dict[uuid] = backup_tid
self.truncate_dict[uuid] = truncate_tid
def answerPartitionTable(self, conn, ptid, num_replicas, row_list):
# If this is not from a target node, ignore it.
if ptid == self.target_ptid:
app = self.app
new_nodes = app.pt.load(ptid, num_replicas, row_list, app.nm)
self._notifyAdmins(
Packets.NotifyNodeInformation(monotonic_time(), new_nodes),
Packets.SendPartitionTable(ptid, num_replicas, row_list))
self.ask_pt = ()
uuid = conn.getUUID()
app.backup_tid = self.backup_tid_dict[uuid]
app.truncate_tid = self.truncate_dict[uuid]
def _notifyAdmins(self, *packets):
for node in self.app.nm.getAdminList(only_identified=True):
for packet in packets:
node.send(packet)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/transactions.py 0000664 0000000 0000000 00000036106 13465024610 0026407 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import deque
from time import time
from struct import pack, unpack
from neo.lib import logging
from neo.lib.handler import DelayEvent, EventQueue
from neo.lib.protocol import Packets, ProtocolError, uuid_str, \
ZERO_OID, ZERO_TID
from neo.lib.util import dump, u64, addTID, tidFromTime
class Transaction(object):
"""
A pending transaction
"""
locking_tid = ZERO_TID
_tid = None
_msg_id = None
_oid_list = None
_failed = frozenset()
_prepared = False
# uuid dict hold flag to known who has locked the transaction
_uuid_set = None
_lock_wait_uuid_set = None
def __init__(self, node, storage_readiness, ttid):
"""
Prepare the transaction, set OIDs and UUIDs related to it
"""
self._node = node
self._storage_readiness = storage_readiness
self._ttid = ttid
self._birth = time()
# store storage uuids that must be notified at commit
self._notification_set = set()
def __repr__(self):
return "<%s(client=%r, tid=%r, oids=%r, storages=%r, age=%.2fs) at %x>" % (
self.__class__.__name__,
self._node,
dump(self._tid),
map(dump, self._oid_list or ()),
map(uuid_str, self._uuid_set or ()),
time() - self._birth,
id(self),
)
def getNode(self):
"""
Return the node that had began the transaction
"""
return self._node
def getTTID(self):
"""
Return the temporary transaction ID.
"""
return self._ttid
def getTID(self):
"""
Return the transaction ID
"""
return self._tid
def getMessageId(self):
"""
Returns the packet ID to use in the answer
"""
return self._msg_id
def getUUIDList(self):
"""
Returns the list of node's UUID that lock the transaction
"""
return list(self._uuid_set)
def getOIDList(self):
"""
Returns the list of OIDs used in the transaction
"""
return list(self._oid_list)
def isPrepared(self):
"""
Returns True if the commit has been requested by the client
"""
return self._prepared
def registerForNotification(self, uuid):
"""
Register a node that requires a notification at commit
"""
self._notification_set.add(uuid)
def getNotificationUUIDList(self):
"""
Returns the list of nodes waiting for the transaction to be
finished
"""
return list(self._notification_set)
def prepare(self, tid, oid_list, uuid_set, msg_id):
self._tid = tid
self._oid_list = oid_list
self._msg_id = msg_id
self._uuid_set = uuid_set
self._lock_wait_uuid_set = uuid_set.copy()
self._prepared = True
def storageLost(self, uuid):
"""
Given storage was lost while waiting for its lock, stop waiting
for it.
Does nothing if the node was not part of the transaction.
"""
self._notification_set.discard(uuid)
# XXX: We might lose information that a storage successfully locked
# data but was later found to be disconnected. This loss has no impact
# on current code, but it might be disturbing to reader or future code.
if self._prepared:
self._lock_wait_uuid_set.discard(uuid)
self._uuid_set.discard(uuid)
return self.locked()
return False
def clientLost(self, node):
if self._node is node:
if self._prepared:
self._node = None # orphan
else:
return True # abort
else:
self._notification_set.discard(node.getUUID())
return False
def lock(self, uuid):
"""
Define that a node has locked the transaction
Returns true if all nodes are locked
"""
self._lock_wait_uuid_set.remove(uuid)
return self.locked()
def locked(self):
"""
Returns true if all nodes are locked
"""
return not self._lock_wait_uuid_set
class TransactionManager(EventQueue):
"""
Manage current transactions
"""
def __init__(self, on_commit):
self._on_commit = on_commit
self.reset()
def reset(self):
EventQueue.__init__(self)
# ttid -> transaction
self._ttid_dict = {}
self._last_oid = ZERO_OID
self._last_tid = ZERO_TID
# queue filled with ttids pointing to transactions with increasing tids
self._queue = deque()
def __getitem__(self, ttid):
"""
Return the transaction object for this TID
"""
try:
return self._ttid_dict[ttid]
except KeyError:
raise ProtocolError("unknown ttid %s" % dump(ttid))
def __delitem__(self, ttid):
try:
self._queue.remove(ttid)
except ValueError:
pass
del self._ttid_dict[ttid]
self.executeQueuedEvents()
def __contains__(self, ttid):
"""
Returns True if this is a pending transaction
"""
return ttid in self._ttid_dict
def getNextOIDList(self, num_oids):
""" Generate a new OID list """
oid = unpack('!Q', self._last_oid)[0] + 1
oid_list = [pack('!Q', oid + i) for i in xrange(num_oids)]
self._last_oid = oid_list[-1]
return oid_list
def setLastOID(self, oid):
if self._last_oid < oid:
self._last_oid = oid
def getLastOID(self):
return self._last_oid
def _nextTID(self, ttid=None, divisor=None):
"""
Compute the next TID based on the current time and check collisions.
Also, if ttid is not None, divisor is mandatory adjust it so that
tid % divisor == ttid % divisor
while preserving
min_tid < tid
If ttid is None, divisor is ignored.
When constraints allow, prefer decreasing generated TID, to avoid
fast-forwarding to future dates.
"""
tid = tidFromTime(time())
min_tid = self._last_tid
if tid <= min_tid:
tid = addTID(min_tid, 1)
if ttid is not None:
remainder = u64(ttid) % divisor
delta_remainder = remainder - u64(tid) % divisor
if delta_remainder:
tid = addTID(tid, delta_remainder)
if tid <= min_tid:
tid = addTID(tid, divisor)
assert u64(tid) % divisor == remainder, (dump(tid), remainder)
assert min_tid < tid, (dump(min_tid), dump(tid))
self._last_tid = tid
return self._last_tid
def getLastTID(self):
"""
Returns the last TID used
"""
return self._last_tid
def setLastTID(self, tid):
"""
Set the last TID, keep the previous if lower
"""
if self._last_tid < tid:
self._last_tid = tid
def hasPending(self):
"""
Returns True if some transactions are pending
"""
return bool(self._ttid_dict)
def registerForNotification(self, uuid):
"""
Return the list of pending transaction IDs
"""
# remember that this node must be notified when pending transactions
# will be finished
for txn in self._ttid_dict.itervalues():
txn.registerForNotification(uuid)
return self._ttid_dict.keys()
def begin(self, node, storage_readiness, tid=None):
"""
Generate a new TID
"""
if tid is None:
# No TID requested, generate a temporary one
tid = self._nextTID()
else:
# Use of specific TID requested, queue it immediately and update
# last TID.
self._queue.append(tid)
self.setLastTID(tid)
txn = self._ttid_dict[tid] = Transaction(node, storage_readiness, tid)
logging.debug('Begin %s', txn)
return tid
def deadlock(self, storage_id, ttid, locking_tid):
try:
txn = self._ttid_dict[ttid]
except KeyError:
return
if txn.locking_tid <= locking_tid:
client = txn.getNode()
txn.locking_tid = locking_tid = self._nextTID()
logging.info('Deadlock avoidance triggered by %s for %s:'
' new locking tid for TXN %s is %s', uuid_str(storage_id),
uuid_str(client.getUUID()), dump(ttid), dump(locking_tid))
client.send(Packets.NotifyDeadlock(ttid, locking_tid))
def vote(self, app, ttid, uuid_list):
"""
Check that the transaction can be voted
when the client reports failed nodes.
"""
txn = self[ttid]
# The client does not know which nodes are not expected to have
# transactions in full. Let's filter out them.
failed = app.getStorageReadySet(txn._storage_readiness)
failed.intersection_update(uuid_list)
if failed:
operational = app.pt.operational
if not operational(failed):
# No way to commit this transaction because there are
# non-replicated storage nodes with failed stores.
return False
failed = failed.copy()
for t in self._ttid_dict.itervalues():
failed |= t._failed
if not operational(failed):
# Other transactions were voted and unless they're aborted,
# we won't be able to finish this one, because that would make
# the cluster non-operational. Let's tell the caller to retry
# later.
raise DelayEvent
# Allow the client to finish the transaction,
# even if it will disconnect storage nodes.
txn._failed = failed
return True
def prepare(self, app, ttid, oid_list, checked_list, msg_id):
"""
Prepare a transaction to be finished
"""
txn = self[ttid]
pt = app.pt
failed = txn._failed
if failed and not pt.operational(failed):
return None, None
ready = app.getStorageReadySet(txn._storage_readiness)
getPartition = pt.getPartition
partition_set = set(map(getPartition, oid_list))
partition_set.update(map(getPartition, checked_list))
partition_set.add(getPartition(ttid))
node_list = []
uuid_set = set()
for partition in partition_set:
for cell in pt.getCellList(partition):
node = cell.getNode()
if node.isIdentified():
uuid = node.getUUID()
if uuid in uuid_set:
continue
if uuid in failed:
# This will commit a new PT with outdated cells before
# locking the transaction, which is important during
# the verification phase.
node.getConnection().close()
elif uuid in ready:
uuid_set.add(uuid)
node_list.append(node)
# A node that was not ready at the beginning of the transaction
# can't have readable cells. And if we're still operational without
# the 'failed' nodes, then there must still be 1 node in 'ready'
# that is UP.
assert node_list, (ready, failed)
# maybe not the fastest but _queue should be often small
if ttid in self._queue:
tid = ttid
else:
tid = self._nextTID(ttid, pt.getPartitions())
self._queue.append(ttid)
logging.debug('Finish TXN %s for %s (was %s)',
dump(tid), txn.getNode(), dump(ttid))
txn.prepare(tid, oid_list, uuid_set, msg_id)
# check if greater and foreign OID was stored
if oid_list:
self.setLastOID(max(oid_list))
return tid, node_list
def abort(self, ttid, uuid):
"""
Abort a transaction
"""
logging.debug('Abort TXN %s for %s', dump(ttid), uuid_str(uuid))
txn = self[ttid]
if txn.isPrepared():
raise ProtocolError("commit already requested for ttid %s"
% dump(ttid))
del self[ttid]
return txn._notification_set
def lock(self, ttid, uuid):
"""
Set that a node has locked the transaction.
If transaction is completely locked, calls function given at
instantiation time.
"""
logging.debug('Lock TXN %s for %s', dump(ttid), uuid_str(uuid))
if self[ttid].lock(uuid) and self._queue[0] == ttid:
# all storage are locked and we unlock the commit queue
self._unlockPending()
def storageLost(self, uuid):
"""
A storage node has been lost, don't expect a reply from it for
current transactions
"""
unlock = False
for ttid, txn in self._ttid_dict.iteritems():
if txn.storageLost(uuid) and self._queue[0] == ttid:
unlock = True
# do not break: we must call storageLost() on all transactions
if unlock:
self._unlockPending()
def _unlockPending(self):
"""Serialize transaction unlocks
This should rarely delay unlocks since the time needed to lock a
transaction is roughly constant. The most common case where reordering
is required is when some storages are already busy by other tasks.
"""
queue = self._queue
self._on_commit(self._ttid_dict.pop(queue.popleft()))
while queue:
ttid = queue[0]
txn = self._ttid_dict[ttid]
if not txn.locked():
break
del queue[0], self._ttid_dict[ttid]
self._on_commit(txn)
self.executeQueuedEvents()
def clientLost(self, node):
for txn in self._ttid_dict.values():
if txn.clientLost(node):
tid = txn.getTTID()
del self[tid]
yield tid, txn.getNotificationUUIDList()
def log(self):
logging.info('Transactions:')
for txn in self._ttid_dict.itervalues():
logging.info(' %r', txn)
self.logQueuedEvents()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/master/verification.py 0000664 0000000 0000000 00000014623 13465024610 0026361 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import defaultdict
from neo.lib import logging
from neo.lib.protocol import ClusterStates, Packets, NodeStates
from .handlers import BaseServiceHandler
class VerificationManager(BaseServiceHandler):
def __init__(self, app):
self._locked_dict = {}
self._voted_dict = defaultdict(set)
self._uuid_set = set()
def _askStorageNodesAndWait(self, packet, node_list):
poll = self.app.em.poll
uuid_set = self._uuid_set
uuid_set.clear()
for node in node_list:
uuid_set.add(node.getUUID())
node.ask(packet)
while uuid_set:
poll(1)
def getHandler(self):
return self
def identifyStorageNode(self, known):
"""
Returns the handler to manager the given node
"""
if known:
state = NodeStates.RUNNING
else:
# if node is unknown, it has been forget when the current
# partition was validated by the admin
# Here the uuid is not cleared to allow lookup pending nodes by
# uuid from the test framework. It's safe since nodes with a
# conflicting UUID are rejected in the identification handler.
state = NodeStates.PENDING
return state, self
def run(self):
app = self.app
app.changeClusterState(ClusterStates.VERIFYING)
app.tm.reset()
if not app.backup_tid:
self.verifyData()
# This is where storages truncate if requested:
# - we make sure all nodes are running with a truncate_tid value saved
# - there's no unfinished data
# - just before they return the last tid/oid
self._askStorageNodesAndWait(Packets.AskLastIDs(),
[x for x in app.nm.getIdentifiedList() if x.isStorage()])
app.setLastTransaction(app.tm.getLastTID())
# Just to not return meaningless information in AnswerRecovery.
app.truncate_tid = None
def verifyData(self):
app = self.app
logging.info('start to verify data')
getIdentifiedList = app.nm.getIdentifiedList
# Gather all transactions that may have been partially finished.
# It's safe to query outdated cells from nodes with readable cells.
# For other nodes, it's more complicated:
# 1. pt: U|U ltid: 10
# 2. S1: restart with voted ttid=13
# S2: stop with locked ttid=13
# 3. pt: U|O ltid: 10
# 4. verification drops ttid=13 because it's not locked
# 5. new commits -> ltid: 20
# 6. S1 restarted, S2 started
# 7. ttid=13 must be dropped
# And we can't ignore ttid < last tid for all nodes, even if the
# master serializes unlock notifications:
# 1. pt: U.|.U ltid: 15
# 2. unlock ttid=18 to S1
# 3. unlock ttid=20 to S2
# 4. S1 stopped before unlocking ttid=18
# 5. S2 unlocks ttid=20
# 6. back to recovery, S1 started
# 7. verification must validate ttid=18
# So for nodes without any readable cell, and only for them, we only
# check if they have locked transactions. Replication will do the rest.
self._askStorageNodesAndWait(Packets.AskLockedTransactions(),
[x for x in getIdentifiedList() if x.isStorage()])
# Some nodes may have already unlocked these transactions and
# _locked_dict is incomplete, but we can ask them the final tid.
for ttid, voted_set in self._voted_dict.iteritems():
if ttid in self._locked_dict:
continue
partition = app.pt.getPartition(ttid)
for node in getIdentifiedList(pool_set={cell.getUUID()
# If an outdated cell had unlocked ttid, then either
# it is already in _locked_dict or a readable cell also
# unlocked it.
for cell in app.pt.getCellList(partition, readable=True)
} - voted_set):
self._askStorageNodesAndWait(Packets.AskFinalTID(ttid), (node,))
if self._tid is not None:
self._locked_dict[ttid] = self._tid
break
else:
# Transaction not locked. No need to tell nodes to delete it,
# since they drop any unfinished data just before being
# operational.
pass
# Finish all transactions for which we know that tpc_finish was called
# but not fully processed. This may include replicas with transactions
# that were not even locked.
for ttid, tid in self._locked_dict.iteritems():
uuid_set = self._voted_dict.get(ttid)
if uuid_set:
packet = Packets.ValidateTransaction(ttid, tid)
for node in getIdentifiedList(pool_set=uuid_set):
node.send(packet)
def answerLastIDs(self, conn, loid, ltid):
self._uuid_set.remove(conn.getUUID())
tm = self.app.tm
tm.setLastOID(loid)
tm.setLastTID(ltid)
def answerLockedTransactions(self, conn, tid_dict):
uuid = conn.getUUID()
self._uuid_set.remove(uuid)
app = self.app
node = app.nm.getByUUID(uuid)
vote = any(x[1].isReadable() for x in app.pt.iterNodeCell(node))
for ttid, tid in tid_dict.iteritems():
if tid:
self._locked_dict[ttid] = tid
if vote:
self._voted_dict[ttid].add(uuid)
def answerFinalTID(self, conn, tid):
self._uuid_set.remove(conn.getUUID())
self._tid = tid
def connectionLost(self, conn, new_state):
self._uuid_set.discard(conn.getUUID())
super(VerificationManager, self).connectionLost(conn, new_state)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/neoctl/ 0000775 0000000 0000000 00000000000 13465024610 0023310 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/neoctl/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0025407 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/neoctl/app.py 0000664 0000000 0000000 00000032123 13465024610 0024443 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from .neoctl import NeoCTL, NotReadyException
from neo.lib.node import NodeManager
from neo.lib.pt import PartitionTable
from neo.lib.util import p64, u64, tidFromTime, timeStringFromTID
from neo.lib.protocol import uuid_str, formatNodeList, \
ClusterStates, NodeStates, NodeTypes, UUID_NAMESPACES, ZERO_TID
action_dict = {
'print': {
'ids': 'getLastIds',
'pt': 'getPartitionRowList',
'node': 'getNodeList',
'cluster': 'getClusterState',
'primary': 'getPrimary',
},
'set': {
'cluster': 'setClusterState',
'replicas': 'setNumReplicas',
},
'check': 'checkReplicas',
'start': 'startCluster',
'add': 'enableStorageList',
'tweak': 'tweakPartitionTable',
'drop': 'dropNode',
'kill': 'killNode',
'prune_orphan': 'pruneOrphan',
'truncate': 'truncate',
'flush_log': 'flushLog',
}
uuid_int = (lambda ns: lambda uuid:
(ns[uuid[0]] << 24) + int(uuid[1:])
)({str(k)[0]: v for k, v in UUID_NAMESPACES.iteritems()})
class dummy_app:
id_timestamp = uuid = 0
class TerminalNeoCTL(object):
def __init__(self, *args, **kw):
self.neoctl = NeoCTL(*args, **kw)
def __del__(self):
self.neoctl.close()
# Utility methods (could be functions)
def asNodeType(self, value):
return getattr(NodeTypes, value.upper())
def asClusterState(self, value):
return getattr(ClusterStates, value.upper())
def asTID(self, value):
if '.' in value:
return tidFromTime(float(value))
return p64(int(value, 0))
asNode = staticmethod(uuid_int)
def formatPartitionTable(self, row_list):
nm = NodeManager()
nm.update(dummy_app, 1,
self.neoctl.getNodeList(node_type=NodeTypes.STORAGE))
pt = object.__new__(PartitionTable)
pt._load(None, None, row_list, nm.getByUUID)
pt.addNodeList(nm.getByStateList(NodeStates.RUNNING))
return '\n'.join(line[4:] for line in pt._format())
def formatRowList(self, row_list):
return '\n'.join('%03d |%s' % (offset,
''.join(' %s - %s |' % (uuid_str(uuid), state)
for (uuid, state) in cell_list))
for (offset, cell_list) in row_list)
# Actual actions
def getLastIds(self, params):
"""
Get last ids.
"""
assert not params
ptid, backup_tid, truncate_tid = self.neoctl.getRecovery()
if backup_tid:
ltid = self.neoctl.getLastTransaction()
r = "backup_tid = 0x%x (%s)" % (u64(backup_tid),
timeStringFromTID(backup_tid))
else:
loid, ltid = self.neoctl.getLastIds()
r = "last_oid = 0x%x" % (u64(loid))
return r + "\nlast_tid = 0x%x (%s)\nlast_ptid = %s" % \
(u64(ltid), timeStringFromTID(ltid), ptid)
def getPartitionRowList(self, params):
"""
Get a list of partition rows, bounded by min & max and involving
given node.
Parameters: [min [max [node]]]
min: offset of the first row to fetch (starts at 0)
max: offset of the last row to fetch (0 for no limit)
node: filters the list of nodes serving a line to this node
"""
params = params + [0, 0, None][len(params):]
min_offset, max_offset, node = params
min_offset = int(min_offset)
max_offset = int(max_offset)
if node is not None:
node = self.asNode(node)
ptid, num_replicas, row_list = self.neoctl.getPartitionRowList(
min_offset=min_offset, max_offset=max_offset, node=node)
return '# ptid: %s, replicas: %s\n%s' % (ptid, num_replicas,
self.formatRowList(enumerate(row_list, min_offset))
if min_offset or max_offset else
self.formatPartitionTable(row_list))
def getNodeList(self, params):
"""
Get a list of nodes, filtering with given type.
Parameters: [type]
type: type of node to display
"""
assert len(params) < 2
if len(params):
node_type = self.asNodeType(params[0])
else:
node_type = None
node_list = self.neoctl.getNodeList(node_type=node_type)
return '\n'.join(formatNodeList(node_list)) or 'Empty list!'
def getClusterState(self, params):
"""
Get cluster state.
"""
assert len(params) == 0
return str(self.neoctl.getClusterState())
def setClusterState(self, params):
"""
Set cluster state.
Parameters: state
state: state to put the cluster in
"""
assert len(params) == 1
return self.neoctl.setClusterState(self.asClusterState(params[0]))
def setNumReplicas(self, params):
"""
Set number of replicas.
Parameters: nr
nr: positive number (0 means no redundancy)
"""
assert len(params) == 1
nr = int(params[0])
if nr < 0:
sys.exit('invalid number of replicas')
return self.neoctl.setNumReplicas(nr)
def startCluster(self, params):
"""
Starts cluster operation after a startup.
Equivalent to:
set cluster verifying
"""
assert len(params) == 0
return self.neoctl.startCluster()
def _getStorageList(self, params):
if len(params) == 1 and params[0] == 'all':
node_list = self.neoctl.getNodeList(NodeTypes.STORAGE)
return [node[2] for node in node_list]
return map(self.asNode, params)
def enableStorageList(self, params):
"""
Enable cluster to make use of pending storages.
Parameters: node [node [...]]
node: if "all", add all pending storage nodes,
otherwise, the list of storage nodes to enable.
"""
return self.neoctl.enableStorageList(self._getStorageList(params))
def tweakPartitionTable(self, params):
"""
Optimize partition table.
No change is done to the specified/down storage nodes and they don't
count as replicas. The purpose of listing nodes is usually to drop
them once the data is replicated to other nodes.
Parameters: [-n] [node [...]]
-n: dry run
"""
dry_run = params[0] == '-n'
changed, row_list = self.neoctl.tweakPartitionTable(
map(self.asNode, params[dry_run:]), dry_run)
if changed:
return self.formatPartitionTable(row_list)
return 'No change done.'
def killNode(self, params):
"""
Kill redundant nodes (either a storage or a secondary master).
Parameters: node
"""
return self.neoctl.killNode(self.asNode(*params))
def dropNode(self, params):
"""
Remove storage node permanently.
Parameters: node
"""
return self.neoctl.dropNode(self.asNode(*params))
def getPrimary(self, params):
"""
Get primary master node.
"""
return uuid_str(self.neoctl.getPrimary())
def pruneOrphan(self, params):
"""
Fix database by deleting unreferenced raw data
This can take a long time.
Parameters: dry_run node [node [...]]
dry_run: 0 or 1
node: if "all", ask all connected storage nodes to repair,
otherwise, only the given list of storage nodes.
"""
dry_run = "01".index(params.pop(0))
return self.neoctl.repair(self._getStorageList(params), dry_run)
def truncate(self, params):
"""
Truncate the database at the given tid.
The cluster must be in RUNNING state, without any pending transaction.
This causes the cluster to go back in RECOVERING state, waiting all
nodes to be pending (do not use 'start' command unless you're sure
the missing nodes don't need to be truncated).
Parameters: tid
"""
self.neoctl.truncate(self.asTID(*params))
def checkReplicas(self, params):
"""
Test whether partitions have corrupted metadata
Any corrupted cell is put in CORRUPTED state, possibly make the
cluster non operational.
Parameters: [partition]:[reference] ... [min_tid [max_tid]]
reference: node id of a storage with known good data
If not given, and if the cluster is in backup mode, an upstream
cell is automatically taken as reference.
"""
partition_dict = {}
params = iter(params)
min_tid = ZERO_TID
max_tid = None
for p in params:
try:
partition, source = p.split(':')
except ValueError:
min_tid = self.asTID(p)
try:
max_tid = self.asTID(params.next())
except StopIteration:
pass
break
source = self.asNode(source) if source else None
if partition:
partition_dict[int(partition)] = source
else:
assert not partition_dict
np = len(self.neoctl.getPartitionRowList()[1])
partition_dict = dict.fromkeys(xrange(np), source)
self.neoctl.checkReplicas(partition_dict, min_tid, max_tid)
def flushLog(self, params):
"""
Ask all nodes in the cluster to flush their logs.
If there are backup clusters, only their primary masters flush.
"""
assert not params
self.neoctl.flushLog()
class Application(object):
"""The storage node application."""
def __init__(self, *args, **kw):
self.neoctl = TerminalNeoCTL(*args, **kw)
def execute(self, args):
"""Execute the command given."""
# print node type : print list of node of the given type
# (STORAGE_NODE_TYPE, MASTER_NODE_TYPE...)
# set node uuid state [1|0] : set the node for the given uuid to the
# state (RUNNING, DOWN...) and modify the partition if asked
# set cluster name [shutdown|operational] : either shutdown the
# cluster or mark it as operational
if not args:
return self.usage()
current_action = action_dict
level = 0
try:
while level < len(args) and \
isinstance(current_action, dict):
current_action = current_action[args[level]]
level += 1
except KeyError:
sys.exit('invalid command: ' + ' '.join(args))
action = getattr(self.neoctl, current_action)
try:
return action(args[level:])
except NotReadyException, message:
return 'ERROR: %s' % (message, )
def _usage(self, action_dict, level=0):
result = []
append = result.append
sub_level = level + 1
for name, action in action_dict.iteritems():
append('%s%s' % (' ' * level, name))
if isinstance(action, dict):
append(self._usage(action, level=sub_level))
else:
real_action = getattr(self.neoctl, action, None)
if real_action is None:
continue
docstring = getattr(real_action, '__doc__', None)
if docstring is None:
docstring = '(no docstring)'
docstring_line_list = docstring.split('\n')
# Strip empty lines at beginning & end of line list
for end in (0, -1):
while len(docstring_line_list) \
and docstring_line_list[end] == '':
docstring_line_list.pop(end)
# Get the indentation of first line, to preserve other lines
# relative indentation.
first_line = docstring_line_list[0]
base_indentation = len(first_line) - len(first_line.lstrip())
result.extend([(' ' * sub_level) + x[base_indentation:] \
for x in docstring_line_list])
return '\n'.join(result)
def usage(self):
output_list = ('Available commands:', self._usage(action_dict),
"TID arguments can be either integers or timestamps as floats,"
" e.g. '257684787499560686', '0x3937af2eeeeeeee' or '1325421296.'"
" for 2012-01-01 12:34:56 UTC")
return '\n'.join(output_list)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/neoctl/handler.py 0000664 0000000 0000000 00000004510 13465024610 0025277 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from neo.lib.handler import EventHandler
from neo.lib.protocol import ErrorCodes, Packets
class CommandEventHandler(EventHandler):
""" Base handler for command """
def connectionCompleted(self, conn):
# connected to admin node
self.app.connected = True
super(CommandEventHandler, self).connectionCompleted(conn)
def __disconnected(self):
app = self.app
app.connected = False
app.connection = None
def __respond(self, response):
self.app.response_queue.append(response)
def connectionClosed(self, conn):
super(CommandEventHandler, self).connectionClosed(conn)
self.__disconnected()
def connectionFailed(self, conn):
super(CommandEventHandler, self).connectionFailed(conn)
self.__disconnected()
def ack(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.ACK, msg))
def denied(self, conn, msg):
sys.exit(msg)
def notReady(self, conn, msg):
self.__respond((Packets.Error, ErrorCodes.NOT_READY, msg))
def __answer(packet_type):
def answer(self, conn, *args):
self.__respond((packet_type, ) + args)
return answer
answerPartitionList = __answer(Packets.AnswerPartitionList)
answerNodeList = __answer(Packets.AnswerNodeList)
answerClusterState = __answer(Packets.AnswerClusterState)
answerPrimary = __answer(Packets.AnswerPrimary)
answerLastIDs = __answer(Packets.AnswerLastIDs)
answerLastTransaction = __answer(Packets.AnswerLastTransaction)
answerRecovery = __answer(Packets.AnswerRecovery)
answerTweakPartitionTable = __answer(Packets.AnswerTweakPartitionTable)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/neoctl/neoctl.py 0000664 0000000 0000000 00000017106 13465024610 0025153 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import argparse
from neo.lib import util
from neo.lib.app import BaseApplication, buildOptionParser
from neo.lib.connection import ClientConnection, ConnectionClosed
from neo.lib.protocol import ClusterStates, NodeStates, ErrorCodes, Packets
from .handler import CommandEventHandler
class NotReadyException(Exception):
pass
@buildOptionParser
class NeoCTL(BaseApplication):
connection = None
connected = False
@classmethod
def _buildOptionParser(cls):
# XXX: Use argparse sub-commands.
parser = cls.option_parser
parser.description = "NEO Control node"
parser('a', 'address', default='127.0.0.1:9999',
parse=lambda x: util.parseNodeAddress(x, 9999),
help="address of an admin node")
parser.argument('cmd', nargs=argparse.REMAINDER,
help="command to execute; if not supplied,"
" the list of available commands is displayed")
def __init__(self, address, **kw):
super(NeoCTL, self).__init__(**kw)
self.server = self.nm.createAdmin(address=address)
self.handler = CommandEventHandler(self)
self.response_queue = []
def __getConnection(self):
if not self.connected:
self.connection = ClientConnection(self, self.handler, self.server)
# Never delay reconnection to master. This speeds up unit tests
# and it should not change anything for normal use.
try:
self.connection.setReconnectionNoDelay()
except ConnectionClosed:
self.connection = None
while not self.connected:
if self.connection is None:
raise NotReadyException('not connected')
self.em.poll(1)
return self.connection
def __ask(self, packet):
# TODO: make thread-safe
connection = self.__getConnection()
connection.ask(packet)
response_queue = self.response_queue
assert len(response_queue) == 0
while self.connected:
self.em.poll(1)
if response_queue:
break
else:
raise NotReadyException, 'Connection closed'
response = response_queue.pop()
if response[0] == Packets.Error and \
response[1] == ErrorCodes.NOT_READY:
raise NotReadyException(response[2])
return response
def enableStorageList(self, uuid_list):
"""
Put all given storage nodes in "running" state.
"""
packet = Packets.AddPendingNodes(uuid_list)
response = self.__ask(packet)
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def tweakPartitionTable(self, uuid_list=(), dry_run=False):
response = self.__ask(Packets.TweakPartitionTable(dry_run, uuid_list))
if response[0] != Packets.AnswerTweakPartitionTable:
raise RuntimeError(response)
return response[1:]
def setNumReplicas(self, nr):
response = self.__ask(Packets.SetNumReplicas(nr))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def setClusterState(self, state):
"""
Set cluster state.
"""
packet = Packets.SetClusterState(state)
response = self.__ask(packet)
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def _setNodeState(self, node, state):
"""
Kill node, or remove it permanently
"""
response = self.__ask(Packets.SetNodeState(node, state))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def getClusterState(self):
"""
Get cluster state.
"""
packet = Packets.AskClusterState()
response = self.__ask(packet)
if response[0] != Packets.AnswerClusterState:
raise RuntimeError(response)
return response[1]
def getLastIds(self):
response = self.__ask(Packets.AskLastIDs())
if response[0] != Packets.AnswerLastIDs:
raise RuntimeError(response)
return response[1:]
def getLastTransaction(self):
response = self.__ask(Packets.AskLastTransaction())
if response[0] != Packets.AnswerLastTransaction:
raise RuntimeError(response)
return response[1]
def getRecovery(self):
response = self.__ask(Packets.AskRecovery())
if response[0] != Packets.AnswerRecovery:
raise RuntimeError(response)
return response[1:]
def getNodeList(self, node_type=None):
"""
Get a list of nodes, filtering with given type.
"""
packet = Packets.AskNodeList(node_type)
response = self.__ask(packet)
if response[0] != Packets.AnswerNodeList:
raise RuntimeError(response)
return response[1] # node_list
def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
"""
Get a list of partition rows, bounded by min & max and involving
given node.
"""
packet = Packets.AskPartitionList(min_offset, max_offset, node)
response = self.__ask(packet)
if response[0] != Packets.AnswerPartitionList:
raise RuntimeError(response)
return response[1:]
def startCluster(self):
"""
Set cluster into "verifying" state.
"""
return self.setClusterState(ClusterStates.VERIFYING)
def killNode(self, node):
return self._setNodeState(node, NodeStates.DOWN)
def dropNode(self, node):
return self._setNodeState(node, NodeStates.UNKNOWN)
def getPrimary(self):
"""
Return the primary master UUID.
"""
packet = Packets.AskPrimary()
response = self.__ask(packet)
if response[0] != Packets.AnswerPrimary:
raise RuntimeError(response)
return response[1]
def repair(self, *args):
response = self.__ask(Packets.Repair(*args))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def truncate(self, tid):
response = self.__ask(Packets.Truncate(tid))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def checkReplicas(self, *args):
response = self.__ask(Packets.CheckReplicas(*args))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def flushLog(self):
conn = self.__getConnection()
conn.send(Packets.FlushLog())
while conn.pending():
self.em.poll(1)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/ 0000775 0000000 0000000 00000000000 13465024610 0023513 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0025612 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neoadmin.py 0000775 0000000 0000000 00000002032 13465024610 0025657 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neoadmin - run an administrator node of NEO
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
def main(args=None):
from neo.admin.app import Application
config = Application.option_parser.parse(args)
# setup custom logging
logging.setup(config.get('logfile'))
# and then, load and run the application
app = Application(config)
app.run()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neoctl.py 0000775 0000000 0000000 00000002657 13465024610 0025366 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neoctl - command-line interface to an administrator node of NEO
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
def main(args=None):
from neo.neoctl.neoctl import NeoCTL
config = NeoCTL.option_parser.parse(args)
logfile = config.get('logfile')
if logfile:
# Contrary to daemons, we log everything to disk automatically
# because a user using -l option here:
# - is certainly debugging an issue and wants everything,
# - would not have to time to send SIGRTMIN before neoctl exits.
logging.backlog(None)
logging.setup(logfile)
from neo.neoctl.app import Application
app = Application(config['address'], ssl=config.get('ssl'))
r = app.execute(config['cmd'])
if r is not None:
print r
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neolog.py 0000775 0000000 0000000 00000034421 13465024610 0025357 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neolog - read a NEO log
#
# Copyright (C) 2012-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import argparse, bz2, gzip, errno, os, signal, sqlite3, sys, time
from bisect import insort
from itertools import chain
from logging import getLevelName
from zlib import decompress
try:
import zstd
except ImportError:
zstdcat = 'zstdcat'
else:
from cStringIO import StringIO
def zstdcat(path):
with open(path, 'rb') as f:
return StringIO(zstd.decompress(f.read()))
comp_dict = dict(bz2=bz2.BZ2File, gz=gzip.GzipFile, xz='xzcat', zst=zstdcat)
class Log(object):
_log_date = _packet_date = 0
_protocol_date = None
def __init__(self, db_path, decode=0, date_format=None,
filter_from=None, show_cluster=False, no_nid=False,
node_column=True, node_list=None):
self._date_format = '%F %T' if date_format is None else date_format
self._decode = decode
self._filter_from = filter_from
self._no_nid = no_nid
self._node_column = node_column
self._node_list = node_list
self._node_dict = {}
self._show_cluster = show_cluster
name = os.path.basename(db_path)
try:
name, ext = name.rsplit(os.extsep, 1)
ZipFile = comp_dict[ext]
except (KeyError, ValueError):
# BBB: Python 2 does not support URI so we can't open in read-only
# mode. See https://bugs.python.org/issue13773
os.stat(db_path) # do not create empty DB if file is missing
self._db = sqlite3.connect(db_path)
else:
import shutil, subprocess, tempfile
with tempfile.NamedTemporaryFile() as f:
if type(ZipFile) is str:
subprocess.check_call((ZipFile, db_path), stdout=f)
else:
shutil.copyfileobj(ZipFile(db_path), f)
self._db = sqlite3.connect(f.name)
name = name.rsplit(os.extsep, 1)[0]
self._default_name = name
def __iter__(self):
q = self._db.execute
try:
q("BEGIN")
yield
date = self._filter_from
if date and max(self._log_date, self._packet_date) < date:
log_args = packet_args = date,
date = " WHERE date>=?"
else:
self._filter_from = None
log_args = self._log_date,
packet_args = self._packet_date,
date = " WHERE date>?"
old = "SELECT date, name, NULL, NULL, %s FROM %s" + date
new = ("SELECT date, name, cluster, nid, %s"
" FROM %s LEFT JOIN node ON node=id" + date)
log = 'level, pathname, lineno, msg'
pkt = 'msg_id, code, peer, body'
try:
nl = q(new % (log, 'log'), log_args)
except sqlite3.OperationalError:
nl = q(old % (log, 'log'), log_args)
np = q(old % (pkt, 'packet'), packet_args)
else:
np = q(new % (pkt, 'packet'), packet_args)
try:
nl = chain(q(old % (log, 'log1'), log_args), nl)
np = chain(q(old % (pkt, 'packet1'), packet_args), np)
except sqlite3.OperationalError:
pass
try:
p = np.next()
self._reload(p[0])
except StopIteration:
p = None
except sqlite3.DatabaseError, e:
yield time.time(), None, 'PACKET', self._exc(e)
p = None
try:
for date, name, cluster, nid, level, pathname, lineno, msg in nl:
while p and p[0] < date:
yield self._packet(*p)
try:
p = next(np, None)
except sqlite3.DatabaseError, e:
yield time.time(), None, 'PACKET', self._exc(e)
p = None
self._log_date = date
yield (date, self._node(name, cluster, nid),
getLevelName(level), msg.splitlines())
except sqlite3.DatabaseError, e:
yield time.time(), None, 'LOG', self._exc(e)
if p:
yield self._packet(*p)
try:
for p in np:
yield self._packet(*p)
except sqlite3.DatabaseError, e:
yield time.time(), None, 'PACKET', self._exc(e)
finally:
self._db.rollback()
@staticmethod
def _exc(e):
return ('%s: %s' % (type(e).__name__, e)).splitlines()
def _node(self, name, cluster, nid):
if nid and not self._no_nid:
name = self.uuid_str(nid)
if self._show_cluster:
name = cluster + '/' + name
return name
def _reload(self, date):
q = self._db.execute
date, text = q("SELECT * FROM protocol WHERE date<=?"
" ORDER BY date DESC", (date,)).next()
if self._protocol_date == date:
return
self._protocol_date = date
g = {}
exec bz2.decompress(text) in g
for x in 'uuid_str', 'Packets', 'PacketMalformedError':
setattr(self, x, g[x])
x = {}
try:
Unpacker = g['Unpacker']
except KeyError:
unpackb = None
else:
from msgpack import ExtraData, UnpackException
def unpackb(data):
u = Unpacker()
u.feed(data)
data = u.unpack()
if u.read_bytes(1):
raise ExtraData
return data
self.PacketMalformedError = UnpackException
self.unpackb = unpackb
if self._decode > 1:
try:
PStruct = g['PStruct']
except KeyError:
for p in self.Packets.itervalues():
data_path = getattr(p, 'data_path', (None,))
if p._code >> 15 == data_path[0]:
x[p._code] = data_path[1:]
else:
PBoolean = g['PBoolean']
def hasData(item):
items = item._items
for i, item in enumerate(items):
if isinstance(item, PStruct):
j = hasData(item)
if j:
return (i,) + j
elif (isinstance(item, PBoolean)
and item._name == 'compression'
and i + 2 < len(items)
and items[i+2]._name == 'data'):
return i,
for p in self.Packets.itervalues():
if p._fmt is not None:
path = hasData(p._fmt)
if path:
assert not hasattr(p, '_neolog'), p
x[p._code] = path
self._getDataPath = x.get
try:
self._next_protocol, = q("SELECT date FROM protocol WHERE date>?",
(date,)).next()
except StopIteration:
self._next_protocol = float('inf')
def _emit(self, date, name, levelname, msg_list):
if not name:
name = self._default_name
if self._node_list and name not in self._node_list:
return
prefix = self._date_format
if prefix:
d = int(date)
prefix = '%s.%04u ' % (time.strftime(prefix, time.localtime(d)),
int((date - d) * 10000))
prefix += ('%-9s %-10s ' % (levelname, name) if self._node_column else
'%-9s ' % levelname)
for msg in msg_list:
print prefix + msg
def _packet(self, date, name, cluster, nid, msg_id, code, peer, body):
self._packet_date = date
if self._next_protocol <= date:
self._reload(date)
try:
p = self.Packets[code]
msg = p.__name__
except KeyError:
msg = 'UnknownPacket[%u]' % code
body = None
msg = ['#0x%04x %-30s %s' % (msg_id, msg, peer)]
if body is not None:
log = getattr(p, '_neolog', None)
if log or self._decode:
try:
if self.unpackb:
args = self.unpackb(body)
else:
p = p()
p._body = body
args = p.decode()
except self.PacketMalformedError:
msg.append("Can't decode packet")
else:
if log:
args, extra = log(*args)
msg += extra
else:
path = self._getDataPath(code)
if path:
args = self._decompress(args, path)
if args and self._decode:
msg[0] += ' \t| ' + repr(args)
return date, self._node(name, cluster, nid), 'PACKET', msg
def _decompress(self, args, path):
if args:
args = list(args)
i = path[0]
path = path[1:]
if path:
args[i] = self._decompress(args[i], path)
else:
data = args[i+2]
if args[i]:
data = decompress(data)
args[i:i+3] = (len(data), data),
return tuple(args)
def emit_many(log_list):
log_list = [(log, iter(log).next) for log in log_list]
for x in log_list: # try to start all transactions at the same time
x[1]()
event_list = []
for log, next in log_list:
try:
event = next()
except StopIteration:
continue
event_list.append((-event[0], next, log._emit, event))
if event_list:
event_list.sort()
while True:
key, next, emit, event = event_list.pop()
try:
next_date = - event_list[-1][0]
except IndexError:
next_date = float('inf')
try:
while event[0] <= next_date:
emit(*event)
event = next()
except IOError, e:
if e.errno == errno.EPIPE:
sys.exit(1)
raise
except StopIteration:
if not event_list:
break
else:
insort(event_list, (-event[0], next, emit, event))
def main():
parser = argparse.ArgumentParser(description='NEO Log Reader')
_ = parser.add_argument
_('-a', '--all', action="store_true",
help='decode body of packets')
_('-A', '--decompress', action="store_true",
help='decompress data when decode body of packets (implies --all)')
_('-d', '--date', metavar='FORMAT',
help='custom date format, according to strftime(3)')
_('-f', '--follow', action="store_true",
help='output appended data as the file grows')
_('-F', '--flush', action="append", type=int, metavar='PID',
help='with -f, tell process PID to flush logs approximately N'
' seconds (see -s)')
_('-n', '--node', action="append",
help='only show log entries from the given node'
' (only useful for logs produced by threaded tests),'
" special value '-' hides the column")
_('-s', '--sleep-interval', type=float, default=1., metavar='N',
help='with -f, sleep for approximately N seconds (default %(default)s)'
' between iterations')
_('--from', dest='filter_from', metavar='N',
help='show records more recent that timestamp N if N > 0, or now+N'
' if N < 0; N can also be a string that is parseable by dateutil')
_('file', nargs='+',
help='log file, compressed (gz, bz2 or xz) or not')
_ = parser.add_mutually_exclusive_group().add_argument
_('-C', '--cluster', action="store_true",
help='show cluster name in node column')
_('-N', '--no-nid', action="store_true",
help='always show node name (instead of NID) in node column')
args = parser.parse_args()
if args.sleep_interval <= 0:
parser.error("sleep_interval must be positive")
filter_from = args.filter_from
if filter_from:
try:
filter_from = float(args.filter_from)
except ValueError:
from dateutil.parser import parse
x = parse(filter_from)
if x.tzinfo:
filter_from = (x - x.fromtimestamp(0, x.tzinfo)).total_seconds()
else:
filter_from = time.mktime(x.timetuple()) + x.microsecond * 1e-6
else:
if filter_from < 0:
filter_from += time.time()
node_list = args.node or []
try:
node_list.remove('-')
node_column = False
except ValueError:
node_column = True
log_list = [Log(db_path,
2 if args.decompress else 1 if args.all else 0,
args.date, filter_from, args.cluster, args.no_nid,
node_column, node_list)
for db_path in args.file]
if args.follow:
try:
pid_list = args.flush or ()
while True:
emit_many(log_list)
for pid in pid_list:
os.kill(pid, signal.SIGRTMIN)
time.sleep(args.sleep_interval)
except KeyboardInterrupt:
pass
else:
emit_many(log_list)
if __name__ == "__main__":
main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neomaster.py 0000775 0000000 0000000 00000002022 13465024610 0026061 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neomaster - run a master node of NEO
#
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
def main(args=None):
from neo.master.app import Application
config = Application.option_parser.parse(args)
# setup custom logging
logging.setup(config.get('logfile'))
# and then, load and run the application
app = Application(config)
app.run()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neomigrate.py 0000775 0000000 0000000 00000005737 13465024610 0026236 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neomigrate - import/export data between NEO and a FileStorage
#
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from __future__ import print_function
import time
import os
from neo.lib.app import buildOptionParser
import_warning = (
"WARNING: This is not the recommended way to import data to NEO:"
" you should use the Importer backend instead.\n"
"NEO also does not implement IStorageRestoreable interface, which"
" means that undo information is not preserved when using this tool:"
" conflict resolution could happen when undoing an old transaction."
)
@buildOptionParser
class NEOMigrate(object):
from neo.lib.config import OptionList
@classmethod
def _buildOptionParser(cls):
parser = cls.option_parser
parser.description = "NEO <-> FileStorage conversion tool"
parser('c', 'cluster', required=True, help='the NEO cluster name')
parser.bool('q', 'quiet', help='print nothing to standard output')
parser.argument('source', help='the source database')
parser.argument('destination', help='the destination database')
def __init__(self, config):
self.name = config.pop('cluster')
self.source = config.pop('source')
self.destination = config.pop('destination')
self.quiet = config.pop('quiet', False)
from ZODB.FileStorage import FileStorage
from neo.client.Storage import Storage as NEOStorage
if os.path.exists(self.source):
if not self.quiet:
print(import_warning)
self.src = FileStorage(file_name=self.source, read_only=True)
self.dst = NEOStorage(master_nodes=self.destination, name=self.name,
**config)
else:
self.src = NEOStorage(master_nodes=self.source, name=self.name,
read_only=True, **config)
self.dst = FileStorage(file_name=self.destination)
def run(self):
if not self.quiet:
print("Migrating from %s to %s" % (self.source, self.destination))
start = time.time()
self.dst.copyTransactionsFrom(self.src)
if not self.quiet:
elapsed = time.time() - start
print("Migration done in %3.5f" % elapsed)
def main(args=None):
config = NEOMigrate.option_parser.parse(args)
NEOMigrate(config).run()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/neostorage.py 0000775 0000000 0000000 00000002071 13465024610 0026236 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# neostorage - run a storage node of NEO
#
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
def main(args=None):
from neo.storage.app import Application
config = Application.option_parser.parse(args)
# setup custom logging
logging.setup(config.get('logfile'))
# and then, load and run the application
app = Application(config)
if not config.get('reset'):
app.run()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/runner.py 0000775 0000000 0000000 00000041675 13465024610 0025416 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import argparse
import traceback
import unittest
import time
import sys
import os
import re
from collections import Counter, defaultdict
from cStringIO import StringIO
from fnmatch import fnmatchcase
from unittest.runner import _WritelnDecorator
if filter(re.compile(r'--coverage$|-\w*c').match, sys.argv[1:]):
# Start coverage as soon as possible.
import coverage
coverage = coverage.Coverage()
coverage.neotestrunner = []
coverage.start()
from neo.lib import logging
from neo.tests import getTempDirectory, NeoTestBase, Patch, \
__dict__ as neo_tests__dict__
from neo.tests.benchmark import BenchmarkRunner
# list of test modules
# each of them have to import its TestCase classes
UNIT_TEST_MODULES = [
# generic parts
'neo.tests.testHandler',
'neo.tests.testNodes',
'neo.tests.testUtil',
'neo.tests.testPT',
# master application
'neo.tests.master.testClientHandler',
'neo.tests.master.testMasterApp',
'neo.tests.master.testMasterPT',
'neo.tests.master.testStorageHandler',
'neo.tests.master.testTransactions',
# storage application
'neo.tests.storage.testClientHandler',
'neo.tests.storage.testMasterHandler',
'neo.tests.storage.testStorage' + os.getenv('NEO_TESTS_ADAPTER', 'SQLite'),
'neo.tests.storage.testTransactions',
# client application
'neo.tests.client.testClientApp',
'neo.tests.client.testMasterHandler',
'neo.tests.client.testZODBURI',
# light functional tests
'neo.tests.threaded.test',
'neo.tests.threaded.testConfig',
'neo.tests.threaded.testImporter',
'neo.tests.threaded.testReplication',
'neo.tests.threaded.testSSL',
]
FUNC_TEST_MODULES = [
'neo.tests.functional.testMaster',
'neo.tests.functional.testClient',
'neo.tests.functional.testCluster',
'neo.tests.functional.testStorage',
]
ZODB_TEST_MODULES = [
('neo.tests.zodb.testBasic', 'check'),
('neo.tests.zodb.testConflict', 'check'),
('neo.tests.zodb.testHistory', 'check'),
('neo.tests.zodb.testIterator', 'check'),
('neo.tests.zodb.testMT', 'check'),
('neo.tests.zodb.testPack', 'check'),
('neo.tests.zodb.testPersistent', 'check'),
('neo.tests.zodb.testReadOnly', 'check'),
('neo.tests.zodb.testRevision', 'check'),
#('neo.tests.zodb.testRecovery', 'check'),
('neo.tests.zodb.testSynchronization', 'check'),
# ('neo.tests.zodb.testVersion', 'check'),
('neo.tests.zodb.testUndo', 'check'),
('neo.tests.zodb.testZODB', 'check'),
]
class StopOnSuccess(Exception):
pass
class NeoTestRunner(unittest.TextTestResult):
""" Custom result class to build report with statistics per module """
_readable_tid = ()
def __init__(self, title, verbosity, stop_on_success, readable_tid):
super(NeoTestRunner, self).__init__(
_WritelnDecorator(sys.stderr), False, verbosity)
self._title = title
self.stop_on_success = stop_on_success
if readable_tid:
from neo.lib import util
from neo.lib.util import dump, p64, u64
from neo.master.transactions import TransactionManager
def _nextTID(orig, tm, ttid=None, divisor=None):
n = self._next_tid
self._next_tid = n + 1
n = str(n).rjust(3, '-')
if ttid:
t = u64('T%s%s-' % (n, ttid[1:4]))
m = (u64(ttid) - t) % divisor
assert m < 211, (p64(t), divisor)
t = p64(t + m)
else:
t = 'T%s----' % n
assert tm._last_tid < t, (tm._last_tid, t)
tm._last_tid = t
return t
self._readable_tid = (
Patch(self, 1, _next_tid=0),
Patch(TransactionManager, _nextTID=_nextTID),
Patch(util, 1, orig_dump=type(dump)(
dump.__code__, dump.__globals__)),
Patch(dump, __code__=(lambda s:
s if type(s) is str and s.startswith('T') else
orig_dump(s)).__code__),
)
self.modulesStats = {}
self.failedImports = {}
self.run_dict = defaultdict(int)
self.time_dict = defaultdict(int)
self.temp_directory = getTempDirectory()
def wasSuccessful(self):
return not (self.failures or self.errors or self.unexpectedSuccesses)
def run(self, name, modules, only):
suite = unittest.TestSuite()
loader = unittest.TestLoader()
if only:
exclude = only[0] == '!'
test_only = only[exclude + 1:]
only = only[exclude]
if test_only:
def getTestCaseNames(testCaseClass):
tests = loader.__class__.getTestCaseNames(
loader, testCaseClass)
x = testCaseClass.__name__ + '.'
return [t for t in tests
if exclude != any(fnmatchcase(x + t, o)
for o in test_only)]
loader.getTestCaseNames = getTestCaseNames
if not only:
only = '*'
else:
print '\n', name
for test_module in modules:
# load prefix if supplied
if isinstance(test_module, tuple):
test_module, loader.testMethodPrefix = test_module
if only and not (exclude and test_only or
exclude != fnmatchcase(test_module, only)):
continue
try:
test_module = __import__(test_module, fromlist=('*',), level=0)
except ImportError, err:
self.failedImports[test_module] = err
print "Import of %s failed : %s" % (test_module, err)
traceback.print_exc()
continue
suite.addTests(loader.loadTestsFromModule(test_module))
try:
suite.run(self)
finally:
# Workaround weird behaviour of Python.
self._previousTestClass = None
def startTest(self, test):
super(NeoTestRunner, self).startTest(test)
for patch in self._readable_tid:
patch.apply()
self.run_dict[test.__class__.__module__] += 1
self.start_time = time.time()
def stopTest(self, test):
self.time_dict[test.__class__.__module__] += \
time.time() - self.start_time
for patch in self._readable_tid:
patch.revert()
super(NeoTestRunner, self).stopTest(test)
if self.stop_on_success is not None:
count = self.getUnexpectedCount()
if (count < self.testsRun - len(self.skipped)
if self.stop_on_success else count):
raise StopOnSuccess
def getUnexpectedCount(self):
return (len(self.errors) + len(self.failures)
+ len(self.unexpectedSuccesses))
def _buildSummary(self, add_status):
unexpected_count = self.getUnexpectedCount()
expected_count = len(self.expectedFailures)
success = self.testsRun - unexpected_count - expected_count
add_status('Directory', self.temp_directory)
if self.testsRun:
add_status('Status', '%.3f%%' % (success * 100.0 / self.testsRun))
for var in os.environ:
if var.startswith('NEO_TEST'):
add_status(var, os.environ[var])
# visual
header = "%25s | run | unexpected | expected | skipped | time \n" % 'Test Module'
separator = "%25s-+-------+------------+----------+---------+----------\n" % ('-' * 25)
format = "%25s | %3s | %3s | %3s | %3s | %6.2fs \n"
group_f = "%25s | | | | | \n"
# header
s = ' ' * 30 + ' NEO TESTS REPORT\n\n' + header + separator
group = None
unexpected = Counter(x[0].__class__.__module__
for x in (self.errors, self.failures)
for x in x)
unexpected.update(x.__class__.__module__
for x in self.unexpectedSuccesses)
expected = Counter(x[0].__class__.__module__
for x in self.expectedFailures)
skipped = Counter(x[0].__class__.__module__
for x in self.skipped)
total_time = 0
# for each test case
for k, v in sorted(self.run_dict.iteritems()):
# display group below its content
_group, name = k.rsplit('.', 1)
if _group != group:
if group:
s += separator + group_f % group + separator
group = _group
t = self.time_dict[k]
total_time += t
s += format % (name.lstrip('test'), v, unexpected.get(k, '.'),
expected.get(k, '.'), skipped.get(k, '.'), t)
# the last group
s += separator + group_f % group + separator
# the final summary
s += format % ("Summary", self.testsRun, unexpected_count or '.',
expected_count or '.', len(self.skipped) or '.',
total_time) + separator + '\n'
return "%s Tests, %s Failed" % (self.testsRun, unexpected_count), s
def buildReport(self, add_status):
subject, summary = self._buildSummary(add_status)
if self.stop_on_success:
return subject, summary
body = StringIO()
body.write(summary)
for test in self.unexpectedSuccesses:
body.write("UNEXPECTED SUCCESS: %s\n" % self.getDescription(test))
self.stream = _WritelnDecorator(body)
self.printErrorList('ERROR', self.errors)
self.printErrorList('FAIL', self.failures)
return subject, body.getvalue()
class TestRunner(BenchmarkRunner):
def add_options(self, parser):
x = parser.add_mutually_exclusive_group().add_argument
x('-c', '--coverage', action='store_true',
help='Enable coverage')
x('-C', '--cov-unit', action='store_true',
help='Same as -c but output 1 file per test,'
' in the temporary test directory')
_ = parser.add_argument
_('-L', '--log', action='store_true',
help='Force all logs to be emitted immediately and keep'
' packet body in logs of successful threaded tests')
_('-l', '--loop', type=int, default=1,
help='Repeat tests several times')
_('-f', '--functional', action='store_true',
help='Functional tests')
x = parser.add_mutually_exclusive_group().add_argument
x('-s', '--stop-on-error', action='store_false',
dest='stop_on_success', default=None,
help='Continue as long as tests pass successfully.'
' It is usually combined with --loop, to check that tests'
' do not fail randomly.')
x('-S', '--stop-on-success', action='store_true', default=None,
help='Opposite of --stop-on-error: stop as soon as a test'
' passes. Details about errors are not printed at exit.')
x = parser.add_mutually_exclusive_group().add_argument
x('-p', '--dump-protocol', const=True,
dest='protocol', action='store_const',
help='Dump schema of protocol instead of checking it.')
x('-P', '--no-check-protocol', const=False,
dest='protocol', action='store_const',
help='Do not check schema of protocol.')
_('-r', '--readable-tid', action='store_true',
help='Change master behaviour to generate readable TIDs for easier'
' debugging (rather than from current time).')
_('-u', '--unit', action='store_true',
help='Unit & threaded tests')
_('-z', '--zodb', action='store_true',
help='ZODB test suite running on a NEO')
_('-v', '--verbose', action='store_true',
help='Verbose output')
_('only', nargs=argparse.REMAINDER, metavar='[[!] module [test...]]',
help="Filter by given module/test. These arguments are shell"
" patterns. This implies -ufz if none of this option is"
" passed.")
parser.epilog = """
Environment Variables:
NEO_TESTS_ADAPTER Default is SQLite for threaded clusters,
MySQL otherwise.
MySQL specific:
NEO_DB_SOCKET default: libmysqlclient.so default
NEO_DB_PREFIX default: %(DB_PREFIX)s
NEO_DB_ADMIN default: %(DB_ADMIN)s
NEO_DB_PASSWD default: %(DB_PASSWD)s
NEO_DB_USER default: %(DB_USER)s
ZODB tests:
NEO_TEST_ZODB_FUNCTIONAL Clusters are threaded by default. If true,
they are built like in functional tests.
NEO_TEST_ZODB_MASTERS default: 1
NEO_TEST_ZODB_PARTITIONS default: 1
NEO_TEST_ZODB_REPLICAS default: 0
NEO_TEST_ZODB_STORAGES default: 1
""" % neo_tests__dict__
def load_options(self, args):
if not (args.unit or args.functional or args.zodb):
if not args.only:
sys.exit('Nothing to run, please give one of -f, -u, -z')
args.unit = args.functional = args.zodb = True
return dict(
log = args.log,
loop = args.loop,
unit = args.unit,
functional = args.functional,
zodb = args.zodb,
verbosity = 2 if args.verbose else 1,
coverage = args.coverage,
cov_unit = args.cov_unit,
only = args.only,
protocol = args.protocol,
stop_on_success = args.stop_on_success,
readable_tid = args.readable_tid,
)
def start(self):
config = self._config
logging.backlog(max_packet=1<<20,
**({'max_size': None} if config.log else {}))
only = config.only
# run requested tests
runner = NeoTestRunner(config.title or 'Neo', config.verbosity,
config.stop_on_success, config.readable_tid)
if config.cov_unit:
from coverage import Coverage
cov_dir = runner.temp_directory + '/coverage'
os.mkdir(cov_dir)
@Patch(NeoTestBase)
def setUp(orig, self):
orig(self)
self.__coverage = Coverage('%s/%s' % (cov_dir, self.id()))
self.__coverage.start()
@Patch(NeoTestBase)
def _tearDown(orig, self, success):
self.__coverage.stop()
self.__coverage.save()
del self.__coverage
orig(self, success)
if config.protocol is False:
from contextlib import nested
protocol_checker = nested()
else:
from neo.tests.protocol_checker import protocolChecker
protocol_checker = protocolChecker(config.protocol)
with protocol_checker:
try:
for _ in xrange(config.loop):
if config.unit:
runner.run('Unit tests', UNIT_TEST_MODULES, only)
if config.functional:
runner.run('Functional tests', FUNC_TEST_MODULES, only)
if config.zodb:
runner.run('ZODB tests', ZODB_TEST_MODULES, only)
except KeyboardInterrupt:
config['mail_to'] = None
traceback.print_exc()
except StopOnSuccess:
pass
if config.coverage:
coverage.stop()
if coverage.neotestrunner:
coverage.combine(coverage.neotestrunner)
coverage.save()
if runner.dots:
print
# build report
if (only or config.stop_on_success) and not config.mail_to:
runner._buildSummary = lambda *args: (
runner.__class__._buildSummary(runner, *args)[0], '')
self.build_report = str
self._successful = runner.wasSuccessful()
return runner.buildReport(self.add_status)
def main(args=None):
from neo.storage.database.manager import DatabaseManager
DatabaseManager.UNSAFE = True
runner = TestRunner()
runner.run()
return sys.exit(not runner.was_successful())
if __name__ == "__main__":
main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/scripts/simple.py 0000664 0000000 0000000 00000004251 13465024610 0025360 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# Copyright (C) 2011-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import argparse, inspect, random
from logging import getLogger, INFO
from neo.lib import logging
from neo.tests import functional
logging.backlog()
del logging.default_root_handler.handle
def main():
args, _, _, defaults = inspect.getargspec(functional.NEOCluster.__init__)
option_list = zip(args[-len(defaults):], defaults)
parser = argparse.ArgumentParser(
description="Quickly setup a simple NEO cluster for testing purpose.")
parser.add_argument('--seed', help="settings like node ports/uuids and"
" cluster name are random: pass any string to initialize the RNG")
defaults = {}
for option, default in sorted(option_list):
kw = {}
if type(default) is bool:
kw['action'] = "store_true"
defaults[option] = False
elif default is not None:
defaults[option] = default
if isinstance(default, int):
kw['type'] = int
parser.add_argument('--' + option, **kw)
parser.set_defaults(**defaults)
parser.add_argument('db', nargs='+')
args = parser.parse_args()
if args.seed:
functional.random = random.Random(args.seed)
getLogger().setLevel(INFO)
cluster = functional.NEOCluster(args.db, **{x: getattr(args, x)
for x, _ in option_list})
try:
cluster.run()
logging.info("Cluster running ...")
cluster.waitAll()
finally:
cluster.stop()
if __name__ == "__main__":
main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/ 0000775 0000000 0000000 00000000000 13465024610 0023470 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0025567 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/app.py 0000664 0000000 0000000 00000027551 13465024610 0024634 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import sys
from collections import deque
from neo.lib import logging
from neo.lib.app import BaseApplication, buildOptionParser
from neo.lib.protocol import CellStates, ClusterStates, NodeTypes, Packets
from neo.lib.connection import ListeningConnection
from neo.lib.exception import StoppedOperation, PrimaryFailure
from neo.lib.pt import PartitionTable
from neo.lib.util import dump
from neo.lib.bootstrap import BootstrapManager
from .checker import Checker
from .database import buildDatabaseManager, DATABASE_MANAGER_DICT
from .handlers import identification, initialization, master
from .replicator import Replicator
from .transactions import TransactionManager
from neo.lib.debug import register as registerLiveDebugger
option_defaults = {
'adapter': 'MySQL',
'wait': 0,
}
assert option_defaults['adapter'] in DATABASE_MANAGER_DICT
@buildOptionParser
class Application(BaseApplication):
"""The storage node application."""
checker = replicator = tm = None
@classmethod
def _buildOptionParser(cls):
parser = cls.option_parser
parser.description = "NEO Storage node"
cls.addCommonServerOptions('storage', '127.0.0.1')
_ = parser.group('storage')
_('a', 'adapter', choices=sorted(DATABASE_MANAGER_DICT),
help="database adapter to use")
_('d', 'database', required=True,
help="database connections string")
_.float('w', 'wait',
help="seconds to wait for backend to be available,"
" before erroring-out (-1 = infinite)")
_.bool('disable-drop-partitions',
help="do not delete data of discarded cells, which is useful for"
" big databases because the current implementation is"
" inefficient (this option should disappear in the future)")
_.bool('new-nid',
help="request a new NID from a cluster that is already"
" operational, update the database with the new NID and exit,"
" which makes easier to quickly set up a replica by copying"
" the database of another node while it was stopped")
_ = parser.group('database creation')
_.int('i', 'nid',
help="specify an NID to use for this process. Previously"
" assigned NID takes precedence (i.e. you should"
" always use reset with this switch)")
_('e', 'engine', help="database engine (MySQL only)")
_.bool('dedup',
help="enable deduplication of data"
" when setting up a new storage node")
# TODO: Forbid using "reset" along with any unneeded argument.
# "reset" is too dangerous to let user a chance of accidentally
# letting it slip through in a long option list.
# It should even be forbidden in configuration files.
_.bool('reset',
help="remove an existing database if any, and exit")
parser.set_defaults(**option_defaults)
def __init__(self, config):
super(Application, self).__init__(
config.get('ssl'), config.get('dynamic_master_list'))
# set the cluster name
self.name = config['cluster']
self.dm = buildDatabaseManager(config['adapter'],
(config['database'], config.get('engine'), config['wait']),
)
self.disable_drop_partitions = config.get('disable_drop_partitions',
False)
# load master nodes
for master_address in config['masters']:
self.nm.createMaster(address=master_address)
# set the bind address
self.server = config['bind']
logging.debug('IP address is %s, port is %d', *self.server)
# The partition table is initialized after getting the number of
# partitions.
self.pt = None
self.listening_conn = None
self.master_conn = None
self.master_node = None
# operation related data
self.operational = False
self.dm.setup(reset=config.get('reset', False),
dedup=config.get('dedup', False))
self.loadConfiguration()
self.devpath = self.dm.getTopologyPath()
if config.get('new_nid'):
self.new_nid = [x[0] for x in self.dm.iterAssignedCells()]
if not self.new_nid:
sys.exit('database is empty')
self.uuid = None
else:
self.new_nid = ()
if 'nid' in config: # for testing purpose only
self.uuid = config['nid']
logging.node(self.name, self.uuid)
registerLiveDebugger(on_log=self.log)
def close(self):
self.listening_conn = None
self.dm.close()
super(Application, self).close()
def _poll(self):
self.em.poll(1)
def log(self):
self.em.log()
self.nm.log()
if self.tm:
self.tm.log()
if self.pt is not None:
self.pt.log()
def loadConfiguration(self):
"""Load persistent configuration data from the database.
If data is not present, generate it."""
dm = self.dm
# check cluster name
name = dm.getName()
if name is None:
dm.setName(self.name)
elif name != self.name:
raise RuntimeError('name %r does not match with the database: %r'
% (self.name, name))
# load configuration
self.uuid = dm.getUUID()
logging.node(self.name, self.uuid)
logging.info('Configuration loaded:')
logging.info('PTID : %s', dump(dm.getPTID()))
logging.info('Name : %s', self.name)
def loadPartitionTable(self):
"""Load a partition table from the database."""
ptid = self.dm.getPTID()
if ptid is None:
self.pt = PartitionTable(0, 0)
return
row_list = []
for offset, uuid, state in self.dm.getPartitionTable():
while len(row_list) <= offset:
row_list.append([])
# register unknown nodes
if self.nm.getByUUID(uuid) is None:
self.nm.createStorage(uuid=uuid)
row_list[offset].append((uuid, CellStates[state]))
self.pt = object.__new__(PartitionTable)
self.pt.load(ptid, self.dm.getNumReplicas(), row_list, self.nm)
def run(self):
try:
self._run()
except Exception:
logging.exception('Pre-mortem data:')
self.log()
logging.flush()
raise
def _run(self):
"""Make sure that the status is sane and start a loop."""
if len(self.name) == 0:
raise RuntimeError, 'cluster name must be non-empty'
# Make a listening port
handler = identification.IdentificationHandler(self)
self.listening_conn = ListeningConnection(self, handler, self.server)
self.server = self.listening_conn.getAddress()
# Connect to a primary master node, verify data, and
# start the operation. This cycle will be executed permanently,
# until the user explicitly requests a shutdown.
self.operational = False
while True:
self.cluster_state = None
if self.master_node is None:
# look for the primary master
self.connectToPrimary()
self.checker = Checker(self)
self.replicator = Replicator(self)
self.tm = TransactionManager(self)
try:
self.initialize()
self.doOperation()
raise RuntimeError, 'should not reach here'
except StoppedOperation, msg:
logging.error('operation stopped: %s', msg)
except PrimaryFailure, msg:
logging.error('primary master is down: %s', msg)
finally:
self.operational = False
# When not ready, we reject any incoming connection so for
# consistency, we also close any connection except that to the
# master. This includes connections to other storage nodes and any
# replication is aborted, whether we are feeding or out-of-date.
for conn in self.em.getConnectionList():
if conn not in (self.listening_conn, self.master_conn):
conn.close()
del self.checker, self.replicator, self.tm
def connectToPrimary(self):
"""Find a primary master node, and connect to it.
If a primary master node is not elected or ready, repeat
the attempt of a connection periodically.
Note that I do not accept any connection from non-master nodes
at this stage."""
# search, find, connect and identify to the primary master
bootstrap = BootstrapManager(self, NodeTypes.STORAGE,
None if self.new_nid else self.server,
self.devpath, self.new_nid)
self.master_node, self.master_conn = bootstrap.getPrimaryConnection()
self.dm.setUUID(self.uuid)
# Reload a partition table from the database,
# in case that we're in RECOVERING phase.
self.loadPartitionTable()
def initialize(self):
logging.debug('initializing...')
_poll = self._poll
self.master_conn.setHandler(initialization.InitializationHandler(self))
while not self.operational:
_poll()
self.master_conn.send(Packets.NotifyReady())
def doOperation(self):
"""Handle everything, including replications and transactions."""
logging.info('doing operation')
poll = self._poll
_poll = self.em._poll
isIdle = self.em.isIdle
self.master_conn.setHandler(master.MasterOperationHandler(self))
self.replicator.populate()
# Forget all unfinished data.
self.dm.dropUnfinishedData()
self.task_queue = task_queue = deque()
try:
self.dm.doOperation(self)
while True:
while task_queue:
try:
while isIdle():
next(task_queue[-1]) or task_queue.rotate()
_poll(0)
break
except StopIteration:
task_queue.pop()
poll()
finally:
del self.task_queue
def changeClusterState(self, state):
self.cluster_state = state
if state == ClusterStates.STOPPING_BACKUP:
self.replicator.stop()
def newTask(self, iterator):
self.task_queue.appendleft(iterator)
def closeClient(self, connection):
if connection is not self.replicator.getCurrentConnection() and \
connection not in self.checker.conn_dict:
connection.closeClient()
def shutdown(self, erase=False):
"""Close all connections and exit"""
for c in self.em.getConnectionList():
try:
c.close()
except PrimaryFailure:
pass
# clear database to avoid polluting the cluster at restart
if erase:
self.dm.erase()
logging.info("Application has been asked to shut down")
sys.exit()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/checker.py 0000664 0000000 0000000 00000021516 13465024610 0025453 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2012-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import deque
from neo.lib import logging
from neo.lib.connection import ClientConnection, ConnectionClosed
from neo.lib.protocol import NodeTypes, Packets, ZERO_OID
from neo.lib.util import add64, dump
from .handlers.storage import StorageOperationHandler
# TODO: Use a dynamic value such that each chunk takes a few seconds to compute,
# because a too small value wastes network bandwidth. However, a too big
# one prevents the storage from replying quickly to other requests, so
# checkRange() must also be changed to process a chunk in several times,
# with a total time that must not cause timeouts.
CHECK_COUNT = 40000
class Checker(object):
def __init__(self, app):
self.app = app
self.queue = deque()
self.conn_dict = {}
def __call__(self, partition, source, min_tid, max_tid):
self.queue.append((partition, source, min_tid, max_tid))
if not self.conn_dict:
self._nextPartition()
def _nextPartition(self):
app = self.app
def connect(node, uuid=app.uuid, name=app.name):
if node.getUUID() == app.uuid:
return
if node.isConnected(connecting=True):
conn = node.getConnection()
conn.asClient()
else:
conn = ClientConnection(app, StorageOperationHandler(app), node)
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
uuid, app.server, name, app.id_timestamp, (), ()))
self.conn_dict[conn] = node.isIdentified()
conn_set = set(self.conn_dict)
conn_set.discard(None)
try:
self.conn_dict.clear()
while True:
try:
partition, (name, source), min_tid, max_tid = \
self.queue.popleft()
except IndexError:
return
cell = app.pt.getCell(partition, app.uuid)
if cell is None or cell.isOutOfDate():
msg = "discarded or out-of-date"
else:
try:
for cell in app.pt.getCellList(partition):
# XXX: Ignore corrupted cells for the moment
# because we're still unable to fix them
# (see also AdministrationHandler of master)
if cell.isReadable(): #if not cell.isOutOfDate():
connect(cell.getNode())
if source:
node = app.nm.getByAddress(source)
if name:
source = app.nm.createStorage(address=source) \
if node is None else node
connect(source, None, name)
elif (node.getUUID() == app.uuid or
node.isConnected(connecting=True) and
node.getConnection() in self.conn_dict):
source = node
else:
msg = "unavailable source"
if self.conn_dict:
break
msg = "no replica"
except ConnectionClosed:
msg = "connection closed"
finally:
conn_set.update(self.conn_dict)
self.conn_dict.clear()
logging.error("Failed to start checking partition %u (%s)",
partition, msg)
conn_set.difference_update(self.conn_dict)
finally:
for conn in conn_set:
app.closeClient(conn)
logging.debug("start checking partition %u from %s to %s",
partition, dump(min_tid), dump(max_tid))
self.min_tid = self.next_tid = min_tid
self.max_tid = max_tid
self.next_oid = None
self.partition = partition
self.source = source
def start():
if app.tm.isLockedTid(max_tid):
app.tm.read_queue.queueEvent(start)
return
args = partition, CHECK_COUNT, min_tid, max_tid
p = Packets.AskCheckTIDRange(*args)
for conn, identified in self.conn_dict.items():
self.conn_dict[conn] = conn.ask(p) if identified else None
self.conn_dict[None] = app.dm.checkTIDRange(*args)
start()
def connected(self, node):
conn = node.getConnection()
if self.conn_dict.get(conn, self) is None:
self.conn_dict[conn] = conn.ask(Packets.AskCheckTIDRange(
self.partition, CHECK_COUNT, self.next_tid, self.max_tid))
def connectionLost(self, conn):
try:
del self.conn_dict[conn]
except KeyError:
return
if self.source is not None and self.source.getConnection() is conn:
del self.source
elif len(self.conn_dict) > 1:
logging.warning("node lost but keep up checking partition %u",
self.partition)
return
logging.warning("check of partition %u aborted", self.partition)
self._nextPartition()
def _nextRange(self):
if self.next_oid:
args = self.partition, CHECK_COUNT, self.next_tid, self.max_tid, \
self.next_oid
p = Packets.AskCheckSerialRange(*args)
check = self.app.dm.checkSerialRange
else:
args = self.partition, CHECK_COUNT, self.next_tid, self.max_tid
p = Packets.AskCheckTIDRange(*args)
check = self.app.dm.checkTIDRange
for conn in self.conn_dict.keys():
self.conn_dict[conn] = check(*args) if conn is None else conn.ask(p)
def checkRange(self, conn, *args):
if self.conn_dict.get(conn, self) != conn.getPeerId():
# Ignore answers to old requests,
# because we did nothing to cancel them.
logging.info("ignored AnswerCheck*Range%r", args)
return
self.conn_dict[conn] = args
answer_set = set(self.conn_dict.itervalues())
if len(answer_set) > 1:
for answer in answer_set:
if type(answer) is not tuple:
return
# TODO: Automatically tell corrupted cells to fix their data
# if we know a good source.
# For the moment, tell master to put them in CORRUPTED state
# and keep up checking if useful.
uuid = self.app.uuid
args = None if self.source is None else self.conn_dict[
None if self.source.getUUID() == uuid
else self.source.getConnection()]
uuid_list = []
for conn, answer in self.conn_dict.items():
if answer != args:
del self.conn_dict[conn]
if conn is None:
uuid_list.append(uuid)
else:
uuid_list.append(conn.getUUID())
self.app.closeClient(conn)
p = Packets.NotifyPartitionCorrupted(self.partition, uuid_list)
self.app.master_conn.send(p)
if len(self.conn_dict) <= 1:
logging.warning("check of partition %u aborted", self.partition)
self.queue.clear()
self._nextPartition()
return
try:
count, _, max_tid = args
except ValueError: # AnswerCheckSerialRange
count, _, self.next_tid, _, max_oid = args
if count < CHECK_COUNT:
logging.debug("partition %u checked from %s to %s",
self.partition, dump(self.min_tid), dump(self.max_tid))
self._nextPartition()
return
self.next_oid = add64(max_oid, 1)
else: # AnswerCheckTIDRange
if count < CHECK_COUNT:
self.next_tid = self.min_tid
self.next_oid = ZERO_OID
else:
self.next_tid = add64(max_tid, 1)
self._nextRange()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/ 0000775 0000000 0000000 00000000000 13465024610 0025234 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/__init__.py 0000664 0000000 0000000 00000002377 13465024610 0027356 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
LOG_QUERIES = False
DATABASE_MANAGER_DICT = {
'Importer': 'importer.ImporterDatabaseManager',
'MySQL': 'mysqldb.MySQLDatabaseManager',
'SQLite': 'sqlite.SQLiteDatabaseManager',
}
def getAdapterKlass(name):
try:
module, name = DATABASE_MANAGER_DICT[name or 'MySQL'].split('.')
except KeyError:
raise DatabaseFailure('Cannot find a database adapter <%s>' % name)
return getattr(__import__(module, globals(), level=1), name)
def buildDatabaseManager(name, args=(), kw={}):
return getAdapterKlass(name)(*args, **kw)
class DatabaseFailure(Exception):
pass
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/importer.py 0000664 0000000 0000000 00000073027 13465024610 0027460 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2014-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os
import cPickle, pickle, sys, time
from bisect import bisect, insort
from collections import deque
from cStringIO import StringIO
from ConfigParser import SafeConfigParser
from ZConfig import loadConfigFile
from ZODB import BaseStorage
from ZODB.config import getStorageSchema, storageFromString
from ZODB.POSException import POSKeyError
try:
from ZODB._compat import dumps, loads, _protocol
except ImportError:
from cPickle import dumps, loads
_protocol = 1
from ZODB.FileStorage import FileStorage
from ..app import option_defaults
from . import buildDatabaseManager, DatabaseFailure
from .manager import DatabaseManager, Fallback
from neo.lib import compress, logging, patch, util
from neo.lib.interfaces import implements
from neo.lib.protocol import BackendNotImplemented, MAX_TID
patch.speedupFileStorageTxnLookup()
FORK = sys.platform != 'win32'
def transactionAsTuple(txn):
ext = txn.extension
return (txn.user, txn.description,
dumps(ext, _protocol) if ext else '',
txn.status == 'p', txn.tid)
class Reference(object):
__slots__ = "value",
def __init__(self, value):
self.value = value
class Repickler(pickle.Unpickler):
def __init__(self, persistent_map):
self._f = StringIO()
# Use python implementation for unpickling because loading can not
# be customized enough with cPickle.
pickle.Unpickler.__init__(self, self._f)
# For pickling, it is possible to use the fastest implementation,
# which also generates fewer useless PUT opcodes.
self._p = cPickle.Pickler(self._f, 1)
self.memo = self._p.memo # just a tiny optimization
def persistent_id(obj):
if isinstance(obj, Reference):
r = obj.value
del obj.value # minimize refcnt like for deque+popleft
return r
self._p.inst_persistent_id = persistent_id
def persistent_load(obj):
new_obj = persistent_map(obj)
if new_obj is not obj:
self._changed = True
return Reference(new_obj)
self.persistent_load = persistent_load
def _save(self, data):
self._p.dump(data.popleft())
# remove STOP (no need to truncate since it will always be overridden)
self._f.seek(-1, 1)
def __call__(self, data):
f = self._f
f.truncate(0)
f.write(data)
f.reset()
self._changed = False
try:
classmeta = self.load()
state = self.load()
finally:
self.memo.clear()
if self._changed:
f.truncate(0)
try:
self._p.dump(classmeta).dump(state)
finally:
self.memo.clear()
return f.getvalue()
return data
dispatch = pickle.Unpickler.dispatch.copy()
class _noload(object):
state = None
def __new__(cls, dump):
def load(*args):
self = object.__new__(cls)
self.dump = dump
# We use deque+popleft everywhere to minimize the number of
# references at the moment cPickle considers memoizing an
# object. This reduces the number of useless PUT opcodes and
# usually produces smaller pickles than ZODB. Without this,
# they would, on the contrary, increase in size.
# We could also use optimize from pickletools module.
self.args = deque(args)
self._list = deque()
self.append = self._list.append
self.extend = self._list.extend
self._dict = deque()
return self
return load
def __setitem__(self, *args):
self._dict.append(args)
def dict(self):
while self._dict:
yield self._dict.popleft()
def list(self, pos):
pt = self.args.popleft()
f = pt._f
f.seek(pos + 3) # NONE + EMPTY_TUPLE + REDUCE
put = f.read() # preserve memo if any
f.truncate(pos)
f.write(self.dump(pt, self.args) + put)
while self._list:
yield self._list.popleft()
def __reduce__(self):
return None, (), self.state, \
self.list(self.args[0]._f.tell()), self.dict()
@_noload
def _obj(self, args):
self._f.write(pickle.MARK)
while args:
self._save(args)
return pickle.OBJ
def _instantiate(self, klass, k):
args = self.stack[k+1:]
self.stack[k:] = self._obj(klass, *args),
del dispatch[pickle.NEWOBJ] # ZODB < 5 has never used protocol 2
@_noload
def find_class(self, args):
module, name = args
return pickle.GLOBAL + module + '\n' + name + '\n'
@_noload
def _reduce(self, args):
self._save(args)
self._save(args)
return pickle.REDUCE
def load_reduce(self):
stack = self.stack
args = stack.pop()
stack[-1] = self._reduce(stack[-1], args)
dispatch[pickle.REDUCE] = load_reduce
def load_build(self):
stack = self.stack
state = stack.pop()
inst = stack[-1]
assert inst.state is None
inst.state = state
dispatch[pickle.BUILD] = load_build
class ZODB(object):
def __init__(self, storage, oid=0, **kw):
self.oid = int(oid)
self.mountpoints = {k: int(v) for k, v in kw.iteritems()}
self.connect(storage)
self.ltid = util.u64(self.lastTransaction())
if not self.ltid:
raise DatabaseFailure("Can not import empty storage: %s" % storage)
self.mapping = {}
def __getstate__(self):
state = self.__dict__.copy()
del state["_connect"], state["data_tid"], state["storage"]
return state
def connect(self, storage):
self.data_tid = {}
config, _ = loadConfigFile(getStorageSchema(), StringIO(storage))
section = config.storage
def _connect():
self.storage = section.open()
self._connect = _connect
config = section.config
if 'read_only' in config.getSectionAttributes():
has_next_oid = config.read_only = 'next_oid' in self.__dict__
if not has_next_oid:
import gc
# This will reopen read-only as soon as we know the last oid.
def new_oid():
del self.new_oid
new_oid = self.storage.new_oid()
self.storage.close()
# A FileStorage index can be huge, and close() does not
# delete it. Stop reference it before loading it again,
# to avoid having it twice in memory.
del self.storage
gc.collect() # to be sure (maybe only required for PyPy,
# if one day we support it)
config.read_only = True
self._connect()
return new_oid
self.new_oid = new_oid
self._connect()
def setup(self, zodb_dict, shift_oid=0):
self.shift_oid = shift_oid
self.next_oid = util.u64(self.new_oid())
shift_oid += self.next_oid
for mp, oid in self.mountpoints.iteritems():
mp = zodb_dict[mp]
new_oid = mp.oid
try:
new_oid += mp.shift_oid
except AttributeError:
new_oid += shift_oid
shift_oid = mp.setup(zodb_dict, shift_oid)
self.mapping[oid] = new_oid
del self.mountpoints
return shift_oid
def repickle(self, data):
if not (self.shift_oid or self.mapping):
self.repickle = lambda x: x
return data
u64 = util.u64
p64 = util.p64
def map_oid(obj):
if isinstance(obj, tuple) and len(obj) == 2:
oid = u64(obj[0])
# If this oid pointed to a mount point, drop 2nd item because
# it's probably different than the real class of the new oid.
elif isinstance(obj, bytes):
oid = u64(obj)
else:
raise NotImplementedError(
"Unsupported external reference: %r" % obj)
try:
return p64(self.mapping[oid])
except KeyError:
if not self.shift_oid:
return obj # common case for root db
oid = p64(oid + self.shift_oid)
return oid if isinstance(obj, bytes) else (oid, obj[1])
self.repickle = Repickler(map_oid)
return self.repickle(data)
def __getattr__(self, attr):
if attr == '__setstate__':
return object.__getattribute__(self, attr)
return getattr(self.storage, attr)
def getDataTid(self, oid, tid):
try:
return self.data_tid[tid].get(oid)
except KeyError:
assert tid not in self.data_tid, (oid, tid)
p_tid = util.p64(tid)
txn = next(self.storage.iterator(p_tid))
if txn.tid != p_tid:
raise
u64 = util.u64
txn = self.data_tid[tid] = {
u64(x.oid): x.data_txn
for x in txn if x.data_txn}
return txn.get(oid)
class ZODBIterator(object):
def __new__(cls, zodb_list, *args):
def _init(zodb):
self = object.__new__(cls)
iterator = zodb.iterator(*args)
def _next():
self.transaction = next(iterator)
self.zodb = zodb
self.next = _next
return self
def init(zodb):
# FileStorage is fork-safe and in case we don't start iteration
# from the beginning, we want the tid index built at most once
# (by speedupFileStorageTxnLookup).
if FORK and not isinstance(zodb.storage, FileStorage):
def init():
zodb._connect()
return _init(zodb)
return init
return _init(zodb)
def result(zodb_list):
for self in zodb_list:
if callable(self):
self = self()
try:
self.next()
yield self
except StopIteration:
pass
return result(map(init, zodb_list))
tid = property(lambda self: self.transaction.tid)
def __lt__(self, other):
return self.tid < other.tid or self.tid == other.tid \
and self.zodb.shift_oid < other.zodb.shift_oid
is_true = ('false', 'true').index
class ImporterDatabaseManager(DatabaseManager):
"""Proxy that transparently imports data from a ZODB storage
"""
_writeback = None
_last_commit = 0
def __init__(self, *args, **kw):
super(ImporterDatabaseManager, self).__init__(*args, **kw)
implements(self, """_getNextTID checkSerialRange checkTIDRange
deleteObject deleteTransaction dropPartitions _getLastTID
getReplicationObjectList _getTIDList nonempty""".split())
_getPartition = property(lambda self: self.db._getPartition)
_getReadablePartition = property(lambda self: self.db._getReadablePartition)
_uncommitted_data = property(lambda self: self.db._uncommitted_data)
def _parse(self, database):
config = SafeConfigParser()
config.read(os.path.expanduser(database))
sections = config.sections()
main = self._conf = option_defaults.copy()
main.update(config.items(sections.pop(0)))
self.zodb = [(x, dict(config.items(x))) for x in sections]
x = main.get('compress', 'true')
try:
self.compress = bool(is_true(x))
except ValueError:
self.compress = compress.parseOption(x)
if is_true(main.get('writeback', 'false')):
if len(self.zodb) > 1:
raise Exception(
"Can not forward new transactions to splitted DB.")
self._writeback = self.zodb[0][1]['storage']
def _connect(self):
conf = self._conf
db = self.db = buildDatabaseManager(conf['adapter'],
(conf['database'], conf.get('engine'), conf['wait']))
for x in """getConfiguration _setConfiguration _getMaxPartition
query erase getPartitionTable iterAssignedCells
updateCellTID getUnfinishedTIDDict dropUnfinishedData
abortTransaction storeTransaction lockTransaction
loadData storeData getOrphanList _pruneData deferCommit
_getDevPath dropPartitionsTemporary
""".split():
setattr(self, x, getattr(db, x))
if self._writeback:
self._writeback = WriteBack(db, self._writeback)
db_commit = db.commit
def commit():
db_commit()
self._last_commit = time.time()
if self._writeback:
self._writeback.committed()
self.commit = db.commit = commit
def _updateReadable(*_):
raise AssertionError
def setUUID(self, nid):
old_nid = self.getUUID()
if old_nid:
assert old_nid == nid, (old_nid, nid)
else:
self.setConfiguration('nid', str(nid))
def changePartitionTable(self, *args, **kw):
self.db.changePartitionTable(*args, **kw)
if self._writeback:
self._writeback.changed()
def unlockTransaction(self, *args):
self.db.unlockTransaction(*args)
if self._writeback:
self._writeback.changed()
def close(self):
if self._writeback:
self._writeback.close()
self.db.close()
if isinstance(self.zodb, tuple): # _setup called
for zodb in self.zodb:
zodb.close()
def setup(self, reset=False, dedup=False):
self.db.setup(reset, dedup)
zodb_state = self.getConfiguration("zodb")
if zodb_state:
logging.warning("Ignoring configuration file for oid mapping."
" Reloading it from NEO storage.")
zodb = cPickle.loads(zodb_state)
for k, v in self.zodb:
zodb[k].connect(v["storage"])
else:
zodb = {k: ZODB(**v) for k, v in self.zodb}
x, = (x for x in zodb.itervalues() if not x.oid)
x.setup(zodb)
self.setConfiguration("zodb", cPickle.dumps(zodb))
self.zodb_index, self.zodb = zip(*sorted(
(x.shift_oid, x) for x in zodb.itervalues()))
self.zodb_ltid = max(x.ltid for x in self.zodb)
zodb = self.zodb[-1]
self.zodb_loid = zodb.shift_oid + zodb.next_oid - 1
self.zodb_tid = self._getMaxPartition() is not None and \
self.db.getLastTID(self.zodb_ltid) or 0
if callable(self._import): # XXX: why ?
if self.zodb_tid == self.zodb_ltid:
self._finished()
else:
self._import = self._import()
def doOperation(self, app):
if self._import:
app.newTask(self._import)
def _import(self):
p64 = util.p64
u64 = util.u64
tid = p64(self.zodb_tid + 1) if self.zodb_tid else None
zodb_list = ZODBIterator(self.zodb, tid, p64(self.zodb_ltid))
if FORK:
from multiprocessing import Process
from ..shared_queue import Queue
queue = Queue(1<<24)
process = self._import_process = Process(
target=lambda zodb_list: queue(self._iter_zodb(zodb_list)),
args=(zodb_list,))
process.daemon = True
process.start()
else:
queue = self._iter_zodb(zodb_list)
process = None
del zodb_list
object_list = []
data_id_list = []
for txn in queue:
if txn is None:
break
if len(txn) == 3:
oid, data_id, data_tid = txn
if data_id is not None:
checksum, data, compression = data_id
data_id = self.holdData(checksum, oid, data, compression)
data_id_list.append(data_id)
object_list.append((oid, data_id, data_tid))
# Give the main loop the opportunity to process requests
# from other nodes. In particular, clients may commit. If the
# storage node exits after such commit, and before we actually
# update 'obj' with 'object_list', some rows in 'data' may be
# unreferenced. This is not a problem because the leak is
# solved when resuming the migration.
# XXX: The leak was solved by the deduplication,
# but it was disabled by default.
else:
tid = txn[-1]
self.storeTransaction(tid, object_list,
((x[0] for x in object_list),) + txn,
False)
self.releaseData(data_id_list)
logging.debug("TXN %s imported (user=%r, desc=%r, len(oid)=%s)",
util.dump(tid), txn[0], txn[1], len(object_list))
del object_list[:], data_id_list[:]
if self._last_commit + 1 < time.time():
self.commit()
self.zodb_tid = u64(tid)
yield
if process:
process.join()
self.commit()
self._finished()
def _finished(self):
logging.warning("All data are imported. You should change"
" your configuration to use the native backend and restart.")
self._import = None
for x in """getObject getReplicationTIDList getReplicationObjectList
_fetchObject
""".split():
setattr(self, x, getattr(self.db, x))
for zodb in self.zodb:
zodb.close()
self.zodb = None
def _iter_zodb(self, zodb_list):
util.setproctitle('neostorage: import')
p64 = util.p64
u64 = util.u64
zodb_list = list(zodb_list)
if zodb_list:
tid = None
_compress = compress.getCompress(self.compress)
while 1:
zodb_list.sort()
z = zodb_list[0]
# Merge transactions with same tid. Only
# user/desc/ext from first ZODB are kept.
if tid != z.tid:
if tid:
yield txn
txn = transactionAsTuple(z.transaction)
tid = txn[-1]
zodb = z.zodb
for r in z.transaction:
oid = p64(u64(r.oid) + zodb.shift_oid)
data_tid = r.data_txn
if data_tid or r.data is None:
data = None
else:
_, compression, data = _compress(zodb.repickle(r.data))
data = util.makeChecksum(data), data, compression
yield oid, data, data_tid
try:
z.next()
except StopIteration:
del zodb_list[0]
if not zodb_list:
break
yield txn
yield
def inZodb(self, oid, tid=None, before_tid=None):
return oid <= self.zodb_loid and (
self.zodb_tid < before_tid if before_tid else
tid is None or self.zodb_tid < tid <= self.zodb_ltid)
def zodbFromOid(self, oid):
zodb = self.zodb[bisect(self.zodb_index, oid) - 1]
return zodb, oid - zodb.shift_oid
def getLastIDs(self):
tid, oid = self.db.getLastIDs()
return (max(tid, util.p64(self.zodb_ltid)),
max(oid, util.p64(self.zodb_loid)))
def getObject(self, oid, tid=None, before_tid=None):
u64 = util.u64
u_oid = u64(oid)
u_tid = tid and u64(tid)
u_before_tid = before_tid and u64(before_tid)
db = self.db
if self.zodb_tid < (u_before_tid - 1 if before_tid else
u_tid or 0) <= self.zodb_ltid:
o = None
else:
o = db.getObject(oid, tid, before_tid)
if o and self.zodb_ltid < u64(o[0]) or \
not self.inZodb(u_oid, u_tid, u_before_tid):
return o
p64 = util.p64
zodb, z_oid = self.zodbFromOid(u_oid)
try:
value, serial, next_serial = zodb.loadBefore(p64(z_oid),
before_tid or (util.p64(u_tid + 1) if tid else MAX_TID))
except TypeError: # loadBefore returned None
return False
except POSKeyError:
assert not o, o
return o
if serial != tid:
if tid:
return False
u_tid = u64(serial)
if u_tid <= self.zodb_tid and o:
return o
if value:
value = zodb.repickle(value)
checksum = util.makeChecksum(value)
else:
# CAVEAT: Although we think loadBefore should not return an empty
# value for a deleted object (BBB: fixed in ZODB4),
# there's no need to distinguish this case in the above
# except clause because it would be crazy to import a
# NEO DB using this backend.
checksum = None
if not next_serial:
next_serial = db._getNextTID(db._getPartition(u_oid), u_oid, u_tid)
if next_serial:
next_serial = p64(next_serial)
return (serial, next_serial,
0, checksum, value, zodb.getDataTid(z_oid, u_tid))
def getTransaction(self, tid, all=False):
u64 = util.u64
if self.zodb_tid < u64(tid) <= self.zodb_ltid:
for zodb in self.zodb:
for txn in zodb.iterator(tid, tid):
p64 = util.p64
shift_oid = zodb.shift_oid
return ([p64(u64(x.oid) + shift_oid) for x in txn],
) + transactionAsTuple(txn)
else:
return self.db.getTransaction(tid, all)
def getFinalTID(self, ttid):
if util.u64(ttid) <= self.zodb_ltid and self._import:
raise NotImplementedError
return self.db.getFinalTID(ttid)
def _deleteRange(self, partition, min_tid=None, max_tid=None):
# Even if everything is imported, we can't truncate below
# because it would import again if we restart with this backend.
# This is also incompatible with writeback, because ZODB has
# no API to truncate.
if min_tid < self.zodb_ltid or self._writeback:
# XXX: That's late to fail. The master should ask storage nodes
# whether truncation is possible before going further.
raise NotImplementedError
self.db._deleteRange(partition, min_tid, max_tid)
def getReplicationTIDList(self, min_tid, max_tid, length, partition):
# This method is not tested and it is anyway
# useless without getReplicationObjectList.
raise BackendNotImplemented(self.getReplicationTIDList)
p64 = util.p64
tid = p64(self.zodb_tid)
if min_tid <= tid:
r = self.db.getReplicationTIDList(min_tid, min(max_tid, tid),
length, partition)
if max_tid <= tid:
return r
length -= len(r)
min_tid = p64(self.zodb_tid + 1)
else:
r = []
if length:
tid = p64(self.zodb_ltid)
if min_tid <= tid:
u64 = util.u64
def next_tid(i):
for txn in i:
tid = u64(txn.tid)
if self._getPartition(tid) == partition:
insort(z, (-tid, i))
break
z = []
for zodb in self.zodb:
next_tid(zodb.iterator(min_tid, min(max_tid, tid)))
while z:
t, i = z.pop()
r.append(p64(-t))
length -= 1
if not length:
return r
next_tid(i)
if tid < max_tid:
r += self.db.getReplicationTIDList(max(min_tid, tid), max_tid,
length, partition)
return r
def _fetchObject(*_):
raise AssertionError
getLastObjectTID = Fallback.getLastObjectTID.__func__
_getDataTID = Fallback._getDataTID.__func__
def getObjectHistory(self, *args, **kw):
raise BackendNotImplemented(self.getObjectHistory)
def pack(self, *args, **kw):
raise BackendNotImplemented(self.pack)
class WriteBack(object):
_changed = False
_process = None
chunk_size = 100
def __init__(self, db, storage):
self._db = db
self._storage = storage
def close(self):
if self._process:
self._stop.set()
self._event.set()
self._process.join()
def changed(self):
self._changed = True
def committed(self):
if self._changed:
self._changed = False
if self._process:
self._event.set()
else:
if FORK:
from multiprocessing import Process, Event
else:
from threading import Thread as Process, Event
self._event = Event()
self._idle = Event()
self._stop = Event()
self._np = 1 + self._db._getMaxPartition()
self._db = cPickle.dumps(self._db, 2)
self._process = Process(target=self._run)
self._process.daemon = True
self._process.start()
@property
def wait(self):
# For unit tests.
return self._idle.wait
def _run(self):
util.setproctitle('neostorage: write back')
self._db = cPickle.loads(self._db)
try:
@self._db.autoReconnect
def _():
# Unfortunately, copyTransactionsFrom does not abort in case
# of failure, so we have to reopen.
zodb = storageFromString(self._storage)
try:
self.min_tid = util.add64(zodb.lastTransaction(), 1)
zodb.copyTransactionsFrom(self)
finally:
zodb.close()
finally:
self._idle.set()
self._db.close()
def iterator(self):
db = self._db
np = self._np
offset_list = xrange(np)
while 1:
with db:
# Check the partition table at the beginning of every
# transaction. Once the import is finished and at least one
# cell is replicated, it is possible that some of this node
# get outdated. In this case, wait for the next PT change.
if np == len(db._readable_set):
while 1:
tid_list = []
max_tid = MAX_TID
for offset in offset_list:
x = db.getReplicationTIDList(
self.min_tid, max_tid, self.chunk_size, offset)
tid_list += x
if len(x) == self.chunk_size:
max_tid = x[-1]
if not tid_list:
break
tid_list.sort()
for tid in tid_list:
if self._stop.is_set():
return
yield TransactionRecord(db, tid)
if tid == max_tid:
break
else:
self.min_tid = util.add64(tid, 1)
break
self.min_tid = util.add64(tid, 1)
if not self._event.is_set():
self._idle.set()
self._event.wait()
self._idle.clear()
self._event.clear()
if self._stop.is_set():
break
class TransactionRecord(BaseStorage.TransactionRecord):
def __init__(self, db, tid):
self._oid_list, user, desc, ext, _, _ = db.getTransaction(tid)
super(TransactionRecord, self).__init__(tid, ' ', user, desc,
loads(ext) if ext else {})
self._db = db
def __iter__(self):
tid = self.tid
for oid in self._oid_list:
r = self._db.fetchObject(oid, tid)
if r is None: # checkCurrentSerialInTransaction
continue
_, compression, _, data, data_tid = r
if data is not None:
data = compress.decompress_list[compression](data)
yield BaseStorage.DataRecord(oid, tid, data, data_tid)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/manager.py 0000664 0000000 0000000 00000105440 13465024610 0027224 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import os, errno, socket, struct, sys, threading
from collections import defaultdict
from contextlib import contextmanager
from copy import copy
from functools import wraps
from neo.lib import logging, util
from neo.lib.interfaces import abstract, requires
from neo.lib.protocol import CellStates, NonReadableCell, MAX_TID, ZERO_TID
from . import DatabaseFailure
READABLE = CellStates.UP_TO_DATE, CellStates.FEEDING
def fallback(func):
setattr(Fallback, func.__name__, func)
return abstract(func)
def splitOIDField(tid, oids):
if len(oids) % 8:
raise DatabaseFailure('invalid oids length for tid %s: %s'
% (tid, len(oids)))
return [oids[i:i+8] for i in xrange(0, len(oids), 8)]
class CreationUndone(Exception):
pass
class Fallback(object):
pass
class DatabaseManager(object):
"""This class only describes an interface for database managers."""
ENGINES = ()
UNSAFE = False
__lock = None
LOCK = "neostorage"
LOCKED = "error: database is locked"
_deferred = 0
_repairing = None
def __init__(self, database, engine=None, wait=None):
"""
Initialize the object.
"""
if engine:
if engine not in self.ENGINES:
raise ValueError("Unsupported engine: %r not in %r"
% (engine, self.ENGINES))
self._engine = engine
# XXX: Maybe the default should be to retry indefinitely.
# But for unit tests, we really want to never retry.
self._wait = wait or 0
self._parse(database)
self._init_attrs = tuple(self.__dict__)
self._connect()
def __getstate__(self):
state = {x: getattr(self, x) for x in self._init_attrs}
assert state # otherwise, __setstate__ is not called
return state
def __setstate__(self, state):
self.__dict__.update(state)
# For the moment, no need to duplicate secondary connections.
#self._init_attrs = tuple(self.__dict__)
# Secondary connections don't lock.
self.LOCK = None
self._connect()
@contextmanager
def _duplicate(self):
db = copy(self)
try:
yield db
finally:
db.close()
_cached_attr_list = (
'_readable_set', '_getPartition', '_getReadablePartition')
def __getattr__(self, attr):
if attr in self._cached_attr_list:
self._updateReadable()
return self.__getattribute__(attr)
def __enter__(self):
assert not self.LOCK, "not a secondary connection"
# XXX: All config caching should be done in this class,
# rather than in backend classes.
self._config.clear()
try:
for attr in self._cached_attr_list:
delattr(self, attr)
except AttributeError:
pass
def __exit__(self, t, v, tb):
if v is None:
# Deferring commits make no sense for secondary connections.
assert not self._deferred
self._commit()
@abstract
def _parse(self, database):
"""Called during instantiation, to process database parameter."""
@abstract
def _connect(self):
"""Connect to the database"""
def autoReconnect(self, f):
"""
Placeholder for backends that may lose connection to the underlying
database: although a primary connection is reestablished transparently
when possible, secondary connections use transactions and they must
restart from the beginning.
For other backends, there's no expected transient failure so the
default implementation is to execute the given task exactly once.
"""
f()
def lock(self, db_path):
if self.LOCK:
assert self.__lock is None, self.__lock
# For platforms that don't support anonymous sockets,
# we can either use zc.lockfile or an empty SQLite db
# (with BEGIN EXCLUSIVE).
try:
stat = os.stat(db_path)
except OSError as e:
if e.errno != errno.ENOENT:
raise
return # in-memory or temporary database
s = self.__lock = socket.socket(socket.AF_UNIX)
try:
s.bind('\0%s:%s:%s' % (self.LOCK, stat.st_dev, stat.st_ino))
except socket.error as e:
if e.errno != errno.EADDRINUSE:
raise
sys.exit(self.LOCKED)
def _getDevPath(self):
"""
"""
@requires(_getDevPath)
def getTopologyPath(self):
# On Windows, st_dev only exists since Python 3.4
return socket.gethostname(), str(os.stat(self._getDevPath()).st_dev)
@abstract
def erase(self):
""""""
def restore(self, dump): # for tests
self.erase()
self._restore(dump)
def _setup(self, dedup=False):
"""To be overridden by the backend to set up a database
It must recover self._uncommitted_data from temporary object table.
_uncommitted_data is already instantiated and must be updated with
refcounts to data of write-locked objects, except in case of undo,
where the refcount is increased later, when the object is read-locked.
Keys are data ids and values are number of references.
"""
@requires(_setup)
def setup(self, reset=False, dedup=False):
"""Set up a database, discarding existing data first if reset is True
"""
if reset:
self.erase()
self._uncommitted_data = defaultdict(int)
self._setup(dedup)
@abstract
def nonempty(self, table):
"""Check whether table is empty or return None if it does not exist"""
def _checkNoUnfinishedTransactions(self, *hint):
if self.nonempty('ttrans') or self.nonempty('tobj'):
raise DatabaseFailure(
"The database can not be upgraded because you have unfinished"
" transactions. Use an older version of NEO to verify them.")
def migrate(self, *args, **kw):
version = int(self.getConfiguration("version") or 0)
if self.VERSION < version:
raise DatabaseFailure("The database can not be downgraded.")
while version < self.VERSION:
version += 1
getattr(self, '_migrate%s' % version)(*args, **kw)
self.setConfiguration("version", version)
def doOperation(self, app):
pass
def _close(self):
"""Backend-specific code to close the database"""
@requires(_close)
def close(self):
self._deferredCommit()
self._close()
if self.__lock:
self.__lock.close()
del self.__lock
def _commit(self):
"""Backend-specific code to commit the pending changes"""
@requires(_commit)
def commit(self):
logging.debug('committing...')
self._commit()
# Instead of cancelling a timeout that would be set to defer a commit,
# we simply use to a boolean so that _deferredCommit() does nothing.
# IOW, epoll may wait wake up for nothing but that should be rare,
# because most immediate commits are usually quickly followed by
# deferred commits.
self._deferred = 0
def deferCommit(self):
self._deferred = 1
return self._deferredCommit
def _deferredCommit(self):
if self._deferred:
self.commit()
@abstract
def getConfiguration(self, key):
"""
Return a configuration value, returns None if not found or not set
"""
def setConfiguration(self, key, value):
"""
Set a configuration value
"""
self._setConfiguration(key, value)
self.commit()
@abstract
def _setConfiguration(self, key, value):
""""""
def _changePartitionTable(self, cell_list, reset=False):
"""Change a part of a partition table. The list of cells is
a tuple of tuples, each of which consists of an offset (row ID),
the NID of a storage node, and a cell state. If reset is True,
existing data is first thrown away.
"""
def _getPartitionTable(self):
"""Return a whole partition table as a sequence of rows. Each row
is again a tuple of an offset (row ID), the NID of a storage
node, and a cell state."""
def getUUID(self):
"""
Load a NID from a database.
"""
nid = self.getConfiguration('nid')
if nid is not None:
return int(nid)
@requires(_changePartitionTable, _getPartitionTable)
def setUUID(self, nid):
"""
Store a NID into a database.
"""
old_nid = self.getUUID()
if nid != old_nid:
if old_nid:
self._changePartitionTable((offset, x, tid)
for offset, x, tid in self._getPartitionTable()
if x == old_nid
for x, tid in ((x, None), (nid, tid)))
self.setConfiguration('nid', str(nid))
def getNumReplicas(self):
"""
Load the number of replicas from a database.
"""
n = self.getConfiguration('replicas')
if n is not None:
return int(n)
def getName(self):
"""
Load a name from a database.
"""
return self.getConfiguration('name')
def setName(self, name):
"""
Store a name into a database.
"""
self.setConfiguration('name', name)
def getPTID(self):
"""
Load a Partition Table ID from a database.
"""
ptid = self.getConfiguration('ptid')
if ptid is not None:
return int(ptid)
def getBackupTID(self):
return util.bin(self.getConfiguration('backup_tid'))
def _setBackupTID(self, tid):
tid = util.dump(tid)
logging.debug('backup_tid = %s', tid)
return self._setConfiguration('backup_tid', tid)
def getTruncateTID(self):
return util.bin(self.getConfiguration('truncate_tid'))
def _setTruncateTID(self, tid):
tid = util.dump(tid)
logging.debug('truncate_tid = %s', tid)
return self._setConfiguration('truncate_tid', tid)
def _setPackTID(self, tid):
self._setConfiguration('_pack_tid', tid)
def _getPackTID(self):
try:
return int(self.getConfiguration('_pack_tid'))
except TypeError:
return -1
# XXX: Consider splitting getLastIDs/_getLastIDs because
# sometimes the last oid is not wanted.
def _getLastTID(self, partition, max_tid=None):
"""Return tid of last transaction <= 'max_tid' in given 'partition'
tids are in unpacked format.
"""
@requires(_getLastTID)
def getLastTID(self, max_tid=None):
"""Return tid of last transaction <= 'max_tid'
tids are in unpacked format.
"""
x = self._readable_set
if x:
return max(self._getLastTID(x, max_tid) for x in x)
def _getLastIDs(self, partition):
"""Return max(tid) & max(oid) for objects of given partition
Results are in unpacked format
"""
@requires(_getLastIDs)
def getLastIDs(self):
"""Return max(tid) & max(oid) for readable data
It is important to ignore unassigned partitions because there may
remain data from cells that have been discarded, either due to
--disable-drop-partitions option, or in the future when dropping
partitions is done in background (as it is an expensive operation).
"""
x = self._readable_set
if x:
tid, oid = zip(*map(self._getLastIDs, x))
tid = max(self.getLastTID(), max(tid))
oid = max(oid)
return (None if tid is None else util.p64(tid),
None if oid is None else util.p64(oid))
return None, None
def _getUnfinishedTIDDict(self):
""""""
@requires(_getUnfinishedTIDDict)
def getUnfinishedTIDDict(self):
trans, obj = self._getUnfinishedTIDDict()
obj = dict.fromkeys(obj)
obj.update(trans)
p64 = util.p64
return {p64(ttid): None if tid is None else p64(tid)
for ttid, tid in obj.iteritems()}
@fallback
def getLastObjectTID(self, oid):
"""Return the latest tid of given oid or None if it does not exist"""
r = self.getObject(oid)
return r and r[0]
@abstract
def _getNextTID(self, partition, oid, tid):
"""
partition (int)
Must be the result of (oid % self.getPartition(oid))
oid (int)
Identifier of object to retrieve.
tid (int)
Exact serial to retrieve.
If tid is the last revision of oid, None is returned.
"""
def _getObject(self, oid, tid=None, before_tid=None):
"""
oid (int)
Identifier of object to retrieve.
tid (int, None)
Exact serial to retrieve.
before_tid (int, None)
Serial to retrieve is the highest existing one strictly below this
value.
Return value:
None: oid doesn't exist at requested tid/before_tid (getObject
takes care of checking if the oid exists at other serial)
6-tuple: Record content.
- record serial (int)
- serial or next record modifying object (int, None)
- compression (boolean-ish, None)
- checksum (binary string, None)
- data (binary string, None)
- data_serial (int, None)
"""
@requires(_getObject)
def getObject(self, oid, tid=None, before_tid=None):
"""
oid (packed)
Identifier of object to retrieve.
tid (packed, None)
Exact serial to retrieve.
before_tid (packed, None)
Serial to retrieve is the highest existing one strictly below this
value.
Return value:
None: Given oid doesn't exist in database.
False: No record found, but another one exists for given oid.
6-tuple: Record content.
- record serial (packed)
- serial or next record modifying object (packed, None)
- compression (boolean-ish, None)
- checksum (binary string, None)
- data (binary string, None)
- data_serial (packed, None)
"""
u64 = util.u64
r = self._getObject(u64(oid), tid and u64(tid),
before_tid and u64(before_tid))
try:
serial, next_serial, compression, checksum, data, data_serial = r
except TypeError:
# See if object exists at all
return (tid or before_tid) and self.getLastObjectTID(oid) and False
return (util.p64(serial),
None if next_serial is None else util.p64(next_serial),
compression, checksum, data,
None if data_serial is None else util.p64(data_serial))
@fallback
@requires(_getObject)
def _fetchObject(self, oid, tid):
"""Specialized version of _getObject, for replication"""
r = self._getObject(oid, tid)
if r:
return r[:1] + r[2:] # remove next_serial
def fetchObject(self, oid, tid):
"""
Specialized version of getObject, for replication:
- the oid can only be at an exact serial (parameter 'tid')
- next_serial is not part of the result
- if there's no result for the requested serial,
no need check if oid exists at other serial
"""
u64 = util.u64
r = self._fetchObject(u64(oid), u64(tid))
if r:
serial, compression, checksum, data, data_serial = r
return (util.p64(serial), compression, checksum, data,
None if data_serial is None else util.p64(data_serial))
@requires(_getPartitionTable)
def iterAssignedCells(self):
my_nid = self.getUUID()
return ((offset, tid) for offset, nid, tid in self._getPartitionTable()
if my_nid == nid)
@requires(_getPartitionTable)
def getPartitionTable(self):
return [(offset, nid, max(0, -state))
for offset, nid, state in self._getPartitionTable()]
@contextmanager
def replicated(self, offset):
readable_set = self._readable_set
assert offset not in readable_set
readable_set.add(offset)
try:
yield
finally:
readable_set.remove(offset)
def _getDataLastId(self, partition):
"""
"""
def _getMaxPartition(self):
"""
"""
@requires(_getDataLastId, _getMaxPartition)
def _updateReadable(self, reset=True):
if reset:
readable_set = self._readable_set = set()
np = 1 + self._getMaxPartition()
def _getPartition(x, np=np):
return x % np
def _getReadablePartition(x, np=np, r=readable_set):
x %= np
if x in r:
return x
raise NonReadableCell
self._getPartition = _getPartition
self._getReadablePartition = _getReadablePartition
d = self._data_last_ids = []
for p in xrange(np):
i = self._getDataLastId(p)
d.append(p << 48 if i is None else i + 1)
else:
readable_set = self._readable_set
readable_set.clear()
readable_set.update(x[0] for x in self.iterAssignedCells()
if -x[1] in READABLE)
@requires(_changePartitionTable, _getLastIDs, _getLastTID)
def changePartitionTable(self, ptid, num_replicas, cell_list, reset=False):
my_nid = self.getUUID()
pt = dict(self.iterAssignedCells())
# In backup mode, the last transactions of a readable cell may be
# incomplete.
backup_tid = self.getBackupTID()
if backup_tid:
backup_tid = util.u64(backup_tid)
def outofdate_tid(offset):
tid = pt.get(offset, 0)
if tid >= 0:
return tid
return -tid in READABLE and (backup_tid or
max(self._getLastIDs(offset)[0],
self._getLastTID(offset))) or 0
cell_list = [(offset, nid, (
None if state == CellStates.DISCARDED else
-state if nid != my_nid or state != CellStates.OUT_OF_DATE else
outofdate_tid(offset)))
for offset, nid, state in cell_list]
self._changePartitionTable(cell_list, reset)
self._updateReadable(reset)
assert isinstance(ptid, (int, long)), ptid
self._setConfiguration('ptid', str(ptid))
self._setConfiguration('replicas', str(num_replicas))
@requires(_changePartitionTable)
def updateCellTID(self, partition, tid):
t, = (t for p, t in self.iterAssignedCells() if p == partition)
if t < 0:
return
tid = util.u64(tid)
# Replicator doesn't optimize when there's no new data
# since the node went down.
if t == tid:
return
# In a backup cluster, when a storage node gets down just after
# being the first to replicate fully new transactions from upstream,
# we may end up in a special situation where an OUT_OF_DATE cell
# is actually more up-to-date than an UP_TO_DATE one.
assert t < tid or self.getBackupTID()
self._changePartitionTable([(partition, self.getUUID(), tid)])
def iterCellNextTIDs(self):
p64 = util.p64
backup_tid = self.getBackupTID()
if backup_tid:
next_tid = util.u64(backup_tid)
if next_tid:
next_tid += 1
for offset, tid in self.iterAssignedCells():
if tid >= 0: # OUT_OF_DATE
yield offset, p64(tid and tid + 1)
elif -tid in READABLE:
if backup_tid:
# An UP_TO_DATE cell does not have holes so it's fine to
# resume from the last found records.
tid = self._getLastTID(offset)
yield offset, (
# For trans, a transaction can't be partially
# replicated, so replication can resume from the next
# possible tid.
p64(max(next_tid, tid + 1) if tid else next_tid),
# For obj, the last transaction may be partially
# replicated so it must be checked again (i.e. no +1).
p64(max(next_tid, self._getLastIDs(offset)[0])))
else:
yield offset, None
@abstract
def dropPartitions(self, offset_list):
"""Delete all data for specified partitions"""
def _getUnfinishedDataIdList(self):
"""Drop any unfinished data from a database."""
@requires(_getUnfinishedDataIdList)
def dropUnfinishedData(self):
"""Drop any unfinished data from a database."""
data_id_list = self._getUnfinishedDataIdList()
self.dropPartitionsTemporary()
self.releaseData(data_id_list, True)
self.commit()
@abstract
def dropPartitionsTemporary(self, offset_list=None):
"""Drop partitions from temporary tables"""
@abstract
def storeTransaction(self, tid, object_list, transaction, temporary = True):
"""Write transaction metadata
The list of objects contains tuples, each of which consists of
an object ID, a data_id and object serial.
The transaction is either None or a tuple of the list of OIDs,
user information, a description, extension information and transaction
pack state (True for packed).
If 'temporary', the transaction is stored into ttrans/tobj tables,
(instead of trans/obj). The caller is in charge of committing, which
is always the case at tpc_vote.
"""
@abstract
def getOrphanList(self):
"""Return the list of data id that is not referenced by the obj table
This is a repair method, and it's usually expensive.
There was a bug that did not free data of transactions that were
aborted before vote. This method is used to reclaim the wasted space.
"""
@abstract
def _pruneData(self, data_id_list):
"""To be overridden by the backend to delete any unreferenced data
'unreferenced' means:
- not in self._uncommitted_data
- and not referenced by a fully-committed object (storage should have
an index or a refcount of all data ids of all objects)
The returned value is the number of deleted rows from the data table.
"""
@abstract
def storeData(self, checksum, oid, data, compression):
"""To be overridden by the backend to store object raw data
If same data was already stored, the storage only has to check there's
no hash collision.
"""
@abstract
def loadData(self, data_id):
"""Inverse of storeData
"""
def holdData(self, checksum_or_id, *args):
"""Store raw data of temporary object
If 'checksum_or_id' is a checksum, it must be the result of
makeChecksum(data) and extra parameters must be (data, compression)
where 'compression' indicates if 'data' is compressed.
A volatile reference is set to this data until 'releaseData' is called
with this checksum.
If called with only an id, it only increment the volatile
reference to the data matching the id.
"""
if args:
checksum_or_id = self.storeData(checksum_or_id, *args)
self._uncommitted_data[checksum_or_id] += 1
return checksum_or_id
def releaseData(self, data_id_list, prune=False):
"""Release 1 volatile reference to given list of data ids
If 'prune' is true, any data that is not referenced anymore (either by
a volatile reference or by a fully-committed object) is deleted.
"""
refcount = self._uncommitted_data
for data_id in data_id_list:
count = refcount[data_id] - 1
if count:
refcount[data_id] = count
else:
del refcount[data_id]
if prune:
return self._pruneData(data_id_list)
@fallback
@requires(_getObject)
def _getDataTID(self, oid, tid=None, before_tid=None):
"""
Return a 2-tuple:
tid (int)
tid corresponding to received parameters
serial
data tid of the found record
(None, None) is returned if requested object and transaction
could not be found.
This method only exists for performance reasons, by not returning data:
_getObject already returns these values but it is slower.
"""
r = self._getObject(oid, tid, before_tid)
return (r[0], r[-1]) if r else (None, None)
def findUndoTID(self, oid, tid, ltid, undone_tid, transaction_object):
"""
oid
Object OID
tid
Transation doing the undo
ltid
Upper (excluded) bound of transactions visible to transaction doing
the undo.
undone_tid
Transaction to undo
transaction_object
Object data from memory, if it was modified by running
transaction.
None if is was not modified by running transaction.
Returns a 3-tuple:
current_tid (p64)
TID of most recent version of the object client's transaction can
see. This is used later to detect current conflicts (eg, another
client modifying the same object in parallel)
data_tid (int)
TID containing (without indirection) the data prior to undone
transaction.
None if object doesn't exist prior to transaction being undone
(its creation is being undone).
is_current (bool)
False if object was modified by later transaction (ie, data_tid is
not current), True otherwise.
"""
u64 = util.u64
p64 = util.p64
oid = u64(oid)
tid = u64(tid)
if ltid:
ltid = u64(ltid)
undone_tid = u64(undone_tid)
def getDataTID(tid=None, before_tid=None):
tid, data_tid = self._getDataTID(oid, tid, before_tid)
current_tid = tid
while data_tid:
if data_tid < tid:
tid, data_tid = self._getDataTID(oid, data_tid)
if tid is not None:
continue
logging.error("Incorrect data serial for oid %s at tid %s",
oid, current_tid)
return current_tid, current_tid
return current_tid, tid
found_undone_tid, undone_data_tid = getDataTID(tid=undone_tid)
if found_undone_tid is None:
return
if transaction_object:
try:
current_tid = current_data_tid = u64(transaction_object[2])
except struct.error:
current_tid = current_data_tid = tid
else:
current_tid, current_data_tid = getDataTID(before_tid=ltid)
if current_tid is None:
return None, None, False
is_current = undone_data_tid in (current_data_tid, tid)
# Load object data as it was before given transaction.
# It can be None, in which case it means we are undoing object
# creation.
_, data_tid = getDataTID(before_tid=undone_tid)
if data_tid is not None:
data_tid = p64(data_tid)
return p64(current_tid), data_tid, is_current
@abstract
def lockTransaction(self, tid, ttid):
"""Mark voted transaction 'ttid' as committed with given 'tid'
All pending changes are committed just before returning to the caller.
"""
@abstract
def unlockTransaction(self, tid, ttid, trans, obj):
"""Finalize a transaction by moving data to a finished area."""
@abstract
def abortTransaction(self, ttid):
""""""
@abstract
def deleteTransaction(self, tid):
""""""
@abstract
def deleteObject(self, oid, serial=None):
"""Delete given object. If serial is given, only delete that serial for
given oid."""
@abstract
def _deleteRange(self, partition, min_tid=None, max_tid=None):
"""Delete all objects and transactions between given min_tid (excluded)
and max_tid (included)"""
def truncate(self):
tid = self.getTruncateTID()
if tid:
tid = util.u64(tid)
assert tid, tid
cell_list = []
my_nid = self.getUUID()
for partition, state in self.iterAssignedCells():
if state > tid:
cell_list.append((partition, my_nid, tid))
self._deleteRange(partition, tid)
if cell_list:
self._changePartitionTable(cell_list)
self._setTruncateTID(None)
self.commit()
def repair(self, weak_app, dry_run):
t = self._repairing
if t and t.is_alive():
logging.error('already repairing')
return
def repair():
l = threading.Lock()
l.acquire()
def finalize():
try:
if data_id_list and not dry_run:
self.commit()
logging.info("repair: deleted %s orphan records",
self._pruneData(data_id_list))
self.commit()
finally:
l.release()
try:
with self._duplicate() as db:
data_id_list = db.getOrphanList()
logging.info("repair: found %s records that may be orphan",
len(data_id_list))
weak_app().em.wakeup(finalize)
l.acquire()
finally:
del self._repairing
logging.info("repair: done")
t = self._repairing = threading.Thread(target=repair)
t.daemon = 1
t.start()
@abstract
def getTransaction(self, tid, all = False):
"""Return a tuple of the list of OIDs, user information,
a description, and extension information, for a given transaction
ID. If there is no such transaction ID in a database, return None.
If all is true, the transaction must be searched from a temporary
area as well."""
@abstract
def getObjectHistory(self, oid, offset, length):
"""Return a list of serials and sizes for a given object ID.
The length specifies the maximum size of such a list. Result starts
with latest serial, and the list must be sorted in descending order.
If there is no such object ID in a database, return None."""
@abstract
def getReplicationObjectList(self, min_tid, max_tid, length, partition,
min_oid):
"""Return a dict of length oids grouped by serial at (or above)
min_tid and min_oid and below max_tid, for given partition,
sorted in ascending order."""
def _getTIDList(self, offset, length, partition_list):
"""Return a list of TIDs in ascending order from an offset,
at most the specified length. The list of partitions are passed
to filter out non-applicable TIDs."""
@requires(_getTIDList)
def getTIDList(self, offset, length, partition_list):
if partition_list:
if self._readable_set.issuperset(partition_list):
return map(util.p64, self._getTIDList(
offset, length, partition_list))
raise NonReadableCell
return ()
@abstract
def getReplicationTIDList(self, min_tid, max_tid, length, partition):
"""Return a list of TIDs in ascending order from an initial tid value,
at most the specified length up to max_tid. The partition number is
passed to filter out non-applicable TIDs."""
@abstract
def pack(self, tid, updateObjectDataForPack):
"""Prune all non-current object revisions at given tid.
updateObjectDataForPack is a function called for each deleted object
and revision with:
- OID
- packed TID
- new value_serial
If object data was moved to an after-pack-tid revision, this
parameter contains the TID of that revision, allowing to backlink
to it.
- getObjectData function
To call if value_serial is None and an object needs to be updated.
Takes no parameter, returns a 3-tuple: compression, data_id,
value
"""
@abstract
def checkTIDRange(self, partition, length, min_tid, max_tid):
"""
Generate a digest from transaction list.
min_tid (packed)
TID at which verification starts.
length (int)
Maximum number of records to include in result.
Returns a 3-tuple:
- number of records actually found
- a SHA1 computed from record's TID
ZERO_HASH if no record found
- biggest TID found (ie, TID of last record read)
ZERO_TID if not record found
"""
@abstract
def checkSerialRange(self, partition, length, min_tid, max_tid, min_oid):
"""
Generate a digest from object list.
min_oid (packed)
OID at which verification starts.
min_tid (packed)
Serial of min_oid object at which search should start.
length
Maximum number of records to include in result.
Returns a 5-tuple:
- number of records actually found
- a SHA1 computed from record's OID
ZERO_HASH if no record found
- biggest OID found (ie, OID of last record read)
ZERO_OID if no record found
- a SHA1 computed from record's serial
ZERO_HASH if no record found
- biggest serial found for biggest OID found (ie, serial of last
record read)
ZERO_TID if no record found
"""
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/mysqldb.py 0000664 0000000 0000000 00000115262 13465024610 0027270 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from binascii import a2b_hex
from collections import OrderedDict
from functools import wraps
import MySQLdb
from MySQLdb import DataError, IntegrityError, \
OperationalError, ProgrammingError
from MySQLdb.constants.CR import SERVER_GONE_ERROR, SERVER_LOST
from MySQLdb.constants.ER import DATA_TOO_LONG, DUP_ENTRY, NO_SUCH_TABLE
# BBB: the following 2 constants were added to mysqlclient 1.3.8
DROP_LAST_PARTITION = 1508
SAME_NAME_PARTITION = 1517
from array import array
from hashlib import sha1
import os
import re
import string
import struct
import sys
import time
from . import LOG_QUERIES, DatabaseFailure
from .manager import DatabaseManager, splitOIDField
from neo.lib import logging, util
from neo.lib.interfaces import implements
from neo.lib.protocol import ZERO_OID, ZERO_TID, ZERO_HASH
class MysqlError(DatabaseFailure):
def __init__(self, exc, query=None):
self.exc = exc
self.query = query
code = property(lambda self: self.exc.args[0])
def __str__(self):
msg = 'MySQL error %s: %s' % self.exc.args
return msg if self.query is None else '%s\nQuery: %s' % (
msg, getPrintableQuery(self.query[:1000]))
def getPrintableQuery(query, max=70):
return ''.join(c if c in string.printable and c not in '\t\x0b\x0c\r'
else '\\x%02x' % ord(c) for c in query)
def auto_reconnect(wrapped):
def wrapper(self, *args):
# Try 3 times at most. When it fails too often for the same
# query then the disconnection is likely caused by this query.
# We don't want to enter into an infinite loop.
retry = 2
while 1:
try:
return wrapped(self, *args)
except OperationalError as m:
# IDEA: Is it safe to retry in case of DISK_FULL ?
# XXX: However, this would another case of failure that would
# be unnoticed by other nodes (ADMIN & MASTER). When
# there are replicas, it may be preferred to not retry.
if (self._active
or SERVER_GONE_ERROR != m.args[0] != SERVER_LOST
or not retry):
if self.LOCK:
raise MysqlError(m, *args)
raise # caught upper for secondary connections
logging.info('the MySQL server is gone; reconnecting')
assert not self._deferred
self.close()
retry -= 1
return wraps(wrapped)(wrapper)
@implements
class MySQLDatabaseManager(DatabaseManager):
"""This class manages a database on MySQL."""
VERSION = 3
ENGINES = "InnoDB", "RocksDB", "TokuDB"
_engine = ENGINES[0] # default engine
_use_partition = False
_max_allowed_packet = 32769 * 1024
def _parse(self, database):
""" Get the database credentials (username, password, database) """
# expected pattern : [user[:password]@]database[(~|.|/)unix_socket]
self.user, self.passwd, self.db, self.socket = re.match(
'(?:([^:]+)(?::(.*))?@)?([^~./]+)(.+)?$', database).groups()
def _close(self):
try:
conn = self.__dict__.pop('conn')
except KeyError:
return
conn.close()
def __getattr__(self, attr):
if attr == 'conn':
self._tryConnect()
return super(MySQLDatabaseManager, self).__getattr__(attr)
def _tryConnect(self):
kwd = {'db' : self.db}
if self.user:
kwd['user'] = self.user
if self.passwd is not None:
kwd['passwd'] = self.passwd
if self.socket:
kwd['unix_socket'] = os.path.expanduser(self.socket)
logging.info('connecting to MySQL on the database %s with user %s',
self.db, self.user)
self._active = 0
if self._wait < 0:
timeout_at = None
else:
timeout_at = time.time() + self._wait
last = None
while True:
try:
self.conn = MySQLdb.connect(**kwd)
break
except Exception as e:
if None is not timeout_at <= time.time():
raise
e = str(e)
if last == e:
log = logging.debug
else:
last = e
log = logging.exception
log('Connection to MySQL failed, retrying.')
time.sleep(1)
self._config = {}
conn = self.conn
conn.autocommit(False)
conn.query("SET SESSION group_concat_max_len = %u" % (2**32-1))
conn.set_sql_mode("TRADITIONAL,NO_ENGINE_SUBSTITUTION")
def query(sql):
conn.query(sql)
r = conn.store_result()
return r.fetch_row(r.num_rows())
if self.LOCK:
(locked,), = query("SELECT GET_LOCK('%s.%s', 0)"
% (self.db, self.LOCK))
if not locked:
sys.exit(self.LOCKED)
(name, value), = query(
"SHOW VARIABLES WHERE variable_name='max_allowed_packet'")
if int(value) < self._max_allowed_packet:
raise DatabaseFailure("Global variable %r is too small."
" Minimal value must be %uk."
% (name, self._max_allowed_packet // 1024))
self._max_allowed_packet = int(value)
try:
self._dedup = bool(query(
"SHOW INDEX FROM data WHERE key_name='hash'"))
except ProgrammingError as e:
if e.args[0] != NO_SUCH_TABLE:
raise
self._dedup = None
if not self.LOCK:
# Prevent automatic reconnection for secondary connections.
self._active = 1
self._commit = self.conn.commit
_connect = auto_reconnect(_tryConnect)
def autoReconnect(self, f):
assert self._active and not self.LOCK
@auto_reconnect
def try_once(self):
if self._active:
try:
f()
finally:
self._active = 0
return True
while not try_once(self):
# Avoid reconnecting too often.
# Since this is used to wrap an arbitrary long process and
# not just a single query, we can't limit the number of retries.
time.sleep(5)
self._connect()
def _commit(self):
# XXX: Should we translate OperationalError into MysqlError ?
self.conn.commit()
self._active = 0
@auto_reconnect
def query(self, query):
"""Query data from a database."""
if LOG_QUERIES:
logging.debug('querying %s...',
getPrintableQuery(query.split('\n', 1)[0][:70]))
conn = self.conn
conn.query(query)
if query.startswith("SELECT "):
r = conn.store_result()
return tuple([
tuple([d.tostring() if isinstance(d, array) else d
for d in row])
for row in r.fetch_row(r.num_rows())])
r = query.split(None, 1)[0]
if r in ("INSERT", "REPLACE", "DELETE", "UPDATE"):
self._active = 1
else:
assert r in ("ALTER", "CREATE", "DROP"), query
@property
def escape(self):
"""Escape special characters in a string."""
return self.conn.escape_string
def _getDevPath(self):
# BBB: MySQL is moving to Performance Schema.
return self.query("SELECT * FROM information_schema.global_variables"
" WHERE variable_name='datadir'")[0][1]
def erase(self):
self.query("DROP TABLE IF EXISTS"
" config, pt, trans, obj, data, bigdata, ttrans, tobj")
def nonempty(self, table):
try:
return bool(self.query("SELECT 1 FROM %s LIMIT 1" % table))
except ProgrammingError as e:
if e.args[0] != NO_SUCH_TABLE:
raise
def _alterTable(self, schema_dict, table, select="*"):
q = self.query
new = 'new_' + table
if self.nonempty(table) is None:
if self.nonempty(new) is None:
return
else:
q("DROP TABLE IF EXISTS " + new)
q(schema_dict.pop(table) % new
+ " SELECT %s FROM %s" % (select, table))
q("DROP TABLE " + table)
q("ALTER TABLE %s RENAME TO %s" % (new, table))
def _migrate1(self, _):
self._checkNoUnfinishedTransactions()
self.query("DROP TABLE IF EXISTS ttrans")
def _migrate2(self, schema_dict):
self._alterTable(schema_dict, 'obj')
def _migrate3(self, schema_dict):
self._alterTable(schema_dict, 'pt', "rid as `partition`, nid,"
" CASE state"
" WHEN 0 THEN -1" # UP_TO_DATE
" WHEN 2 THEN -2" # FEEDING
" ELSE 1-state"
" END as tid")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False):
self._config.clear()
q = self.query
p = engine = self._engine
schema_dict = OrderedDict()
# The table "config" stores configuration
# parameters which affect the persistent data.
schema_dict['config'] = """CREATE TABLE %s (
name VARBINARY(255) NOT NULL PRIMARY KEY,
value VARBINARY(255) NULL
) ENGINE=""" + engine
# The table "pt" stores a partition table.
schema_dict['pt'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
nid INT NOT NULL,
tid BIGINT NOT NULL,
PRIMARY KEY (`partition`, nid)
) ENGINE=""" + engine
if self._use_partition:
p += """ PARTITION BY LIST (`partition`) (
PARTITION dummy VALUES IN (NULL))"""
if engine == "RocksDB":
cf = lambda name, rev=False: " COMMENT '%scf_neo_%s'" % (
'rev:' if rev else '', name)
else:
cf = lambda *_: ''
# The table "trans" stores information on committed transactions.
schema_dict['trans'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
tid BIGINT UNSIGNED NOT NULL,
packed BOOLEAN NOT NULL,
oids MEDIUMBLOB NOT NULL,
user BLOB NOT NULL,
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`partition`, tid){}
) ENGINE={}""".format(cf('append_meta'), p)
# The table "obj" stores committed object metadata.
schema_dict['obj'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
oid BIGINT UNSIGNED NOT NULL,
tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (`partition`, oid, tid){},
KEY tid (`partition`, tid, oid){},
KEY (data_id){}
) ENGINE={}""".format(cf('obj_pk', True),
cf('append_meta'), cf('append_meta'), p)
if engine == "TokuDB":
engine += " compression='tokudb_uncompressed'"
# The table "data" stores object data.
# We'd like to have partial index on 'hash' column (e.g. hash(4))
# but 'UNIQUE' constraint would not work as expected.
schema_dict['data'] = """CREATE TABLE %s (
id BIGINT UNSIGNED NOT NULL,
hash BINARY(20) NOT NULL,
compression TINYINT UNSIGNED NULL,
value MEDIUMBLOB NOT NULL,
PRIMARY KEY (id){}{}
) ENGINE={}""".format(cf('append'), """,
UNIQUE (hash, compression)""" + cf('no_comp') if dedup else "",
engine)
schema_dict['bigdata'] = """CREATE TABLE %s (
id INT UNSIGNED NOT NULL AUTO_INCREMENT,
value MEDIUMBLOB NOT NULL,
PRIMARY KEY (id){}
) ENGINE={}""".format(cf('append'), p)
# The table "ttrans" stores information on uncommitted transactions.
schema_dict['ttrans'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
tid BIGINT UNSIGNED,
packed BOOLEAN NOT NULL,
oids MEDIUMBLOB NOT NULL,
user BLOB NOT NULL,
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (ttid){}
) ENGINE={}""".format(cf('no_comp'), p)
# The table "tobj" stores uncommitted object metadata.
schema_dict['tobj'] = """CREATE TABLE %s (
`partition` SMALLINT UNSIGNED NOT NULL,
oid BIGINT UNSIGNED NOT NULL,
tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (tid, oid){}
) ENGINE={}""".format(cf('no_comp'), p)
if self.nonempty('config') is None:
q(schema_dict.pop('config') % 'config')
self._setConfiguration('version', self.VERSION)
else:
self.migrate(schema_dict)
for table, schema in schema_dict.iteritems():
q(schema % ('IF NOT EXISTS ' + table))
if self._dedup is None:
self._dedup = dedup
self._uncommitted_data.update(q("SELECT data_id, count(*)"
" FROM tobj WHERE data_id IS NOT NULL GROUP BY data_id"))
def getConfiguration(self, key):
try:
return self._config[key]
except KeyError:
sql_key = self.escape(str(key))
try:
r = self.query("SELECT value FROM config WHERE name = '%s'"
% sql_key)[0][0]
except IndexError:
r = None
self._config[key] = r
return r
def _setConfiguration(self, key, value):
q = self.query
e = self.escape
self._config[key] = value
k = e(str(key))
if value is None:
q("DELETE FROM config WHERE name = '%s'" % k)
return
value = str(value)
sql = "REPLACE INTO config VALUES ('%s', '%s')" % (k, e(value))
try:
q(sql)
except DataError as e:
if e.args[0] != DATA_TOO_LONG or len(value) < 256 or key != "zodb":
raise
q("ALTER TABLE config MODIFY value VARBINARY(%s) NULL" % len(value))
q(sql)
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt")[0][0]
def _getPartitionTable(self):
return self.query("SELECT * FROM pt")
def _getLastTID(self, partition, max_tid=None):
x = "WHERE `partition`=%s" % partition
if max_tid:
x += " AND tid<=%s" % max_tid
(tid,), = self.query(
"SELECT MAX(tid) as t FROM trans FORCE INDEX (PRIMARY)" + x)
return tid
def _getLastIDs(self, partition):
q = self.query
x = "WHERE `partition`=%s" % partition
(oid,), = q("SELECT MAX(oid) FROM obj FORCE INDEX (PRIMARY)" + x)
(tid,), = q("SELECT MAX(tid) FROM obj FORCE INDEX (tid)" + x)
return tid, oid
def _getDataLastId(self, partition):
return self.query("SELECT MAX(id) FROM data WHERE %s <= id AND id < %s"
% (partition << 48, (partition + 1) << 48))[0][0]
def _getUnfinishedTIDDict(self):
q = self.query
return q("SELECT ttid, tid FROM ttrans"), (ttid
for ttid, in q("SELECT DISTINCT tid FROM tobj"))
def getFinalTID(self, ttid):
ttid = util.u64(ttid)
# MariaDB is smart enough to realize that 'ttid' is constant.
r = self.query("SELECT tid FROM trans"
" WHERE `partition`=%s AND tid>=ttid AND ttid=%s LIMIT 1"
% (self._getReadablePartition(ttid), ttid))
if r:
return util.p64(r[0][0])
def getLastObjectTID(self, oid):
oid = util.u64(oid)
r = self.query("SELECT tid FROM obj FORCE INDEX(PRIMARY)"
" WHERE `partition`=%d AND oid=%d"
" ORDER BY tid DESC LIMIT 1"
% (self._getReadablePartition(oid), oid))
return util.p64(r[0][0]) if r else None
def _getNextTID(self, *args): # partition, oid, tid
r = self.query("SELECT tid FROM obj"
" FORCE INDEX(PRIMARY)"
" WHERE `partition`=%d AND oid=%d AND tid>%d"
" ORDER BY tid LIMIT 1" % args)
return r[0][0] if r else None
def _getObject(self, oid, tid=None, before_tid=None):
q = self.query
partition = self._getReadablePartition(oid)
sql = ('SELECT tid, compression, data.hash, value, value_tid'
' FROM obj FORCE INDEX(PRIMARY)'
' LEFT JOIN data ON (obj.data_id = data.id)'
' WHERE `partition` = %d AND oid = %d') % (partition, oid)
if before_tid is not None:
sql += ' AND tid < %d ORDER BY tid DESC LIMIT 1' % before_tid
elif tid is not None:
sql += ' AND tid = %d' % tid
else:
# XXX I want to express "HAVING tid = MAX(tid)", but
# MySQL does not use an index for a HAVING clause!
sql += ' ORDER BY tid DESC LIMIT 1'
r = q(sql)
try:
serial, compression, checksum, data, value_serial = r[0]
except IndexError:
return None
if compression and compression & 0x80:
compression &= 0x7f
data = ''.join(self._bigData(data))
return (serial, self._getNextTID(partition, oid, serial),
compression, checksum, data, value_serial)
def _changePartitionTable(self, cell_list, reset=False):
offset_list = []
q = self.query
if reset:
q("DELETE FROM pt")
for offset, nid, tid in cell_list:
# TODO: this logic should move out of database manager
# add 'dropCells(cell_list)' to API and use one query
if tid is None:
q("DELETE FROM pt WHERE `partition` = %d AND nid = %d"
% (offset, nid))
else:
offset_list.append(offset)
q("INSERT INTO pt VALUES (%d, %d, %d)"
" ON DUPLICATE KEY UPDATE tid = %d"
% (offset, nid, tid, tid))
if self._use_partition:
for offset in offset_list:
add = """ALTER TABLE %%s ADD PARTITION (
PARTITION p%u VALUES IN (%u))""" % (offset, offset)
for table in 'trans', 'obj':
try:
self.query(add % table)
except MysqlError as e:
if e.code != SAME_NAME_PARTITION:
raise
def dropPartitions(self, offset_list):
q = self.query
# XXX: these queries are inefficient (execution time increase with
# row count, although we use indexes) when there are rows to
# delete. It should be done as an idle task, by chunks.
for partition in offset_list:
where = " WHERE `partition`=%d" % partition
data_id_list = [x for x, in
q("SELECT DISTINCT data_id FROM obj FORCE INDEX(tid)"
"%s AND data_id IS NOT NULL" % where)]
if not self._use_partition:
q("DELETE FROM obj" + where)
q("DELETE FROM trans" + where)
self._pruneData(data_id_list)
if self._use_partition:
drop = "ALTER TABLE %s DROP PARTITION" + \
','.join(' p%u' % i for i in offset_list)
for table in 'trans', 'obj':
try:
self.query(drop % table)
except MysqlError as e:
if e.code != DROP_LAST_PARTITION:
raise
def _getUnfinishedDataIdList(self):
return [x for x, in self.query(
"SELECT data_id FROM tobj WHERE data_id IS NOT NULL")]
def dropPartitionsTemporary(self, offset_list=None):
where = "" if offset_list is None else \
" WHERE `partition` IN (%s)" % ','.join(map(str, offset_list))
q = self.query
q("DELETE FROM tobj" + where)
q("DELETE FROM ttrans" + where)
def storeTransaction(self, tid, object_list, transaction, temporary = True):
e = self.escape
u64 = util.u64
tid = u64(tid)
if temporary:
obj_table = 'tobj'
trans_table = 'ttrans'
else:
obj_table = 'obj'
trans_table = 'trans'
q = self.query
sql = ["REPLACE INTO %s VALUES " % obj_table]
values_max = self._max_allowed_packet - len(sql[0])
values_size = 0
for oid, data_id, value_serial in object_list:
oid = u64(oid)
partition = self._getPartition(oid)
if value_serial:
value_serial = u64(value_serial)
(data_id,), = q("SELECT data_id FROM obj"
" WHERE `partition`=%d AND oid=%d AND tid=%d"
% (partition, oid, value_serial))
if temporary:
self.holdData(data_id)
else:
value_serial = 'NULL'
value = "(%s,%s,%s,%s,%s)," % (
partition, oid, tid,
'NULL' if data_id is None else data_id,
value_serial)
values_size += len(value)
# actually: max_values < values_size + EXTRA - len(final comma)
# (test_max_allowed_packet checks that EXTRA == 2)
if values_max <= values_size:
sql[-1] = sql[-1][:-1] # remove final comma
q(''.join(sql))
del sql[1:]
values_size = len(value)
sql.append(value)
if values_size:
sql[-1] = value[:-1] # remove final comma
q(''.join(sql))
if transaction:
oid_list, user, desc, ext, packed, ttid = transaction
partition = self._getPartition(tid)
assert packed in (0, 1)
q("REPLACE INTO %s VALUES (%s,%s,%s,'%s','%s','%s','%s',%s)" % (
trans_table, partition, 'NULL' if temporary else tid, packed,
e(''.join(oid_list)), e(user), e(desc), e(ext), u64(ttid)))
_structLL = struct.Struct(">LL")
_unpackLL = _structLL.unpack
def getOrphanList(self):
return [x for x, in self.query(
"SELECT id FROM data LEFT JOIN obj ON (id=data_id)"
" WHERE data_id IS NULL")]
def _pruneData(self, data_id_list):
data_id_list = set(data_id_list).difference(self._uncommitted_data)
if data_id_list:
q = self.query
id_list = []
bigid_list = []
for id, value in q("SELECT id, IF(compression < 128, NULL, value)"
" FROM data LEFT JOIN obj ON (id = data_id)"
" WHERE id IN (%s) AND data_id IS NULL"
% ",".join(map(str, data_id_list))):
id_list.append(str(id))
if value:
bigdata_id, length = self._unpackLL(value)
bigid_list += xrange(bigdata_id,
bigdata_id + (length + 0x7fffff >> 23))
if id_list:
q("DELETE FROM data WHERE id IN (%s)" % ",".join(id_list))
if bigid_list:
q("DELETE FROM bigdata WHERE id IN (%s)"
% ",".join(map(str, bigid_list)))
return len(id_list)
return 0
def _bigData(self, value):
bigdata_id, length = self._unpackLL(value)
q = self.query
return (q("SELECT value FROM bigdata WHERE id=%s" % i)[0][0]
for i in xrange(bigdata_id,
bigdata_id + (length + 0x7fffff >> 23)))
def storeData(self, checksum, oid, data, compression, _pack=_structLL.pack):
e = self.escape
checksum = e(checksum)
if 0x1000000 <= len(data): # 16M (MEDIUMBLOB limit)
compression |= 0x80
q = self.query
if self._dedup:
for r, d in q("SELECT id, value FROM data"
" WHERE hash='%s' AND compression=%s"
% (checksum, compression)):
i = 0
for d in self._bigData(d):
j = i + len(d)
if data[i:j] != d:
raise IntegrityError(DUP_ENTRY)
i = j
if j != len(data):
raise IntegrityError(DUP_ENTRY)
return r
i = 'NULL'
length = len(data)
for j in xrange(0, length, 0x800000): # 8M
q("INSERT INTO bigdata VALUES (%s, '%s')"
% (i, e(data[j:j+0x800000])))
if not j:
i = bigdata_id = self.conn.insert_id()
i += 1
data = _pack(bigdata_id, length)
p = self._getPartition(util.u64(oid))
r = self._data_last_ids[p]
try:
self.query("INSERT INTO data VALUES (%s, '%s', %d, '%s')" %
(r, checksum, compression, e(data)))
except IntegrityError as e:
if e.args[0] == DUP_ENTRY:
(r, d), = self.query("SELECT id, value FROM data"
" WHERE hash='%s' AND compression=%s"
% (checksum, compression))
if d == data:
return r
raise
self._data_last_ids[p] = r + 1
return r
def loadData(self, data_id):
compression, hash, value = self.query(
"SELECT compression, hash, value FROM data where id=%s"
% data_id)[0]
if compression and compression & 0x80:
compression &= 0x7f
data = ''.join(self._bigData(data))
return compression, hash, value
del _structLL
def _getDataTID(self, oid, tid=None, before_tid=None):
sql = ('SELECT tid, value_tid FROM obj FORCE INDEX(PRIMARY)'
' WHERE `partition` = %d AND oid = %d'
) % (self._getReadablePartition(oid), oid)
if tid is not None:
sql += ' AND tid = %d' % tid
elif before_tid is not None:
sql += ' AND tid < %d ORDER BY tid DESC LIMIT 1' % before_tid
else:
# XXX I want to express "HAVING tid = MAX(tid)", but
# MySQL does not use an index for a HAVING clause!
sql += ' ORDER BY tid DESC LIMIT 1'
r = self.query(sql)
return r[0] if r else (None, None)
def lockTransaction(self, tid, ttid):
u64 = util.u64
self.query("UPDATE ttrans SET tid=%d WHERE ttid=%d LIMIT 1"
% (u64(tid), u64(ttid)))
self.commit()
def unlockTransaction(self, tid, ttid, trans, obj):
q = self.query
u64 = util.u64
tid = u64(tid)
if trans:
q("INSERT INTO trans SELECT * FROM ttrans WHERE tid=%d" % tid)
q("DELETE FROM ttrans WHERE tid=%d" % tid)
if not obj:
return
sql = " FROM tobj WHERE tid=%d" % u64(ttid)
data_id_list = [x for x, in q("SELECT data_id%s AND data_id IS NOT NULL"
% sql)]
q("INSERT INTO obj SELECT `partition`, oid, %d, data_id, value_tid %s"
% (tid, sql))
q("DELETE" + sql)
self.releaseData(data_id_list)
def abortTransaction(self, ttid):
ttid = util.u64(ttid)
q = self.query
q("DELETE FROM tobj WHERE tid=%s" % ttid)
q("DELETE FROM ttrans WHERE ttid=%s" % ttid)
def deleteTransaction(self, tid):
tid = util.u64(tid)
self.query("DELETE FROM trans WHERE `partition`=%s AND tid=%s" %
(self._getPartition(tid), tid))
def deleteObject(self, oid, serial=None):
u64 = util.u64
oid = u64(oid)
sql = " FROM obj WHERE `partition`=%d AND oid=%d" \
% (self._getPartition(oid), oid)
if serial:
sql += ' AND tid=%d' % u64(serial)
q = self.query
data_id_list = [x for x, in q(
"SELECT DISTINCT data_id%s AND data_id IS NOT NULL" % sql)]
q("DELETE" + sql)
self._pruneData(data_id_list)
def _deleteRange(self, partition, min_tid=None, max_tid=None):
sql = " WHERE `partition`=%d" % partition
if min_tid is not None:
sql += " AND %d < tid" % min_tid
if max_tid is not None:
sql += " AND tid <= %d" % max_tid
q = self.query
q("DELETE FROM trans" + sql)
sql = " FROM obj" + sql
data_id_list = [x for x, in q(
"SELECT DISTINCT data_id%s AND data_id IS NOT NULL" % sql)]
q("DELETE" + sql)
self._pruneData(data_id_list)
def getTransaction(self, tid, all = False):
tid = util.u64(tid)
q = self.query
r = q("SELECT oids, user, description, ext, packed, ttid"
" FROM trans WHERE `partition` = %d AND tid = %d"
% (self._getReadablePartition(tid), tid))
if not r and all:
r = q("SELECT oids, user, description, ext, packed, ttid"
" FROM ttrans WHERE tid = %d" % tid)
if r:
oids, user, desc, ext, packed, ttid = r[0]
oid_list = splitOIDField(tid, oids)
return oid_list, user, desc, ext, bool(packed), util.p64(ttid)
def getObjectHistory(self, oid, offset, length):
# FIXME: This method doesn't take client's current transaction id as
# parameter, which means it can return transactions in the future of
# client's transaction.
oid = util.u64(oid)
p64 = util.p64
r = self.query("SELECT tid, IF(compression < 128, LENGTH(value),"
" CAST(CONV(HEX(SUBSTR(value, 5, 4)), 16, 10) AS INT))"
" FROM obj FORCE INDEX(PRIMARY)"
" LEFT JOIN data ON (obj.data_id = data.id)"
" WHERE `partition` = %d AND oid = %d AND tid >= %d"
" ORDER BY tid DESC LIMIT %d, %d" %
(self._getReadablePartition(oid), oid,
self._getPackTID(), offset, length))
if r:
return [(p64(tid), length or 0) for tid, length in r]
def _fetchObject(self, oid, tid):
r = self.query(
'SELECT tid, compression, data.hash, value, value_tid'
' FROM obj FORCE INDEX(PRIMARY)'
' LEFT JOIN data ON (obj.data_id = data.id)'
' WHERE `partition` = %d AND oid = %d AND tid = %d'
% (self._getReadablePartition(oid), oid, tid))
if r:
r = r[0]
compression = r[1]
if compression and compression & 0x80:
return (r[0], compression & 0x7f, r[2],
''.join(self._bigData(r[3])), r[4])
return r
def getReplicationObjectList(self, min_tid, max_tid, length, partition,
min_oid):
u64 = util.u64
p64 = util.p64
min_tid = u64(min_tid)
r = self.query('SELECT tid, oid FROM obj FORCE INDEX(tid)'
' WHERE `partition` = %d AND tid <= %d'
' AND (tid = %d AND %d <= oid OR %d < tid)'
' ORDER BY tid ASC, oid ASC LIMIT %d' % (
partition, u64(max_tid), min_tid, u64(min_oid), min_tid, length))
return [(p64(serial), p64(oid)) for serial, oid in r]
def _getTIDList(self, offset, length, partition_list):
return (t[0] for t in self.query(
"SELECT tid FROM trans WHERE `partition` in (%s)"
" ORDER BY tid DESC LIMIT %d,%d"
% (','.join(map(str, partition_list)), offset, length)))
def getReplicationTIDList(self, min_tid, max_tid, length, partition):
u64 = util.u64
p64 = util.p64
min_tid = u64(min_tid)
max_tid = u64(max_tid)
r = self.query("""SELECT tid FROM trans
WHERE `partition` = %(partition)d
AND tid >= %(min_tid)d AND tid <= %(max_tid)d
ORDER BY tid ASC LIMIT %(length)d""" % {
'partition': partition,
'min_tid': min_tid,
'max_tid': max_tid,
'length': length,
})
return [p64(t[0]) for t in r]
def _updatePackFuture(self, oid, orig_serial, max_serial):
q = self.query
# Before deleting this objects revision, see if there is any
# transaction referencing its value at max_serial or above.
# If there is, copy value to the first future transaction. Any further
# reference is just updated to point to the new data location.
value_serial = None
kw = {
'partition': self._getReadablePartition(oid),
'oid': oid,
'orig_tid': orig_serial,
'max_tid': max_serial,
'new_tid': 'NULL',
}
for kw['table'] in 'obj', 'tobj':
for kw['tid'], in q('SELECT tid FROM %(table)s'
' WHERE `partition`=%(partition)d AND oid=%(oid)d'
' AND tid>=%(max_tid)d AND value_tid=%(orig_tid)d'
' ORDER BY tid ASC' % kw):
q('UPDATE %(table)s SET value_tid=%(new_tid)s'
' WHERE `partition`=%(partition)d AND oid=%(oid)d'
' AND tid=%(tid)d' % kw)
if value_serial is None:
# First found, mark its serial for future reference.
kw['new_tid'] = value_serial = kw['tid']
return value_serial
def pack(self, tid, updateObjectDataForPack):
# TODO: unit test (along with updatePackFuture)
p64 = util.p64
tid = util.u64(tid)
updatePackFuture = self._updatePackFuture
getPartition = self._getReadablePartition
q = self.query
self._setPackTID(tid)
for count, oid, max_serial in q("SELECT COUNT(*) - 1, oid, MAX(tid)"
" FROM obj FORCE INDEX(PRIMARY)"
" WHERE tid <= %d GROUP BY oid"
% tid):
partition = getPartition(oid)
if q("SELECT 1 FROM obj WHERE `partition` = %d"
" AND oid = %d AND tid = %d AND data_id IS NULL"
% (partition, oid, max_serial)):
max_serial += 1
elif not count:
continue
# There are things to delete for this object
data_id_set = set()
sql = ' FROM obj WHERE `partition`=%d AND oid=%d' \
' AND tid<%d' % (partition, oid, max_serial)
for serial, data_id in q('SELECT tid, data_id' + sql):
data_id_set.add(data_id)
new_serial = updatePackFuture(oid, serial, max_serial)
if new_serial:
new_serial = p64(new_serial)
updateObjectDataForPack(p64(oid), p64(serial),
new_serial, data_id)
q('DELETE' + sql)
data_id_set.discard(None)
self._pruneData(data_id_set)
self.commit()
def checkTIDRange(self, partition, length, min_tid, max_tid):
count, tid_checksum, max_tid = self.query(
"""SELECT COUNT(*), SHA1(GROUP_CONCAT(tid SEPARATOR ",")), MAX(tid)
FROM (SELECT tid FROM trans
WHERE `partition` = %(partition)s
AND tid >= %(min_tid)d
AND tid <= %(max_tid)d
ORDER BY tid ASC %(limit)s) AS t""" % {
'partition': partition,
'min_tid': util.u64(min_tid),
'max_tid': util.u64(max_tid),
'limit': '' if length is None else 'LIMIT %u' % length,
})[0]
if count:
return count, a2b_hex(tid_checksum), util.p64(max_tid)
return 0, ZERO_HASH, ZERO_TID
def checkSerialRange(self, partition, length, min_tid, max_tid, min_oid):
u64 = util.u64
# We don't ask MySQL to compute everything (like in checkTIDRange)
# because it's difficult to get the last serial _for the last oid_.
# We would need a function (that could be named 'LAST') that returns the
# last grouped value, instead of the greatest one.
r = self.query(
"""SELECT tid, oid
FROM obj FORCE INDEX(tid)
WHERE `partition` = %(partition)s
AND tid <= %(max_tid)d
AND (tid > %(min_tid)d OR
tid = %(min_tid)d AND oid >= %(min_oid)d)
ORDER BY tid, oid %(limit)s""" % {
'min_oid': u64(min_oid),
'min_tid': u64(min_tid),
'max_tid': u64(max_tid),
'limit': '' if length is None else 'LIMIT %u' % length,
'partition': partition,
})
if r:
p64 = util.p64
return (len(r),
sha1(','.join(str(x[0]) for x in r)).digest(),
p64(r[-1][0]),
sha1(','.join(str(x[1]) for x in r)).digest(),
p64(r[-1][1]))
return 0, ZERO_HASH, ZERO_TID, ZERO_HASH, ZERO_OID
def _cmdline(self):
for x in ('u', self.user), ('p', self.passwd), ('S', self.socket):
if x[1]:
yield '-%s%s' % x
yield self.db
def dump(self):
import subprocess
cmd = ['mysqldump', '--compact', '--hex-blob']
cmd += self._cmdline()
return subprocess.check_output(cmd)
def _restore(self, sql):
import subprocess
cmd = ['mysql']
cmd += self._cmdline()
p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
p.communicate(sql)
retcode = p.wait()
if retcode:
raise subprocess.CalledProcessError(retcode, cmd)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/database/sqlite.py 0000664 0000000 0000000 00000070637 13465024610 0027124 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2012-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from collections import OrderedDict
import os
import sqlite3
from hashlib import sha1
import string
import traceback
from . import LOG_QUERIES
from .manager import DatabaseManager, splitOIDField
from neo.lib import logging, util
from neo.lib.interfaces import implements
from neo.lib.protocol import ZERO_OID, ZERO_TID, ZERO_HASH
def unique_constraint_message(table, *columns):
c = sqlite3.connect(":memory:")
values = '?' * len(columns)
insert = "INSERT INTO %s VALUES(%s)" % (table, ', '.join(values))
x = "%s (%s)" % (table, ', '.join(columns))
c.execute("CREATE TABLE " + x)
c.execute("CREATE UNIQUE INDEX i ON " + x)
try:
c.executemany(insert, (values, values))
except sqlite3.IntegrityError, e:
return e.args[0]
assert False
def retry_if_locked(f, *args):
try:
return f(*args)
except sqlite3.OperationalError as e:
x = e.args[0]
if x != 'database is locked':
raise
msg = traceback.format_exception_only(type(e), e)
msg += traceback.format_stack()
logging.warning(''.join(msg))
while 1:
try:
return f(*args)
except sqlite3.OperationalError as e:
if e.args[0] != x:
raise
@implements
class SQLiteDatabaseManager(DatabaseManager):
"""This class manages a database on SQLite.
CAUTION: Make sure we never use statement journal files, as explained at
https://www.sqlite.org/tempfiles.html for more information.
In other words, temporary files (by default in /var/tmp !) must
never be used for small requests.
"""
VERSION = 3
def _parse(self, database):
self.db = os.path.expanduser(database)
def _close(self):
self.conn.close()
def _connect(self):
logging.info('connecting to SQLite database %r', self.db)
self.conn = sqlite3.connect(self.db, check_same_thread=False)
self.conn.text_factory = str
self.lock(self.db)
if self.UNSAFE:
q = self.query
q("PRAGMA synchronous = OFF")
q("PRAGMA journal_mode = MEMORY")
self._config = {}
def _getDevPath(self):
return self.db
def _commit(self):
retry_if_locked(self.conn.commit)
if LOG_QUERIES:
def query(self, query):
printable_char_list = []
for c in query.split('\n', 1)[0][:70]:
if c not in string.printable or c in '\t\x0b\x0c\r':
c = '\\x%02x' % ord(c)
printable_char_list.append(c)
logging.debug('querying %s...', ''.join(printable_char_list))
return self.conn.execute(query)
else:
query = property(lambda self: self.conn.execute)
def erase(self):
for t in 'config', 'pt', 'trans', 'obj', 'data', 'ttrans', 'tobj':
self.query('DROP TABLE IF EXISTS ' + t)
def nonempty(self, table):
try:
return bool(self.query(
"SELECT 1 FROM %s LIMIT 1" % table).fetchone())
except sqlite3.OperationalError as e:
if not e.args[0].startswith("no such table:"):
raise
def _alterTable(self, schema_dict, table, select="*"):
# BBB: As explained in _setup, no transactional DDL
# so let's do the same dance as for MySQL.
q = self.query
new = 'new_' + table
if self.nonempty(table) is None:
if self.nonempty(new) is None:
return
else:
q("DROP TABLE IF EXISTS " + new)
q(schema_dict.pop(table) % new)
q("INSERT INTO %s SELECT %s FROM %s" % (new, select, table))
q("DROP TABLE " + table)
q("ALTER TABLE %s RENAME TO %s" % (new, table))
def _migrate1(self, *_):
self._checkNoUnfinishedTransactions()
self.query("DROP TABLE IF EXISTS ttrans")
def _migrate2(self, schema_dict, index_dict):
self._alterTable(schema_dict, 'obj')
def _migrate3(self, schema_dict, index_dict):
self._alterTable(schema_dict, 'pt', "rid, nid, CASE state"
" WHEN 0 THEN -1" # UP_TO_DATE
" WHEN 2 THEN -2" # FEEDING
" ELSE 1-state END")
# Let's wait for a more important change to clean up,
# so that users can still downgrade.
if 0:
def _migrate4(self, schema_dict, index_dict):
self._setConfiguration('partitions', None)
def _setup(self, dedup=False):
# BBB: SQLite has transactional DDL but before Python 3.6,
# the binding automatically commits between such statements.
# This anti-feature causes this method to be relatively slow.
# Unit tests enables the UNSAFE boolean flag.
self._config.clear()
q = self.query
schema_dict = OrderedDict()
index_dict = {}
# The table "config" stores configuration
# parameters which affect the persistent data.
schema_dict['config'] = """CREATE TABLE %s (
name TEXT NOT NULL PRIMARY KEY,
value TEXT)
"""
# The table "pt" stores a partition table.
schema_dict['pt'] = """CREATE TABLE %s (
partition INTEGER NOT NULL,
nid INTEGER NOT NULL,
tid INTEGER NOT NULL,
PRIMARY KEY (partition, nid))
"""
# The table "trans" stores information on committed transactions.
schema_dict['trans'] = """CREATE TABLE %s (
partition INTEGER NOT NULL,
tid INTEGER NOT NULL,
packed BOOLEAN NOT NULL,
oids BLOB NOT NULL,
user BLOB NOT NULL,
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid INTEGER NOT NULL,
PRIMARY KEY (partition, tid)
) WITHOUT ROWID
"""
# The table "obj" stores committed object metadata.
schema_dict['obj'] = """CREATE TABLE %s (
partition INTEGER NOT NULL,
oid INTEGER NOT NULL,
tid INTEGER NOT NULL,
data_id INTEGER,
value_tid INTEGER,
PRIMARY KEY (partition, oid, tid)
) WITHOUT ROWID
"""
index_dict['obj'] = (
"CREATE INDEX %s ON %s(partition, tid, oid)",
"CREATE INDEX %s ON %s(data_id)")
# The table "data" stores object data.
schema_dict['data'] = """CREATE TABLE %s (
id INTEGER PRIMARY KEY,
hash BLOB NOT NULL,
compression INTEGER NOT NULL,
value BLOB NOT NULL)
"""
if dedup:
index_dict['data'] = (
"CREATE UNIQUE INDEX %s ON %s(hash, compression)",)
# The table "ttrans" stores information on uncommitted transactions.
schema_dict['ttrans'] = """CREATE TABLE %s (
partition INTEGER NOT NULL,
tid INTEGER,
packed BOOLEAN NOT NULL,
oids BLOB NOT NULL,
user BLOB NOT NULL,
description BLOB NOT NULL,
ext BLOB NOT NULL,
ttid INTEGER NOT NULL)
"""
# The table "tobj" stores uncommitted object metadata.
schema_dict['tobj'] = """CREATE TABLE %s (
partition INTEGER NOT NULL,
oid INTEGER NOT NULL,
tid INTEGER NOT NULL,
data_id INTEGER,
value_tid INTEGER,
PRIMARY KEY (tid, oid))
"""
if self.nonempty('config') is None:
q(schema_dict.pop('config') % 'config')
self._setConfiguration('version', self.VERSION)
else:
self.migrate(schema_dict, index_dict)
for table, schema in schema_dict.iteritems():
q(schema % ('IF NOT EXISTS ' + table))
for table, index in index_dict.iteritems():
for i, index in enumerate(index, 1):
q(index % ('IF NOT EXISTS _%s_i%s' % (table, i), table))
self._uncommitted_data.update(q("SELECT data_id, count(*)"
" FROM tobj WHERE data_id IS NOT NULL GROUP BY data_id"))
def getConfiguration(self, key):
try:
return self._config[key]
except KeyError:
try:
r = self.query("SELECT value FROM config WHERE name=?",
(key,)).fetchone()[0]
except TypeError:
r = None
self._config[key] = r
return r
def _setConfiguration(self, key, value):
q = self.query
self._config[key] = value
if value is None:
q("DELETE FROM config WHERE name=?", (key,))
else:
q("REPLACE INTO config VALUES (?,?)", (key, str(value)))
def _getMaxPartition(self):
return self.query("SELECT MAX(`partition`) FROM pt").next()[0]
def _getPartitionTable(self):
return self.query("SELECT * FROM pt")
def _getLastTID(self, partition, max_tid=None):
x = self.query
if max_tid is None:
x = x("SELECT MAX(tid) FROM trans WHERE partition=?", (partition,))
else:
x = x("SELECT MAX(tid) FROM trans WHERE partition=? AND tid<=?",
(partition, max_tid))
return x.next()[0]
def _getLastIDs(self, *args):
q = self.query
(oid,), = q("SELECT MAX(oid) FROM obj WHERE `partition`=?", args)
(tid,), = q("SELECT MAX(tid) FROM obj WHERE `partition`=?", args)
return tid, oid
def _getDataLastId(self, partition):
return self.query("SELECT MAX(id) FROM data WHERE %s <= id AND id < %s"
% (partition << 48, (partition + 1) << 48)).fetchone()[0]
def _getUnfinishedTIDDict(self):
q = self.query
return q("SELECT ttid, tid FROM ttrans"), (ttid
for ttid, in q("SELECT DISTINCT tid FROM tobj"))
def getFinalTID(self, ttid):
ttid = util.u64(ttid)
# As of SQLite 3.8.7.1, 'tid>=ttid' would ignore the index on tid,
# even though ttid is a constant.
for tid, in self.query("SELECT tid FROM trans"
" WHERE partition=? AND tid>=? AND ttid=? LIMIT 1",
(self._getReadablePartition(ttid), ttid, ttid)):
return util.p64(tid)
def getLastObjectTID(self, oid):
oid = util.u64(oid)
r = self.query("SELECT tid FROM obj"
" WHERE partition=? AND oid=?"
" ORDER BY tid DESC LIMIT 1",
(self._getReadablePartition(oid), oid)).fetchone()
return r and util.p64(r[0])
def _getNextTID(self, *args): # partition, oid, tid
r = self.query("""SELECT tid FROM obj
WHERE partition=? AND oid=? AND tid>?
ORDER BY tid LIMIT 1""", args).fetchone()
return r and r[0]
def _getObject(self, oid, tid=None, before_tid=None):
q = self.query
partition = self._getReadablePartition(oid)
sql = ('SELECT tid, compression, data.hash, value, value_tid'
' FROM obj LEFT JOIN data ON obj.data_id = data.id'
' WHERE partition=? AND oid=?')
if tid is not None:
r = q(sql + ' AND tid=?', (partition, oid, tid))
elif before_tid is not None:
r = q(sql + ' AND tid ORDER BY tid DESC LIMIT 1',
(partition, oid, before_tid))
else:
r = q(sql + ' ORDER BY tid DESC LIMIT 1', (partition, oid))
try:
serial, compression, checksum, data, value_serial = r.fetchone()
except TypeError:
return None
if checksum:
checksum = str(checksum)
data = str(data)
return (serial, self._getNextTID(partition, oid, serial),
compression, checksum, data, value_serial)
def _changePartitionTable(self, cell_list, reset=False):
q = self.query
if reset:
q("DELETE FROM pt")
for offset, nid, state in cell_list:
# TODO: this logic should move out of database manager
# add 'dropCells(cell_list)' to API and use one query
# WKRD: Why does SQLite need a statement journal file
# whereas we try to replace only 1 value ?
# We don't want to remove the 'NOT NULL' constraint
# so we must simulate a "REPLACE OR FAIL".
q("DELETE FROM pt WHERE partition=? AND nid=?", (offset, nid))
if state is not None:
q("INSERT OR FAIL INTO pt VALUES (?,?,?)",
(offset, nid, int(state)))
def dropPartitions(self, offset_list):
where = " WHERE partition=?"
q = self.query
for partition in offset_list:
args = partition,
data_id_list = [x for x, in q(
"SELECT DISTINCT data_id FROM obj%s AND data_id IS NOT NULL"
% where, args)]
q("DELETE FROM obj" + where, args)
q("DELETE FROM trans" + where, args)
self._pruneData(data_id_list)
def _getUnfinishedDataIdList(self):
return [x for x, in self.query(
"SELECT data_id FROM tobj WHERE data_id IS NOT NULL")]
def dropPartitionsTemporary(self, offset_list=None):
where = "" if offset_list is None else \
" WHERE `partition` IN (%s)" % ','.join(map(str, offset_list))
q = self.query
q("DELETE FROM tobj" + where)
q("DELETE FROM ttrans" + where)
def storeTransaction(self, tid, object_list, transaction, temporary=True):
u64 = util.u64
tid = u64(tid)
T = 't' if temporary else ''
obj_sql = "INSERT OR FAIL INTO %sobj VALUES (?,?,?,?,?)" % T
q = self.query
for oid, data_id, value_serial in object_list:
oid = u64(oid)
partition = self._getPartition(oid)
if value_serial:
value_serial = u64(value_serial)
(data_id,), = q("SELECT data_id FROM obj"
" WHERE partition=? AND oid=? AND tid=?",
(partition, oid, value_serial))
if temporary:
self.holdData(data_id)
try:
q(obj_sql, (partition, oid, tid, data_id, value_serial))
except sqlite3.IntegrityError:
# This may happen if a previous replication of 'obj' was
# interrupted.
if not T:
r, = q("SELECT data_id, value_tid FROM obj"
" WHERE partition=? AND oid=? AND tid=?",
(partition, oid, tid))
if r == (data_id, value_serial):
continue
raise
if transaction:
oid_list, user, desc, ext, packed, ttid = transaction
partition = self._getPartition(tid)
assert packed in (0, 1)
q("INSERT OR FAIL INTO %strans VALUES (?,?,?,?,?,?,?,?)" % T,
(partition, None if temporary else tid,
packed, buffer(''.join(oid_list)),
buffer(user), buffer(desc), buffer(ext), u64(ttid)))
def getOrphanList(self):
return [x for x, in self.query(
"SELECT id FROM data LEFT JOIN obj ON (id=data_id)"
" WHERE data_id IS NULL")]
def _pruneData(self, data_id_list):
data_id_list = set(data_id_list).difference(self._uncommitted_data)
if data_id_list:
q = self.query
data_id_list.difference_update(x for x, in q(
"SELECT DISTINCT data_id FROM obj WHERE data_id IN (%s)"
% ",".join(map(str, data_id_list))))
q("DELETE FROM data WHERE id IN (%s)"
% ",".join(map(str, data_id_list)))
return len(data_id_list)
return 0
def storeData(self, checksum, oid, data, compression,
_dup=unique_constraint_message("data", "hash", "compression")):
H = buffer(checksum)
p = self._getPartition(util.u64(oid))
r = self._data_last_ids[p]
try:
self.query("INSERT INTO data VALUES (?,?,?,?)",
(r, H, compression, buffer(data)))
except sqlite3.IntegrityError, e:
if e.args[0] == _dup:
(r, d), = self.query("SELECT id, value FROM data"
" WHERE hash=? AND compression=?",
(H, compression))
if str(d) == data:
return r
raise
self._data_last_ids[p] = r + 1
return r
def loadData(self, data_id):
compression, checksum, data = self.query(
"SELECT compression, hash, value FROM data WHERE id=?",
(data_id,)).fetchone()
if checksum:
return compression, str(checksum), str(data)
return compression, checksum, data
def _getDataTID(self, oid, tid=None, before_tid=None):
partition = self._getReadablePartition(oid)
sql = 'SELECT tid, value_tid FROM obj' \
' WHERE partition=? AND oid=?'
if tid is not None:
r = self.query(sql + ' AND tid=?', (partition, oid, tid))
elif before_tid is not None:
r = self.query(sql + ' AND tid ORDER BY tid DESC LIMIT 1',
(partition, oid, before_tid))
else:
r = self.query(sql + ' ORDER BY tid DESC LIMIT 1',
(partition, oid))
r = r.fetchone()
return r or (None, None)
def lockTransaction(self, tid, ttid):
u64 = util.u64
self.query("UPDATE ttrans SET tid=? WHERE ttid=?",
(u64(tid), u64(ttid)))
self.commit()
def unlockTransaction(self, tid, ttid, trans, obj):
q = self.query
u64 = util.u64
tid = u64(tid)
if trans:
q("INSERT INTO trans SELECT * FROM ttrans WHERE tid=?", (tid,))
q("DELETE FROM ttrans WHERE tid=?", (tid,))
if not obj:
return
ttid = u64(ttid)
sql = " FROM tobj WHERE tid=?"
data_id_list = [x for x, in q("SELECT data_id%s AND data_id IS NOT NULL"
% sql, (ttid,))]
q("INSERT INTO obj SELECT partition, oid, ?, data_id, value_tid" + sql,
(tid, ttid))
q("DELETE" + sql, (ttid,))
self.releaseData(data_id_list)
def abortTransaction(self, ttid):
args = util.u64(ttid),
q = self.query
q("DELETE FROM tobj WHERE tid=?", args)
q("DELETE FROM ttrans WHERE ttid=?", args)
def deleteTransaction(self, tid):
tid = util.u64(tid)
self.query("DELETE FROM trans WHERE partition=? AND tid=?",
(self._getPartition(tid), tid))
def deleteObject(self, oid, serial=None):
oid = util.u64(oid)
sql = " FROM obj WHERE partition=? AND oid=?"
args = [self._getPartition(oid), oid]
if serial:
sql += " AND tid=?"
args.append(util.u64(serial))
q = self.query
data_id_list = [x for x, in q(
"SELECT DISTINCT data_id%s AND data_id IS NOT NULL" % sql, args)]
q("DELETE" + sql, args)
self._pruneData(data_id_list)
def _deleteRange(self, partition, min_tid=None, max_tid=None):
sql = " WHERE partition=?"
args = [partition]
if min_tid is not None:
sql += " AND ? < tid"
args.append(min_tid)
if max_tid is not None:
sql += " AND tid <= ?"
args.append(max_tid)
q = self.query
q("DELETE FROM trans" + sql, args)
sql = " FROM obj" + sql
data_id_list = [x for x, in q(
"SELECT DISTINCT data_id%s AND data_id IS NOT NULL" % sql, args)]
q("DELETE" + sql, args)
self._pruneData(data_id_list)
def getTransaction(self, tid, all=False):
tid = util.u64(tid)
q = self.query
r = q("SELECT oids, user, description, ext, packed, ttid"
" FROM trans WHERE partition=? AND tid=?",
(self._getReadablePartition(tid), tid)).fetchone()
if not r and all:
r = q("SELECT oids, user, description, ext, packed, ttid"
" FROM ttrans WHERE tid=?", (tid,)).fetchone()
if r:
oids, user, description, ext, packed, ttid = r
return splitOIDField(tid, oids), str(user), \
str(description), str(ext), packed, util.p64(ttid)
def getObjectHistory(self, oid, offset, length):
# FIXME: This method doesn't take client's current transaction id as
# parameter, which means it can return transactions in the future of
# client's transaction.
p64 = util.p64
oid = util.u64(oid)
return [(p64(tid), length or 0) for tid, length in self.query("""\
SELECT tid, LENGTH(value)
FROM obj LEFT JOIN data ON obj.data_id = data.id
WHERE partition=? AND oid=? AND tid>=?
ORDER BY tid DESC LIMIT ?,?""",
(self._getReadablePartition(oid), oid,
self._getPackTID(), offset, length))
] or None
def _fetchObject(self, oid, tid):
for serial, compression, checksum, data, value_serial in self.query(
'SELECT tid, compression, data.hash, value, value_tid'
' FROM obj LEFT JOIN data ON obj.data_id = data.id'
' WHERE partition=? AND oid=? AND tid=?',
(self._getReadablePartition(oid), oid, tid)):
if checksum:
checksum = str(checksum)
data = str(data)
return serial, compression, checksum, data, value_serial
def getReplicationObjectList(self, min_tid, max_tid, length, partition,
min_oid):
u64 = util.u64
p64 = util.p64
min_tid = u64(min_tid)
return [(p64(serial), p64(oid)) for serial, oid in self.query("""\
SELECT tid, oid FROM obj
WHERE partition=? AND tid<=?
AND (tid=? AND ?<=oid OR ?=? AND value_tid=?
ORDER BY tid ASC""" % T,
(partition, oid, max_serial, orig_serial)):
q(update, (value_serial, partition, oid, serial))
if value_serial is None:
# First found, mark its serial for future reference.
value_serial = serial
return value_serial
def pack(self, tid, updateObjectDataForPack):
# TODO: unit test (along with updatePackFuture)
p64 = util.p64
tid = util.u64(tid)
updatePackFuture = self._updatePackFuture
getPartition = self._getReadablePartition
q = self.query
self._setPackTID(tid)
for count, oid, max_serial in q("SELECT COUNT(*) - 1, oid, MAX(tid)"
" FROM obj WHERE tid<=? GROUP BY oid",
(tid,)):
partition = getPartition(oid)
if q("SELECT 1 FROM obj WHERE partition=?"
" AND oid=? AND tid=? AND data_id IS NULL",
(partition, oid, max_serial)).fetchone():
max_serial += 1
elif not count:
continue
# There are things to delete for this object
data_id_set = set()
sql = " FROM obj WHERE partition=? AND oid=? AND tid"
args = partition, oid, max_serial
for serial, data_id in q("SELECT tid, data_id" + sql, args):
data_id_set.add(data_id)
new_serial = updatePackFuture(oid, serial, max_serial)
if new_serial:
new_serial = p64(new_serial)
updateObjectDataForPack(p64(oid), p64(serial),
new_serial, data_id)
q("DELETE" + sql, args)
data_id_set.discard(None)
self._pruneData(data_id_set)
self.commit()
def checkTIDRange(self, partition, length, min_tid, max_tid):
# XXX: SQLite's GROUP_CONCAT is slow (looks like quadratic)
count, tids, max_tid = self.query("""\
SELECT COUNT(*), GROUP_CONCAT(tid), MAX(tid)
FROM (SELECT tid FROM trans
WHERE partition=? AND ?<=tid AND tid<=?
ORDER BY tid ASC LIMIT ?) AS t""",
(partition, util.u64(min_tid), util.u64(max_tid),
-1 if length is None else length)).fetchone()
if count:
return count, sha1(tids).digest(), util.p64(max_tid)
return 0, ZERO_HASH, ZERO_TID
def checkSerialRange(self, partition, length, min_tid, max_tid, min_oid):
u64 = util.u64
# We don't ask SQLite to compute everything (like in checkTIDRange)
# because it's difficult to get the last serial _for the last oid_.
# We would need a function (that could be named 'LAST') that returns the
# last grouped value, instead of the greatest one.
min_tid = u64(min_tid)
r = self.query("""\
SELECT tid, oid
FROM obj
WHERE partition=? AND tid<=? AND (tid>? OR tid=? AND oid>=?)
ORDER BY tid, oid LIMIT ?""",
(partition, u64(max_tid), min_tid, min_tid, u64(min_oid),
-1 if length is None else length)).fetchall()
if r:
p64 = util.p64
return (len(r),
sha1(','.join(str(x[0]) for x in r)).digest(),
p64(r[-1][0]),
sha1(','.join(str(x[1]) for x in r)).digest(),
p64(r[-1][1]))
return 0, ZERO_HASH, ZERO_TID, ZERO_HASH, ZERO_OID
def dump(self):
main = []
data = []
for line in self.conn.iterdump():
if line.startswith('INSERT '):
assert line.endswith(';'), line
data.append(line)
continue
if line.startswith('CREATE TABLE '):
# ALTER TABLE adds quotes.
create, table, name, tail = line.split(' ', 3)
line = ' '.join((create, table, name.strip('"'), tail))
main.append(line)
assert line == 'COMMIT;', line
data.sort()
main[-1:-1] = data
return '\n'.join(main) + '\n'
def _restore(self, sql):
self.conn.executescript(sql)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/ 0000775 0000000 0000000 00000000000 13465024610 0025270 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/__init__.py 0000664 0000000 0000000 00000006633 13465024610 0027411 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import weakref
from neo.lib import logging
from neo.lib.handler import EventHandler
from neo.lib.exception import PrimaryFailure, StoppedOperation
from neo.lib.protocol import (uuid_str,
NodeStates, NodeTypes, Packets, ProtocolError)
class BaseHandler(EventHandler):
def notifyTransactionFinished(self, conn, ttid, max_tid):
app = self.app
app.tm.abort(ttid)
app.replicator.transactionFinished(ttid, max_tid)
def abortTransaction(self, conn, ttid, _):
self.notifyTransactionFinished(conn, ttid, None)
class BaseMasterHandler(BaseHandler):
def connectionLost(self, conn, new_state):
if self.app.listening_conn: # if running
self.app.master_node = None
raise PrimaryFailure('connection lost')
def stopOperation(self, conn):
raise StoppedOperation
def reelectPrimary(self, conn):
raise PrimaryFailure('re-election occurs')
def notifyClusterInformation(self, conn, state):
self.app.changeClusterState(state)
def notifyNodeInformation(self, conn, timestamp, node_list):
"""Store information on nodes, only if this is sent by a primary
master node."""
super(BaseMasterHandler, self).notifyNodeInformation(
conn, timestamp, node_list)
for node_type, _, uuid, state, _ in node_list:
if uuid == self.app.uuid:
# This is me, do what the master tell me
logging.info("I was told I'm %s", state)
if state in (NodeStates.UNKNOWN, NodeStates.DOWN):
erase = state == NodeStates.UNKNOWN
self.app.shutdown(erase=erase)
elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
logging.info('Notified of non-running client, abort (%s)',
uuid_str(uuid))
# See comment in ClientOperationHandler.connectionClosed
self.app.tm.abortFor(uuid, even_if_voted=True)
def notifyPartitionChanges(self, conn, ptid, num_replicas, cell_list):
"""This is very similar to Send Partition Table, except that
the information is only about changes from the previous."""
app = self.app
if ptid != 1 + app.pt.getID():
raise ProtocolError('wrong partition table id')
app.pt.update(ptid, num_replicas, cell_list, app.nm)
app.dm.changePartitionTable(ptid, num_replicas, cell_list)
if app.operational:
app.replicator.notifyPartitionChanges(cell_list)
app.dm.commit()
def askFinalTID(self, conn, ttid):
conn.answer(Packets.AnswerFinalTID(self.app.dm.getFinalTID(ttid)))
def notifyRepair(self, conn, *args):
app = self.app
app.dm.repair(weakref.ref(app), *args)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/client.py 0000664 0000000 0000000 00000030740 13465024610 0027124 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.handler import DelayEvent
from neo.lib.util import dump, makeChecksum, add64
from neo.lib.protocol import Packets, Errors, NonReadableCell, ProtocolError, \
ZERO_HASH, ZERO_TID, INVALID_PARTITION
from ..transactions import ConflictError, NotRegisteredError
from . import BaseHandler
import time
# Log stores taking (incl. lock delays) more than this many seconds.
# Set to None to disable.
SLOW_STORE = 2
class ClientOperationHandler(BaseHandler):
def connectionClosed(self, conn):
logging.debug('connection closed for %r', conn)
app = self.app
if app.operational:
# Even if in most cases, abortFor is called from both this method
# and BaseMasterHandler.notifyPartitionChanges (especially since
# storage nodes disconnects unknown clients on their own), these 2
# handlers also cover distinct scenarios, so neither of them is
# redundant:
# - A client node may have network issues with this particular
# storage node and remain RUNNING: we may still be involved in
# the second phase so we only abort non-voted transactions here.
# By not taking part to any further deadlock avoidance,
# not releasing write-locks now would lead to a deadlock.
# - A client node may be disconnected from the master, whereas
# there are still voted (and not locked) transactions to abort.
app.tm.abortFor(conn.getUUID())
def askTransactionInformation(self, conn, tid):
t = self.app.dm.getTransaction(tid)
if t is None:
p = Errors.TidNotFound('%s does not exist' % dump(tid))
else:
p = Packets.AnswerTransactionInformation(tid, t[1], t[2], t[3],
bool(t[4]), t[0])
conn.answer(p)
def getEventQueue(self):
# for read rpc
return self.app.tm.read_queue
def askObject(self, conn, oid, at, before):
app = self.app
if app.tm.loadLocked(oid):
raise DelayEvent
o = app.dm.getObject(oid, at, before)
try:
serial, next_serial, compression, checksum, data, data_serial = o
except TypeError:
p = (Errors.OidDoesNotExist if o is None else
Errors.OidNotFound)(dump(oid))
else:
if checksum is None:
checksum = ZERO_HASH
data = ''
p = Packets.AnswerObject(oid, serial, next_serial,
compression, checksum, data, data_serial)
conn.answer(p)
def askStoreTransaction(self, conn, ttid, *txn_info):
self.app.tm.register(conn, ttid)
self.app.tm.vote(ttid, txn_info)
conn.answer(Packets.AnswerStoreTransaction())
def askVoteTransaction(self, conn, ttid):
self.app.tm.vote(ttid)
conn.answer(Packets.AnswerVoteTransaction())
def _askStoreObject(self, conn, oid, serial, compression, checksum, data,
data_serial, ttid, request_time):
try:
locked = self.app.tm.storeObject(ttid, serial, oid, compression,
checksum, data, data_serial)
except ConflictError, err:
# resolvable or not
locked = err.tid
except NonReadableCell:
logging.info('Ignore store of %s:%s by %s: unassigned partition',
dump(oid), dump(serial), dump(ttid))
locked = ZERO_TID
except NotRegisteredError:
# transaction was aborted, cancel this event
logging.info('Forget store of %s:%s by %s delayed by %s',
dump(oid), dump(serial), dump(ttid),
dump(self.app.tm.getLockingTID(oid)))
locked = ZERO_TID
else:
if request_time and SLOW_STORE is not None:
duration = time.time() - request_time
if duration > SLOW_STORE:
logging.info('StoreObject delay: %.02fs', duration)
conn.answer(Packets.AnswerStoreObject(locked))
def askStoreObject(self, conn, oid, serial,
compression, checksum, data, data_serial, ttid):
if 1 < compression:
raise ProtocolError('invalid compression value')
# register the transaction
self.app.tm.register(conn, ttid)
if data or checksum != ZERO_HASH:
# TODO: return an appropriate error packet
assert makeChecksum(data) == checksum
assert data_serial is None
else:
checksum = data = None
try:
self._askStoreObject(conn, oid, serial, compression,
checksum, data, data_serial, ttid, None)
except DelayEvent, e:
# locked by a previous transaction, retry later
self.app.tm.queueEvent(self._askStoreObject, conn, (oid, serial,
compression, checksum, data, data_serial, ttid, time.time()),
*e.args)
def askRebaseTransaction(self, conn, *args):
conn.answer(Packets.AnswerRebaseTransaction(
self.app.tm.rebase(conn, *args)))
def askRebaseObject(self, conn, ttid, oid):
try:
self._askRebaseObject(conn, ttid, oid, None)
except DelayEvent, e:
# locked by a previous transaction, retry later
self.app.tm.queueEvent(self._askRebaseObject,
conn, (ttid, oid, time.time()), *e.args)
def _askRebaseObject(self, conn, ttid, oid, request_time):
conflict = self.app.tm.rebaseObject(ttid, oid)
if request_time and SLOW_STORE is not None:
duration = time.time() - request_time
if duration > SLOW_STORE:
logging.info('RebaseObject delay: %.02fs', duration)
conn.answer(Packets.AnswerRebaseObject(conflict))
def askTIDsFrom(self, conn, min_tid, max_tid, length, partition):
conn.answer(Packets.AnswerTIDsFrom(self.app.dm.getReplicationTIDList(
min_tid, max_tid, length, partition)))
def _askTIDs(self, first, last, partition):
# This method is complicated, because I must return TIDs only
# about usable partitions assigned to me.
if first >= last:
raise ProtocolError('invalid offsets')
app = self.app
if partition == INVALID_PARTITION:
partition_list = app.pt.getAssignedPartitionList(app.uuid)
else:
partition_list = [partition]
return app.dm.getTIDList(first, last - first, partition_list)
def askTIDs(self, conn, *args):
conn.answer(Packets.AnswerTIDs(self._askTIDs(*args)))
def askFinalTID(self, conn, ttid):
conn.answer(Packets.AnswerFinalTID(self.app.tm.getFinalTID(ttid)))
def askObjectUndoSerial(self, conn, ttid, ltid, undone_tid, oid_list):
app = self.app
findUndoTID = app.dm.findUndoTID
getObjectFromTransaction = app.tm.getObjectFromTransaction
object_tid_dict = {}
for oid in oid_list:
r = findUndoTID(oid, ttid,
ltid, undone_tid, getObjectFromTransaction(ttid, oid))
if r:
if not r[0]:
p = Errors.OidNotFound(dump(oid))
break
object_tid_dict[oid] = r
else:
p = Packets.AnswerObjectUndoSerial(object_tid_dict)
conn.answer(p)
def askObjectHistory(self, conn, oid, first, last):
if first >= last:
raise ProtocolError('invalid offsets')
app = self.app
if app.tm.loadLocked(oid):
raise DelayEvent
history_list = app.dm.getObjectHistory(oid, first, last - first)
if history_list is None:
p = Errors.OidNotFound(dump(oid))
else:
p = Packets.AnswerObjectHistory(oid, history_list)
conn.answer(p)
def askCheckCurrentSerial(self, conn, ttid, oid, serial):
self.app.tm.register(conn, ttid)
try:
self._askCheckCurrentSerial(conn, ttid, oid, serial, None)
except DelayEvent, e:
# locked by a previous transaction, retry later
self.app.tm.queueEvent(self._askCheckCurrentSerial,
conn, (ttid, oid, serial, time.time()), *e.args)
def _askCheckCurrentSerial(self, conn, ttid, oid, serial, request_time):
try:
locked = self.app.tm.checkCurrentSerial(ttid, oid, serial)
except ConflictError, err:
# resolvable or not
locked = err.tid
except NonReadableCell:
logging.info('Ignore check of %s:%s by %s: unassigned partition',
dump(oid), dump(serial), dump(ttid))
locked = ZERO_TID
except NotRegisteredError:
# transaction was aborted, cancel this event
logging.info('Forget serial check of %s:%s by %s delayed by %s',
dump(oid), dump(serial), dump(ttid),
dump(self.app.tm.getLockingTID(oid)))
locked = ZERO_TID
else:
if request_time and SLOW_STORE is not None:
duration = time.time() - request_time
if duration > SLOW_STORE:
logging.info('CheckCurrentSerial delay: %.02fs', duration)
conn.answer(Packets.AnswerCheckCurrentSerial(locked))
# like ClientOperationHandler but read-only & only for tid <= backup_tid
class ClientReadOnlyOperationHandler(ClientOperationHandler):
def _readOnly(self, conn, *args, **kw):
conn.answer(Errors.ReadOnlyAccess(
'read-only access because cluster is in backuping mode'))
askStoreTransaction = _readOnly
askVoteTransaction = _readOnly
askStoreObject = _readOnly
askFinalTID = _readOnly
askRebaseObject = _readOnly
askRebaseTransaction = _readOnly
# takes write lock & is only used when going to commit
askCheckCurrentSerial = _readOnly
# below operations: like in ClientOperationHandler but cut tid <= backup_tid
def askTransactionInformation(self, conn, tid):
backup_tid = self.app.dm.getBackupTID()
if tid > backup_tid:
conn.answer(Errors.TidNotFound(
'tids > %s are not fully fetched yet' % dump(backup_tid)))
return
super(ClientReadOnlyOperationHandler, self).askTransactionInformation(
conn, tid)
def askObject(self, conn, oid, serial, tid):
backup_tid = self.app.dm.getBackupTID()
if serial:
if serial > backup_tid:
# obj lookup will find nothing, but return properly either
# OidDoesNotExist or OidNotFound
serial = ZERO_TID
elif tid:
tid = min(tid, add64(backup_tid, 1))
# limit "latest obj" query to tid <= backup_tid
else:
tid = add64(backup_tid, 1)
super(ClientReadOnlyOperationHandler, self).askObject(
conn, oid, serial, tid)
def askTIDsFrom(self, conn, min_tid, max_tid, length, partition):
backup_tid = self.app.dm.getBackupTID()
max_tid = min(max_tid, backup_tid)
# NOTE we don't need to adjust min_tid: if min_tid > max_tid
# db.getReplicationTIDList will return empty [], which is correct
super(ClientReadOnlyOperationHandler, self).askTIDsFrom(
conn, min_tid, max_tid, length, partition)
def askTIDs(self, conn, first, last, partition):
backup_tid = self.app.dm.getBackupTID()
tid_list = self._askTIDs(first, last, partition)
tid_list = filter(lambda tid: tid <= backup_tid, tid_list)
conn.answer(Packets.AnswerTIDs(tid_list))
# FIXME askObjectUndoSerial to limit tid <= backup_tid
# (askObjectUndoSerial is used in undo() but itself is read-only query)
# FIXME askObjectHistory to limit tid <= backup_tid
# TODO dm.getObjectHistory has to be first fixed for this
#def askObjectHistory(self, conn, oid, first, last):
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/identification.py 0000664 0000000 0000000 00000005700 13465024610 0030635 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.handler import EventHandler
from neo.lib.protocol import NodeTypes, NotReadyError, Packets
from neo.lib.protocol import ProtocolError
from .storage import StorageOperationHandler
from .client import ClientOperationHandler, ClientReadOnlyOperationHandler
class IdentificationHandler(EventHandler):
""" Handler used for incoming connections during operation state """
def connectionLost(self, conn, new_state):
logging.warning('A connection was lost during identification')
def getEventQueue(self):
# for requestIdentification
return self.app.nm
def requestIdentification(self, conn, node_type, uuid, address, name,
id_timestamp, devpath, new_nid):
self.checkClusterName(name)
app = self.app
# reject any incoming connections if not ready
if not app.operational:
raise NotReadyError
if uuid is None:
if node_type != NodeTypes.STORAGE:
raise ProtocolError('reject anonymous non-storage node')
handler = StorageOperationHandler(self.app)
conn.setHandler(handler)
else:
if uuid == app.uuid:
raise ProtocolError("uuid conflict or loopback connection")
node = app.nm.getByUUID(uuid, id_timestamp)
# choose the handler according to the node type
if node_type == NodeTypes.CLIENT:
if app.dm.getBackupTID():
handler = ClientReadOnlyOperationHandler
else:
handler = ClientOperationHandler
assert node.isRunning(), node
force = False
elif node_type == NodeTypes.STORAGE:
handler = StorageOperationHandler
force = app.uuid < uuid
else:
raise ProtocolError('reject non-client-or-storage node')
# apply the handler and set up the connection
handler = handler(self.app)
conn.setHandler(handler)
node.setConnection(conn, force)
# accept the identification and trigger an event
conn.answer(Packets.AcceptIdentification(
NodeTypes.STORAGE, uuid and app.uuid, uuid))
handler.connectionCompleted(conn)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/initialization.py 0000664 0000000 0000000 00000006170 13465024610 0030675 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from . import BaseMasterHandler
from neo.lib import logging
from neo.lib.protocol import Packets, ProtocolError, ZERO_TID
class InitializationHandler(BaseMasterHandler):
def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
app = self.app
pt = app.pt
pt.load(ptid, num_replicas, row_list, app.nm)
if not pt.filled():
raise ProtocolError('Partial partition table received')
# Install the partition table into the database for persistence.
cell_list = []
unassigned = range(pt.getPartitions())
for offset in reversed(unassigned):
for cell in pt.getCellList(offset):
cell_list.append((offset, cell.getUUID(), cell.getState()))
if cell.getUUID() == app.uuid:
unassigned.remove(offset)
# delete objects database
dm = app.dm
if unassigned:
if app.disable_drop_partitions:
logging.info('partitions %r are discarded but actual deletion'
' of data is disabled', unassigned)
else:
logging.debug('drop data for partitions %r', unassigned)
dm.dropPartitions(unassigned)
dm.changePartitionTable(ptid, num_replicas, cell_list, reset=True)
dm.commit()
def truncate(self, conn, tid):
dm = self.app.dm
dm._setBackupTID(None)
dm._setTruncateTID(tid)
dm.commit()
def askRecovery(self, conn):
app = self.app
conn.answer(Packets.AnswerRecovery(
app.pt.getID(),
app.dm.getBackupTID(),
app.dm.getTruncateTID()))
def askLastIDs(self, conn):
dm = self.app.dm
dm.truncate()
ltid, loid = dm.getLastIDs()
conn.answer(Packets.AnswerLastIDs(loid, ltid))
def askPartitionTable(self, conn):
pt = self.app.pt
conn.answer(Packets.AnswerPartitionTable(
pt.getID(), pt.getReplicas(), pt.getRowList()))
def askLockedTransactions(self, conn):
conn.answer(Packets.AnswerLockedTransactions(
self.app.dm.getUnfinishedTIDDict()))
def validateTransaction(self, conn, ttid, tid):
dm = self.app.dm
dm.lockTransaction(tid, ttid)
dm.unlockTransaction(tid, ttid, True, True)
dm.commit()
def startOperation(self, conn, backup):
# XXX: see comment in protocol
self.app.operational = True
self.app.replicator.startOperation(backup)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/master.py 0000664 0000000 0000000 00000003732 13465024610 0027142 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from neo.lib import logging
from neo.lib.util import dump
from neo.lib.protocol import Packets, ZERO_TID
from . import BaseMasterHandler
class MasterOperationHandler(BaseMasterHandler):
""" This handler is used for the primary master """
def startOperation(self, conn, backup):
# XXX: see comment in protocol
assert self.app.operational and backup
self.app.replicator.startOperation(backup)
def askLockInformation(self, conn, ttid, tid):
self.app.tm.lock(ttid, tid)
conn.answer(Packets.AnswerInformationLocked(ttid))
def notifyUnlockInformation(self, conn, ttid):
self.app.tm.unlock(ttid)
def askPack(self, conn, tid):
app = self.app
logging.info('Pack started, up to %s...', dump(tid))
app.dm.pack(tid, app.tm.updateObjectDataForPack)
logging.info('Pack finished.')
conn.answer(Packets.AnswerPack(True))
def answerUnfinishedTransactions(self, conn, *args, **kw):
self.app.replicator.setUnfinishedTIDList(*args, **kw)
def replicate(self, conn, tid, upstream_name, source_dict):
self.app.replicator.backup(tid, {p: a and (a, upstream_name)
for p, a in source_dict.iteritems()})
def checkPartition(self, conn, *args):
self.app.checker(*args)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/handlers/storage.py 0000664 0000000 0000000 00000026141 13465024610 0027312 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import weakref
from functools import wraps
from neo.lib.connection import ConnectionClosed
from neo.lib.handler import DelayEvent, EventHandler
from neo.lib.protocol import Errors, Packets, ProtocolError, ZERO_HASH
def checkConnectionIsReplicatorConnection(func):
def wrapper(self, conn, *args, **kw):
if self.app.replicator.isReplicatingConnection(conn):
return func(self, conn, *args, **kw)
return wraps(func)(wrapper)
def checkFeedingConnection(check):
def decorator(func):
def wrapper(self, conn, partition, *args, **kw):
app = self.app
cell = app.pt.getCell(partition, app.uuid)
if cell is None or (cell.isOutOfDate() if check else
not cell.isReadable()):
p = Errors.CheckingError if check else Errors.ReplicationError
return conn.answer(p("partition %u not readable" % partition))
conn.asServer()
return func(self, conn, partition, *args, **kw)
return wraps(func)(wrapper)
return decorator
class StorageOperationHandler(EventHandler):
"""This class handles events for replications."""
def connectionLost(self, conn, new_state):
app = self.app
if app.operational and conn.isClient():
uuid = conn.getUUID()
if uuid:
node = app.nm.getByUUID(uuid)
else:
node = app.nm.getByAddress(conn.getAddress())
node.setUnknown()
replicator = app.replicator
if replicator.current_node is node:
replicator.abort()
app.checker.connectionLost(conn)
# Client
def connectionFailed(self, conn):
if self.app.operational:
self.app.replicator.abort()
def _acceptIdentification(self, node, *args):
self.app.replicator.connected(node)
self.app.checker.connected(node)
@checkConnectionIsReplicatorConnection
def answerFetchTransactions(self, conn, pack_tid, next_tid, tid_list):
if tid_list:
deleteTransaction = self.app.dm.deleteTransaction
for tid in tid_list:
deleteTransaction(tid)
assert not pack_tid, "TODO"
if next_tid:
self.app.replicator.fetchTransactions(next_tid)
else:
self.app.replicator.fetchObjects()
@checkConnectionIsReplicatorConnection
def addTransaction(self, conn, tid, user, desc, ext, packed, ttid,
oid_list):
# Directly store the transaction.
self.app.dm.storeTransaction(tid, (),
(oid_list, user, desc, ext, packed, ttid), False)
@checkConnectionIsReplicatorConnection
def answerFetchObjects(self, conn, pack_tid, next_tid,
next_oid, object_dict):
if object_dict:
deleteObject = self.app.dm.deleteObject
for serial, oid_list in object_dict.iteritems():
for oid in oid_list:
deleteObject(oid, serial)
assert not pack_tid, "TODO"
if next_tid:
# TODO also provide feedback to master about current replication state (tid)
self.app.replicator.fetchObjects(next_tid, next_oid)
else:
# This will also commit.
self.app.replicator.finish()
@checkConnectionIsReplicatorConnection
def addObject(self, conn, oid, serial, compression,
checksum, data, data_serial):
dm = self.app.dm
if data or checksum != ZERO_HASH:
data_id = dm.storeData(checksum, oid, data, compression)
else:
data_id = None
# Directly store the transaction.
obj = oid, data_id, data_serial
dm.storeTransaction(serial, (obj,), None, False)
@checkConnectionIsReplicatorConnection
def replicationError(self, conn, message):
self.app.replicator.abort('source message: ' + message)
def checkingError(self, conn, message):
try:
self.app.checker.connectionLost(conn)
finally:
self.app.closeClient(conn)
@property
def answerCheckTIDRange(self):
return self.app.checker.checkRange
@property
def answerCheckSerialRange(self):
return self.app.checker.checkRange
# Server (all methods must set connection as server so that it isn't closed
# if client tasks are finished)
#
# These are all low-priority packets, in that we don't want to delay
# answers to clients, so tasks are used to postpone work when we're idle.
def getEventQueue(self):
return self.app.tm.read_queue
@checkFeedingConnection(check=True)
def askCheckTIDRange(self, conn, *args):
app = self.app
if app.tm.isLockedTid(args[3]): # max_tid
raise DelayEvent
msg_id = conn.getPeerId()
conn = weakref.proxy(conn)
def check():
r = app.dm.checkTIDRange(*args)
try:
conn.send(Packets.AnswerCheckTIDRange(*r), msg_id)
except (weakref.ReferenceError, ConnectionClosed):
pass
# Splitting this task would cause useless overhead. However, a
# generator function is expected, hence the following fake yield
# so that iteration stops immediately.
return; yield
app.newTask(check())
@checkFeedingConnection(check=True)
def askCheckSerialRange(self, conn, *args):
app = self.app
if app.tm.isLockedTid(args[3]): # max_tid
raise ProtocolError("transactions must be checked before objects")
msg_id = conn.getPeerId()
conn = weakref.proxy(conn)
def check():
r = app.dm.checkSerialRange(*args)
try:
conn.send(Packets.AnswerCheckSerialRange(*r), msg_id)
except (weakref.ReferenceError, ConnectionClosed):
pass
return; yield # same as in askCheckTIDRange
app.newTask(check())
@checkFeedingConnection(check=False)
def askFetchTransactions(self, conn, partition, length, min_tid, max_tid,
tid_list):
app = self.app
if app.tm.isLockedTid(max_tid):
# Wow, backup cluster is fast. Requested transactions are still in
# ttrans/ttobj so wait a little.
# This can also happen for internal replication, when
# NotifyTransactionFinished(M->S) + AskFetchTransactions(S->S)
# is faster than
# NotifyUnlockInformation(M->S)
raise DelayEvent
msg_id = conn.getPeerId()
conn = weakref.proxy(conn)
peer_tid_set = set(tid_list)
dm = app.dm
tid_list = dm.getReplicationTIDList(min_tid, max_tid, length + 1,
partition)
next_tid = tid_list.pop() if length < len(tid_list) else None
def push():
try:
pack_tid = None # TODO
for tid in tid_list:
if tid in peer_tid_set:
peer_tid_set.remove(tid)
else:
t = dm.getTransaction(tid)
if t is None:
conn.send(Errors.ReplicationError(
"partition %u dropped"
% partition), msg_id)
return
oid_list, user, desc, ext, packed, ttid = t
# Sending such packet does not mark the connection
# for writing if there's too little data in the buffer.
conn.send(Packets.AddTransaction(tid, user,
desc, ext, bool(packed), ttid, oid_list), msg_id)
# To avoid delaying several connections simultaneously,
# and also prevent the backend from scanning different
# parts of the DB at the same time, we ask the
# scheduler not to switch to another background task.
# Ideally, we are filling a buffer while the kernel
# is flushing another one for a concurrent connection.
yield conn.buffering
conn.send(Packets.AnswerFetchTransactions(
pack_tid, next_tid, peer_tid_set), msg_id)
yield
except (weakref.ReferenceError, ConnectionClosed):
pass
app.newTask(push())
@checkFeedingConnection(check=False)
def askFetchObjects(self, conn, partition, length, min_tid, max_tid,
min_oid, object_dict):
app = self.app
if app.tm.isLockedTid(max_tid):
raise ProtocolError("transactions must be fetched before objects")
msg_id = conn.getPeerId()
conn = weakref.proxy(conn)
dm = app.dm
object_list = dm.getReplicationObjectList(min_tid, max_tid, length + 1,
partition, min_oid)
if length < len(object_list):
next_tid, next_oid = object_list.pop()
else:
next_tid = next_oid = None
def push():
try:
pack_tid = None # TODO
for serial, oid in object_list:
oid_set = object_dict.get(serial)
if oid_set:
if type(oid_set) is tuple:
object_dict[serial] = oid_set = set(oid_set)
if oid in oid_set:
oid_set.remove(oid)
if not oid_set:
del object_dict[serial]
continue
object = dm.fetchObject(oid, serial)
if not object:
conn.send(Errors.ReplicationError(
"partition %u dropped or truncated"
% partition), msg_id)
return
if not object[2]: # creation undone
object = object[0], 0, ZERO_HASH, '', object[4]
# Same as in askFetchTransactions.
conn.send(Packets.AddObject(oid, *object), msg_id)
yield conn.buffering
conn.send(Packets.AnswerFetchObjects(
pack_tid, next_tid, next_oid, object_dict), msg_id)
yield
except (weakref.ReferenceError, ConnectionClosed):
pass
app.newTask(push())
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/replicator.py 0000664 0000000 0000000 00000050402 13465024610 0026207 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
"""
Replication algorithm
Purpose: replicate the content of a reference node into a replicating node,
bringing it up-to-date. This happens in the following cases:
- A new storage is added to en existing cluster.
- A node was separated from cluster and rejoins it.
- In a backup cluster, the master notifies a node that new data exists upstream
(note that in this case, the cell is always marked as UP_TO_DATE).
Replication happens per partition. Reference node can change between
partitions.
2 parts, done sequentially:
- Transaction (metadata) replication
- Object (metadata+data) replication
Both parts follow the same mechanism:
- The range of data to replicate is split into chunks of FETCH_COUNT items
(transaction or object).
- For every chunk, the requesting node sends to seeding node the list of items
it already has.
- Before answering, the seeding node sends 1 packet for every missing item.
For items that are already on the replicating node, there is no check that
values matches.
- The seeding node finally answers with the list of items to delete (usually
empty).
Internal replication, which is similar to RAID1 (and as opposed to asynchronous
replication to a backup cluster) requires extra care with respect to
transactions. The transition of a cell from OUT_OF_DATE to UP_TO_DATE is done
is several steps.
A replicating node can not depend on other nodes to fetch the data
recently/being committed because that can not be done atomically: it could miss
writes between the processing of its request by a source node and the reception
of the answer.
Therefore, outdated cells are writable: a storage node asks the master for
transactions being committed and then it is expected to fully receive from the
client any transaction that is started after this answer.
Which has in turn other consequences:
- The client must not fail to write to a storage node after the above request
to the master: for this, the storage must have announced it is ready, and it
must delay identification of unknown clients (those for which it hasn't
received yet a notification from the master).
- Writes must be accepted blindly (i.e. without taking a write-lock) when a
storage node lacks the data to check for conflicts. This is possible because
1 up-to-date cell (for each partition) is enough to do these checks.
- Because the client can not reliably know if a storage node is expected to
receive a transaction in full, all writes must succeed.
- Even if the replication is finished, we have to wait that we don't have any
lockless writes left before announcing to the master that we're up-to-date.
To sum up:
1. ask unfinished transactions -> (last_transaction, ttid_list)
2. replicate to last_transaction
3. wait for all ttid_list to be finished -> new last_transaction
4. replicate to last_transaction
5. no lockless write anymore, except to (oid, ttid) that were already
stored/checked without taking a lock
6. wait for all transactions with lockless writes to be finished
7. announce we're up-to-date
For any failed write, the client marks the storage node as failed and stops
writing to it for the transaction. Unless there's no failed write, vote ends
with an extra request to the master: the transaction will only succeed if the
failed nodes can be disconnected, forcing them to replicate the missing data.
TODO: Packing and replication currently fail when they happen at the same time.
"""
import random
from neo.lib import logging
from neo.lib.protocol import CellStates, NodeTypes, NodeStates, \
Packets, INVALID_TID, ZERO_TID, ZERO_OID
from neo.lib.connection import ClientConnection, ConnectionClosed
from neo.lib.util import add64, dump, p64
from .handlers.storage import StorageOperationHandler
FETCH_COUNT = 1000
class Partition(object):
__slots__ = 'next_trans', 'next_obj', 'max_ttid'
def __repr__(self):
return '<%s(%s) at 0x%x>' % (self.__class__.__name__,
', '.join('%s=%r' % (x, getattr(self, x)) for x in self.__slots__
if hasattr(self, x)),
id(self))
class Replicator(object):
# When the replication of a partition is aborted, the connection to the
# feeding node may still be open, e.g. on PT update from the master. In
# such case, replication is also aborted on the other side but there may
# be a few incoming packets that must be discarded.
_conn_msg_id = None
current_node = None
current_partition = None
def __init__(self, app):
self.app = app
def getCurrentConnection(self):
node = self.current_node
if node is not None and node.isConnected(True):
return node.getConnection()
def isReplicatingConnection(self, conn):
return conn is self.getCurrentConnection() and \
conn.getPeerId() == self._conn_msg_id
def setUnfinishedTIDList(self, max_tid, ttid_list, offset_list):
"""This is a callback from MasterOperationHandler."""
assert self.ttid_set.issubset(ttid_list), (self.ttid_set, ttid_list)
if ttid_list:
self.ttid_set.update(ttid_list)
max_ttid = max(ttid_list)
else:
max_ttid = None
for offset in offset_list:
self.partition_dict[offset].max_ttid = max_ttid
self.replicate_dict[offset] = max_tid
self._nextPartition()
def transactionFinished(self, ttid, max_tid=None):
""" Callback from MasterOperationHandler """
try:
self.ttid_set.remove(ttid)
except KeyError:
assert max_tid is None, max_tid
return
min_ttid = min(self.ttid_set) if self.ttid_set else INVALID_TID
for offset, p in self.partition_dict.iteritems():
if p.max_ttid:
if max_tid:
# Filling replicate_dict while there are still unfinished
# transactions for this partition is not the most
# efficient (due to the overhead of potentially replicating
# the last transactions in several times), but that's a
# simple way to make sure it is filled even if the
# remaining unfinished transactions are aborted.
self.replicate_dict[offset] = max_tid
if p.max_ttid < min_ttid:
# no more unfinished transaction for this partition
if not (offset == self.current_partition
or offset in self.replicate_dict):
logging.debug(
"All unfinished transactions have been aborted."
" Mark partition %u as already fully replicated",
offset)
# We don't have anymore the previous value of
# self.replicate_dict[offset], but p.max_ttid is not
# wrong. Anyway here, we're not in backup mode and this
# value will be ignored.
# XXX: see NonReadableCell.__doc__
self.app.tm.replicated(offset, p.max_ttid)
p.max_ttid = None
self._nextPartition()
def getBackupTID(self):
outdated_set = set(self.app.pt.getOutdatedOffsetListFor(self.app.uuid))
tid = INVALID_TID
for offset, p in self.partition_dict.iteritems():
if offset not in outdated_set:
tid = min(tid, p.next_trans, p.next_obj)
if ZERO_TID != tid != INVALID_TID:
return add64(tid, -1)
return ZERO_TID
def updateBackupTID(self, commit=False):
dm = self.app.dm
tid = dm.getBackupTID()
if tid:
new_tid = self.getBackupTID()
if tid != new_tid:
dm._setBackupTID(new_tid)
if commit:
dm.commit()
def startOperation(self, backup):
dm = self.app.dm
if backup:
if dm.getBackupTID():
assert not hasattr(self, 'partition_dict'), self.partition_dict
return
tid = dm.getLastIDs()[0] or ZERO_TID
else:
tid = None
dm._setBackupTID(tid)
dm.commit()
try:
partition_dict = self.partition_dict
except AttributeError:
return
for offset, next_tid in dm.iterCellNextTIDs():
if type(next_tid) is not bytes: # readable
p = partition_dict[offset]
p.next_trans, p.next_obj = next_tid
def populate(self):
self.partition_dict = {}
self.replicate_dict = {}
self.source_dict = {}
self.ttid_set = set()
outdated_list = []
for offset, next_tid in self.app.dm.iterCellNextTIDs():
self.partition_dict[offset] = p = Partition()
if type(next_tid) is bytes: # OUT_OF_DATE
outdated_list.append(offset)
p.next_trans = p.next_obj = next_tid
p.max_ttid = INVALID_TID
else: # readable
p.next_trans, p.next_obj = next_tid or (None, None)
p.max_ttid = None
if outdated_list:
self.app.tm.replicating(outdated_list)
def notifyPartitionChanges(self, cell_list):
"""This is a callback from MasterOperationHandler."""
abort = False
added_list = []
discarded_list = []
readable_list = []
app = self.app
for offset, uuid, state in cell_list:
if uuid == app.uuid:
if state in (CellStates.DISCARDED, CellStates.CORRUPTED):
try:
del self.partition_dict[offset]
except KeyError:
continue
self.replicate_dict.pop(offset, None)
self.source_dict.pop(offset, None)
abort = abort or self.current_partition == offset
discarded_list.append(offset)
elif state == CellStates.OUT_OF_DATE:
assert offset not in self.partition_dict
self.partition_dict[offset] = p = Partition()
# New cell. 0 is also what should be stored by the backend.
# Nothing to optimize.
p.next_trans = p.next_obj = ZERO_TID
p.max_ttid = INVALID_TID
added_list.append(offset)
else:
assert state in (CellStates.UP_TO_DATE,
CellStates.FEEDING), state
readable_list.append(offset)
tm = app.tm
if added_list:
tm.replicating(added_list)
if discarded_list:
tm.discarded(discarded_list)
if readable_list:
tm.readable(readable_list)
if abort:
self.abort()
def backup(self, tid, source_dict):
next_tid = None
for offset, source in source_dict.iteritems():
if source:
self.source_dict[offset] = source
self.replicate_dict[offset] = tid
elif offset != self.current_partition and \
offset not in self.replicate_dict:
# The master did its best to avoid useless replication orders
# but there may still be a few, and we may receive redundant
# update notification of backup_tid.
# So, we do nothing here if we are already replicating.
p = self.partition_dict[offset]
if not next_tid:
next_tid = add64(tid, 1)
p.next_trans = p.next_obj = next_tid
if next_tid:
self.updateBackupTID(True)
self._nextPartition()
def _nextPartitionSortKey(self, offset):
p = self.partition_dict[offset]
return p.next_obj, bool(p.max_ttid)
def _nextPartition(self):
# XXX: One connection to another storage may remain open forever.
# All other previous connections are automatically closed
# after some time of inactivity.
# This should be improved in several ways:
# - Keeping connections open between 2 clusters (backup case) is
# quite a good thing because establishing a connection costs
# time/bandwidth and replication is actually never finished.
# - When all storages of a non-backup cluster are up-to-date,
# there's no reason to keep any connection open.
if self.current_partition is not None or not self.replicate_dict:
return
app = self.app
assert app.master_conn and app.operational, (
app.master_conn, app.operational)
# Start replicating the partition which is furthest behind,
# to increase the overall backup_tid as soon as possible.
# Then prefer a partition with no unfinished transaction.
# XXX: When leaving backup mode, we should only consider UP_TO_DATE
# cells.
offset = min(self.replicate_dict, key=self._nextPartitionSortKey)
try:
addr, name = self.source_dict[offset]
except KeyError:
assert app.pt.getCell(offset, app.uuid).isOutOfDate(), (
offset, app.pt.getCell(offset, app.uuid).getState())
node = random.choice([cell.getNode()
for cell in app.pt.getCellList(offset, readable=True)
if cell.getNodeState() == NodeStates.RUNNING])
name = None
else:
node = app.nm.getByAddress(addr)
if node is None:
assert name, addr
node = app.nm.createStorage(address=addr)
self.current_partition = offset
previous_node = self.current_node
self.current_node = node
if node.isConnected(connecting=True):
if node.isIdentified():
node.getConnection().asClient()
self.fetchTransactions()
else:
assert name or node.getUUID() != app.uuid, "loopback connection"
conn = ClientConnection(app, StorageOperationHandler(app), node)
try:
conn.ask(Packets.RequestIdentification(NodeTypes.STORAGE,
None if name else app.uuid, app.server, name or app.name,
app.id_timestamp, (), ()))
except ConnectionClosed:
if previous_node is self.current_node:
return
if previous_node is not None and previous_node.isConnected():
app.closeClient(previous_node.getConnection())
def connected(self, node):
if self.current_node is node and self.current_partition is not None:
self.fetchTransactions()
def fetchTransactions(self, min_tid=None):
assert self.current_node.getConnection().isClient(), self.current_node
offset = self.current_partition
p = self.partition_dict[offset]
if min_tid:
# More than one chunk ? This could be a full replication so avoid
# restarting from the beginning by committing now.
self.app.dm.commit()
p.next_trans = min_tid
else:
try:
addr, name = self.source_dict[offset]
except KeyError:
pass
else:
if addr != self.current_node.getAddress():
return self.abort()
min_tid = p.next_trans
self.replicate_tid = self.replicate_dict.pop(offset)
logging.debug("starting replication of from %r", offset, dump(min_tid),
dump(self.replicate_tid), self.current_node)
max_tid = self.replicate_tid
tid_list = self.app.dm.getReplicationTIDList(min_tid, max_tid,
FETCH_COUNT, offset)
self._conn_msg_id = self.current_node.ask(Packets.AskFetchTransactions(
offset, FETCH_COUNT, min_tid, max_tid, tid_list))
def fetchObjects(self, min_tid=None, min_oid=ZERO_OID):
offset = self.current_partition
p = self.partition_dict[offset]
max_tid = self.replicate_tid
dm = self.app.dm
if min_tid:
p.next_obj = min_tid
self.updateBackupTID()
dm.updateCellTID(offset, add64(min_tid, -1))
dm.commit() # like in fetchTransactions
else:
min_tid = p.next_obj
p.next_trans = add64(max_tid, 1)
object_dict = {}
for serial, oid in dm.getReplicationObjectList(min_tid,
max_tid, FETCH_COUNT, offset, min_oid):
try:
object_dict[serial].append(oid)
except KeyError:
object_dict[serial] = [oid]
self._conn_msg_id = self.current_node.ask(Packets.AskFetchObjects(
offset, FETCH_COUNT, min_tid, max_tid, min_oid, object_dict))
def finish(self):
offset = self.current_partition
tid = self.replicate_tid
del self.current_partition, self._conn_msg_id, self.replicate_tid
p = self.partition_dict[offset]
p.next_obj = add64(tid, 1)
self.updateBackupTID()
app = self.app
app.dm.updateCellTID(offset, tid)
app.dm.commit()
if p.max_ttid or offset in self.replicate_dict and \
offset not in self.source_dict:
logging.debug("unfinished transactions: %r", self.ttid_set)
else:
app.tm.replicated(offset, tid)
logging.debug("partition %u replicated up to %s from %r",
offset, dump(tid), self.current_node)
self.getCurrentConnection().setReconnectionNoDelay()
self._nextPartition()
def abort(self, message=''):
offset = self.current_partition
if offset is None:
return
del self.current_partition
self._conn_msg_id = None
logging.warning('replication aborted for partition %u%s',
offset, message and ' (%s)' % message)
if offset in self.partition_dict:
# XXX: Try another partition if possible, to increase probability to
# connect to another node. It would be better to explicitly
# search for another node instead.
tid = self.replicate_dict.pop(offset, None) or self.replicate_tid
if self.replicate_dict:
self._nextPartition()
self.replicate_dict[offset] = tid
else:
self.replicate_dict[offset] = tid
self._nextPartition()
else: # partition removed
self._nextPartition()
def stop(self):
# Close any open connection to an upstream storage,
# possibly aborting current replication.
node = self.current_node
if node is not None is node.getUUID():
offset = self.current_partition
if offset is not None:
logging.info('cancel replication of partition %u', offset)
del self.current_partition
if self._conn_msg_id is not None:
self.replicate_dict.setdefault(offset, self.replicate_tid)
del self._conn_msg_id, self.replicate_tid
self.getCurrentConnection().close()
# Cancel all replication orders from upstream cluster.
for offset in self.replicate_dict.keys():
addr, name = self.source_dict.get(offset, (None, None))
if name:
tid = self.replicate_dict.pop(offset)
logging.info('cancel replication of partition %u from %r'
' up to %s', offset, addr, dump(tid))
# Make UP_TO_DATE cells really UP_TO_DATE
self._nextPartition()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/shared_queue.py 0000664 0000000 0000000 00000014014 13465024610 0026514 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2018-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from msgpack import Packer, Unpacker
class Queue(object):
"""Unidirectional pipe for asynchronous and fast exchange of big amounts
of data between 2 processes.
It is implemented using shared memory, a few locks and msgpack
serialization. While the latter is faster than C pickle, it was mainly
chosen for its streaming API while deserializing, which greatly reduces
the locking overhead for the consumer process.
There is no mechanism to end a communication, so this information must be
in the exchanged data, for example by choosing a marker object like None:
- the last object sent by the producer is this marker
- the consumer stops iterating when it gets this marker
As long as there are data being exchanged, the 2 processes can't change
roles (producer/consumer).
"""
def __init__(self, max_size):
from multiprocessing import Lock, RawArray, RawValue
self._max_size = max_size
self._array = RawArray('c', max_size)
self._pos = RawValue('L')
self._size = RawValue('L')
self._locks = Lock(), Lock(), Lock()
def __repr__(self):
return "<%s pos=%s size=%s max_size=%s>" % (self.__class__.__name__,
self._pos.value, self._size.value, self._max_size)
def __iter__(self):
"""Iterate endlessly over all objects sent by the producer
Internally, this method uses a receiving buffer that is lost if
interrupted (GeneratorExit). If this buffer was not empty, the queue
is left in a inconsistent state and this method can't be called again.
So the correct way to split a loop is to first get an iterator
explicitly:
iq = iter(queue)
for x in iq:
if ...:
break
for x in iq:
...
"""
unpacker = Unpacker(use_list=False, raw=True)
feed = unpacker.feed
max_size = self._max_size
array = self._array
pos = self._pos
size = self._size
lock, get_lock, put_lock = self._locks
left = 0
while 1:
for data in unpacker:
yield data
while 1:
with lock:
p = pos.value
s = size.value
if s:
break
get_lock.acquire()
e = p + s
if e < max_size:
feed(array[p:e])
else:
feed(array[p:])
e -= max_size
feed(array[:e])
with lock:
pos.value = e
n = size.value
size.value = n - s
if n == max_size:
put_lock.acquire(0)
put_lock.release()
def __call__(self, iterable):
"""Fill the queue with given objects
Hoping than msgpack.Packer gets a streaming API, 'iterable' should not
be split (i.e. this method should be called only once, like __iter__).
"""
pack = Packer(use_bin_type=True).pack
max_size = self._max_size
array = self._array
pos = self._pos
size = self._size
lock, get_lock, put_lock = self._locks
left = 0
for data in iterable:
data = pack(data)
n = len(data)
i = 0
while 1:
if not left:
while 1:
with lock:
p = pos.value
j = size.value
left = max_size - j
if left:
break
put_lock.acquire()
p += j
if p >= max_size:
p -= max_size
e = min(p + min(n, left), max_size)
j = e - p
array[p:e] = data[i:i+j]
n -= j
i += j
with lock:
p = pos.value
s = size.value
j += s
size.value = j
if not s:
get_lock.acquire(0)
get_lock.release()
p += j
if p >= max_size:
p -= max_size
left = max_size - j
if not n:
break
def test(self):
import multiprocessing, random, sys, threading
from traceback import print_tb
r = range(50)
random.shuffle(r)
for P in threading.Thread, multiprocessing.Process:
q = Queue(23)
def t():
for n in xrange(len(r)):
yield '.' * n
yield
for n in r:
yield '.' * n
i = j = 0
p = P(target=q, args=(t(),))
p.daemon = 1
p.start()
try:
q = iter(q)
for i, x in enumerate(q):
if x is None:
break
self.assertEqual(x, '.' * i)
self.assertEqual(i, len(r))
for j in r:
self.assertEqual(next(q), '.' * j)
except KeyboardInterrupt:
print_tb(sys.exc_info()[2])
self.fail((i, j))
p.join()
if __name__ == '__main__':
import unittest
unittest.TextTestRunner().run(type('', (unittest.TestCase,), {
'runTest': test})())
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/storage/transactions.py 0000664 0000000 0000000 00000070433 13465024610 0026561 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2010-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from time import time
from neo.lib import logging
from neo.lib.handler import DelayEvent, EventQueue
from neo.lib.util import dump
from neo.lib.protocol import Packets, ProtocolError, NonReadableCell, \
uuid_str, MAX_TID, ZERO_TID
class ConflictError(Exception):
"""
Raised when a resolvable conflict occurs
Argument: tid of locking transaction or latest revision
"""
def __init__(self, tid):
Exception.__init__(self)
self.tid = tid
class NotRegisteredError(Exception):
"""
Raised when a ttid is not registered
"""
class Transaction(object):
"""
Container for a pending transaction
"""
_delayed = {}
tid = None
voted = 0
def __init__(self, uuid, ttid):
self._birth = time()
self.locking_tid = ttid
self.uuid = uuid
self.serial_dict = {}
self.store_dict = {}
# Remember the oids for which we didn't check for conflict. This is
# used primarily to know when to declare a cell really readable once
# the replication is finished.
self.lockless = set()
def __repr__(self):
return "<%s(%s, locking_tid=%s, tid=%s, age=%.2fs) at 0x%x>" % (
self.__class__.__name__,
uuid_str(self.uuid),
dump(self.locking_tid),
dump(self.tid),
time() - self._birth,
id(self))
def __lt__(self, other):
return self.locking_tid < other.locking_tid
def logDelay(self, ttid, locked, oid_serial):
if self._delayed.get(oid_serial) != locked:
if self._delayed:
self._delayed[oid_serial] = locked
else:
self._delayed = {oid_serial: locked}
logging.info('Lock delayed for %s:%s by %s',
dump(oid_serial[0]), dump(ttid), dump(locked))
def store(self, oid, data_id, value_serial):
"""
Add an object to the transaction
"""
self.store_dict[oid] = oid, data_id, value_serial
class TransactionManager(EventQueue):
"""
Manage pending transaction and locks
"""
def __init__(self, app):
EventQueue.__init__(self)
self.read_queue = EventQueue()
self._app = app
self._transaction_dict = {}
self._store_lock_dict = {}
self._load_lock_dict = {}
self._replicated = {}
self._replicating = set()
def getPartition(self, oid):
from neo.lib.util import u64
np = self._app.pt.getPartitions()
self.getPartition = lambda oid: u64(oid) % np
return self.getPartition(oid)
def discarded(self, offset_list):
self._replicating.difference_update(offset_list)
for offset in offset_list:
self._replicated.pop(offset, None)
getPartition = self.getPartition
for oid_dict in self._load_lock_dict, self._store_lock_dict:
for oid in oid_dict.keys():
if getPartition(oid) in offset_list:
del oid_dict[oid]
data_id_list = []
for transaction in self._transaction_dict.itervalues():
serial_dict = transaction.serial_dict
oid_list = [oid for oid in serial_dict
if getPartition(oid) in offset_list]
for oid in oid_list:
del serial_dict[oid]
try:
data_id_list.append(transaction.store_dict.pop(oid)[1])
except KeyError:
pass
transaction.lockless.difference_update(oid_list)
self._app.dm.dropPartitionsTemporary(offset_list)
self._app.dm.releaseData(data_id_list, True)
# notifyPartitionChanges will commit
self.executeQueuedEvents()
self.read_queue.executeQueuedEvents()
def readable(self, offset_list):
for offset in offset_list:
tid = self._replicated.pop(offset, None)
assert tid is None, (offset, tid)
def replicating(self, offset_list):
self._replicating.update(offset_list)
isdisjoint = set(offset_list).isdisjoint
assert isdisjoint(self._replicated), (offset_list, self._replicated)
assert isdisjoint(map(self.getPartition, self._store_lock_dict)), (
offset_list, self._store_lock_dict)
p = Packets.AskUnfinishedTransactions(offset_list)
self._app.master_conn.ask(p, offset_list=offset_list)
def replicated(self, partition, tid):
# also called for readable cells in BACKINGUP state
self._replicating.discard(partition)
self._replicated[partition] = tid
self._notifyReplicated()
def _notifyReplicated(self):
getPartition = self.getPartition
store_lock_dict = self._store_lock_dict
replicated = self._replicated
notify = {x[0] for x in replicated.iteritems() if x[1]}
# We sort transactions so that in case of multiple stores/checks
# for the same oid, the lock is taken by the highest locking ttid,
# which will delay new transactions.
for txn, ttid in sorted((txn, ttid) for ttid, txn in
self._transaction_dict.iteritems()):
assert txn.lockless.issubset(txn.serial_dict), (
ttid, txn.lockless, txn.serial_dict)
for oid in txn.lockless:
partition = getPartition(oid)
if replicated.get(partition):
if store_lock_dict.get(oid, ttid) != ttid:
# We have a "multi-lock" store, i.e. an
# initially-lockless store to a partition that became
# replicated.
notify.discard(partition)
store_lock_dict[oid] = ttid
if notify:
# For these partitions, all oids of all pending transactions
# are now locked normally and we don't rely anymore on other
# readable cells to check locks: we're really up-to-date.
for partition in notify:
self._app.master_conn.send(Packets.NotifyReplicationDone(
partition, replicated[partition]))
replicated[partition] = None
for oid in [oid for oid in store_lock_dict
if getPartition(oid) in notify]:
ttid = store_lock_dict.pop(oid)
txn = self._transaction_dict[ttid]
# Use 'discard' instead of 'remove', for oids that were
# locked after that the partition was replicated.
txn.lockless.discard(oid)
try:
locked = self.lockObject(ttid, txn.serial_dict[oid], oid)
except ConflictError:
self._unstore(txn, oid)
except (DelayEvent, NonReadableCell), e: # pragma: no cover
raise AssertionError(e)
else:
assert locked, (oid, ttid, txn)
def register(self, conn, ttid):
"""
Register a transaction, it may be already registered
"""
if ttid not in self._transaction_dict:
uuid = conn.getUUID()
logging.debug('Register TXN %s for %s', dump(ttid), uuid_str(uuid))
self._transaction_dict[ttid] = Transaction(uuid, ttid)
def getObjectFromTransaction(self, ttid, oid):
"""
Return object data for given running transaction.
Return None if not found.
"""
try:
return self._transaction_dict[ttid].store_dict[oid]
except KeyError:
return None
def _rebase(self, transaction, ttid, locking_tid=MAX_TID):
# With the default value of locking_tid, this marks the transaction as
# being rebased, in case that the current lock is released (the other
# transaction is aborted or committed) before the client sends us a new
# locking tid: in lockObject, 'locked' will be None but we'll still
# have to delay the store.
transaction.locking_tid = locking_tid
if ttid:
# Remove store locks we have.
# In order to keep all locking data consistent, this must be done
# when the locking tid changes, i.e. from both 'lockObject' (for
# the node that triggered the deadlock) and 'rebase' (for other
# nodes).
for oid, locked in self._store_lock_dict.items():
# If this oid is locked by several transactions (all lockless),
# the following condition is true if we have the highest ttid,
# but in either case, _notifyReplicated will be called below,
# fixing the store lock.
if locked == ttid:
del self._store_lock_dict[oid]
# However here, we don't try to remember lockless writes: later,
# we may give write-locks to oids that would normally conflict.
# Readable cells prevent such scenario to go wrong.
lockless = transaction.lockless
if locking_tid == MAX_TID:
if lockless:
lockless.clear()
self._notifyReplicated()
else:
# There's nothing to rebase for lockless stores to replicating
# partitions because there's no lock taken yet. In other words,
# rebasing such stores would do nothing. Other lockless stores
# become normal ones: this transaction does not block anymore
# replicated partitions from being marked as UP_TO_DATE.
oid = [oid
for oid in lockless
if self.getPartition(oid) not in self._replicating]
if oid:
lockless.difference_update(oid)
self._notifyReplicated()
# Some locks were released, some pending locks may now succeed.
# We may even have delayed stores for this transaction, like the one
# that triggered the deadlock. They must also be sorted again because
# our locking tid has changed.
self.sortAndExecuteQueuedEvents()
def rebase(self, conn, ttid, locking_tid):
self.register(conn, ttid)
transaction = self._transaction_dict[ttid]
if transaction.voted:
raise ProtocolError("TXN %s already voted" % dump(ttid))
# First, get a set copy of serial_dict before _rebase locks oids.
lock_set = set(transaction.serial_dict)
self._rebase(transaction, transaction.locking_tid != MAX_TID and ttid,
locking_tid)
if transaction.locking_tid == MAX_TID:
# New deadlock. There's no point rebasing objects now.
return ()
# We return all oids that can't be relocked trivially
# (the client will use RebaseObject for these oids).
lock_set -= transaction.lockless # see comment in _rebase
recheck_set = lock_set.intersection(self._store_lock_dict)
lock_set -= recheck_set
for oid in lock_set:
try:
serial = transaction.serial_dict[oid]
except KeyError:
# An oid was already being rebased and delayed,
# and it got a conflict during the above call to _rebase.
continue
try:
self.lockObject(ttid, serial, oid)
except ConflictError:
recheck_set.add(oid)
except (DelayEvent, NonReadableCell), e: # pragma: no cover
raise AssertionError(e)
return recheck_set
def vote(self, ttid, txn_info=None):
"""
Store transaction information received from client node
"""
logging.debug('Vote TXN %s', dump(ttid))
try:
transaction = self._transaction_dict[ttid]
except KeyError:
raise ProtocolError("unknown ttid %s" % dump(ttid))
object_list = transaction.store_dict.itervalues()
if txn_info:
user, desc, ext, oid_list = txn_info
txn_info = oid_list, user, desc, ext, False, ttid
transaction.voted = 2
else:
transaction.voted = 1
# store metadata to temporary table
dm = self._app.dm
dm.storeTransaction(ttid, object_list, txn_info)
dm.commit()
def lock(self, ttid, tid):
"""
Lock a transaction
"""
logging.debug('Lock TXN %s (ttid=%s)', dump(tid), dump(ttid))
try:
transaction = self._transaction_dict[ttid]
except KeyError:
raise ProtocolError("unknown ttid %s" % dump(ttid))
assert transaction.tid is None, dump(transaction.tid)
assert ttid <= tid, (ttid, tid)
transaction.tid = tid
self._load_lock_dict.update(
dict.fromkeys(transaction.store_dict, ttid))
if transaction.voted == 2:
self._app.dm.lockTransaction(tid, ttid)
else:
assert transaction.voted
def unlock(self, ttid):
"""
Unlock transaction
"""
try:
transaction = self._transaction_dict[ttid]
except KeyError:
raise ProtocolError("unknown ttid %s" % dump(ttid))
tid = transaction.tid
logging.debug('Unlock TXN %s (ttid=%s)', dump(tid), dump(ttid))
dm = self._app.dm
dm.unlockTransaction(tid, ttid,
transaction.voted == 2,
transaction.store_dict)
self._app.em.setTimeout(time() + 1, dm.deferCommit())
self.abort(ttid, even_if_locked=True)
def getFinalTID(self, ttid):
try:
return self._transaction_dict[ttid].tid
except KeyError:
return self._app.dm.getFinalTID(ttid)
def getLockingTID(self, oid):
return self._store_lock_dict.get(oid)
def lockObject(self, ttid, serial, oid):
"""
Take a write lock on given object, checking that "serial" is
current.
Raises:
DelayEvent
ConflictError
"""
transaction = self._transaction_dict[ttid]
if self.getPartition(oid) in self._replicating:
# We're out-of-date so maybe:
# - we don't have all data to check for conflicts
# - we missed stores/check that would lock this one
# However, this transaction may have begun after we started to
# replicate, and we're expected to store it in full.
# Since there's at least 1 other (readable) cell that will do this
# check, we accept this store/check without taking a lock.
if transaction.locking_tid == MAX_TID:
# Deadlock avoidance. Still no new locking_tid from the client.
raise DelayEvent(transaction)
transaction.lockless.add(oid)
return
locked = self._store_lock_dict.get(oid)
if locked:
other = self._transaction_dict[locked]
if other < transaction or other.voted:
# We have a bigger "TTID" than locking transaction, so we are
# younger: enter waiting queue so we are handled when lock gets
# released. We also want to delay (instead of conflict) if the
# client is so faster that it is committing another transaction
# before we processed UnlockInformation from the master.
# Or the locking transaction has already voted and there's no
# risk of deadlock if we delay.
transaction.logDelay(ttid, locked, (oid, serial))
# A client may have several stores delayed for the same oid
# but this is not a problem. EventQueue processes them in order
# and only the last one will not result in conflicts (that are
# already resolved).
raise DelayEvent(transaction)
if oid in transaction.lockless:
# This is a consequence of not having taken a lock during
# replication. After a ConflictError, we may be asked to "lock"
# it again. The current lock is a special one that only delays
# new transactions.
# For the cluster, we're still out-of-date and like above,
# at least 1 other (readable) cell checks for conflicts.
return
if other is not transaction:
# We have a smaller "TTID" than locking transaction, so we are
# older: this is a possible deadlock case, as we might already
# hold locks the younger transaction is waiting upon.
logging.info('Deadlock on %s:%s with %s',
dump(oid), dump(ttid), dump(locked))
# Ask master to give the client a new locking tid, which will
# be used to ask all involved storage nodes to rebase the
# already locked oids for this transaction.
self._app.master_conn.send(Packets.NotifyDeadlock(
ttid, transaction.locking_tid))
self._rebase(transaction, ttid)
raise DelayEvent(transaction)
# If previous store was an undo, next store must be based on
# undo target.
try:
previous_serial = transaction.store_dict[oid][2]
except KeyError:
# Similarly to below for store, cascaded deadlock for
# checkCurrentSerial is possible because rebase() may return
# oids for which previous rebaseObject are delayed, or being
# received, and the client will bindly resend them.
assert oid in transaction.serial_dict, transaction
logging.info('Transaction %s checking %s more than once',
dump(ttid), dump(oid))
return True
if previous_serial is None:
# 2 valid cases:
# - the previous undo resulted in a resolved conflict
# - cascaded deadlock resolution
# Otherwise, this should not happen. For example, when being
# disconnected by the master because we missed a transaction,
# a conflict may happen after a first store to us, but the
# resolution waits for invalidations from the master (to then
# load the saved data), which are sent after the notification
# we are down, and the client would stop writing to us.
logging.info('Transaction %s storing %s more than once',
dump(ttid), dump(oid))
return True
elif transaction.locking_tid == MAX_TID:
# Deadlock avoidance. Still no new locking_tid from the client.
raise DelayEvent(transaction)
else:
try:
previous_serial = self._app.dm.getLastObjectTID(oid)
except NonReadableCell:
partition = self.getPartition(oid)
if partition not in self._replicated:
# Either the partition is discarded or we haven't yet
# received the notification from the master that the
# partition is assigned to us. In the latter case, we're
# not expected to have the partition in full.
# We'll return a successful answer to the client, which
# is fine because there's at least one other cell that is
# readable for this oid.
raise
with self._app.dm.replicated(partition):
previous_serial = self._app.dm.getLastObjectTID(oid)
# Locking before reporting a conflict would speed up the case of
# cascading conflict resolution by avoiding incremental resolution,
# assuming that the time to resolve a conflict is often constant:
# "C+A vs. B -> C+A+B" rarely costs more than "C+A vs. C+B -> C+A+B".
# However, this would be against the optimistic principle of ZODB.
if previous_serial is not None and previous_serial != serial:
assert serial < previous_serial, (serial, previous_serial)
logging.info('Conflict on %s:%s with %s',
dump(oid), dump(ttid), dump(previous_serial))
raise ConflictError(previous_serial)
logging.debug('Transaction %s locking %s', dump(ttid), dump(oid))
self._store_lock_dict[oid] = ttid
return True
def checkCurrentSerial(self, ttid, oid, serial):
try:
transaction = self._transaction_dict[ttid]
except KeyError:
raise NotRegisteredError
assert oid not in transaction.serial_dict
locked = self.lockObject(ttid, serial, oid)
transaction.serial_dict[oid] = serial
if not locked:
return ZERO_TID
def storeObject(self, ttid, serial, oid, compression, checksum, data,
value_serial):
"""
Store an object received from client node
"""
try:
transaction = self._transaction_dict[ttid]
except KeyError:
raise NotRegisteredError
locked = self.lockObject(ttid, serial, oid)
if oid in transaction.serial_dict: # initially/still lockless, or undo
# XXX: We'd like to do that before calling lockObject,
# to release resources immediately (data, maybe lock)
# in case of delay/conflict.
# But keeping everything consistent is complicated.
self._unstore(transaction, oid)
transaction.serial_dict[oid] = serial
# store object
if data is None:
data_id = None
else:
data_id = self._app.dm.holdData(checksum, oid, data, compression)
transaction.store(oid, data_id, value_serial)
if not locked:
return ZERO_TID
def rebaseObject(self, ttid, oid):
try:
transaction = self._transaction_dict[ttid]
except KeyError:
logging.info('Forget rebase of %s by %s delayed by %s',
dump(oid), dump(ttid), dump(self.getLockingTID(oid)))
return
try:
serial = transaction.serial_dict[oid]
except KeyError:
# There was a previous rebase for this oid, it was still delayed
# during the second RebaseTransaction, and then a conflict was
# reported when another transaction was committed.
# This can also happen when a partition is dropped.
logging.info("no oid %s to rebase for transaction %s",
dump(oid), dump(ttid))
return
assert oid not in transaction.lockless, (oid, transaction.lockless)
try:
self.lockObject(ttid, serial, oid)
except ConflictError, e:
# Move the data back to the client for conflict resolution,
# since the client may not have it anymore.
return serial, e.tid, self._unstore(transaction, oid)
def _unstore(self, transaction, oid):
try:
data_id = transaction.store_dict.pop(oid)[1]
except KeyError: # check current
data = None
else:
if data_id is None:
data = None
else:
dm = self._app.dm
data = dm.loadData(data_id)
dm.releaseData([data_id], True)
del transaction.serial_dict[oid]
return data
def abort(self, ttid, even_if_locked=False):
"""
Abort a transaction
Releases locks held on all transaction objects, deletes Transaction
instance, and executed queued events.
Note: does not alter persistent content.
"""
if ttid not in self._transaction_dict:
assert not even_if_locked
# See how the master processes AbortTransaction from the client.
return
transaction = self._transaction_dict[ttid]
locked = transaction.tid
# if the transaction is locked, ensure we can drop it
if locked:
if not even_if_locked:
return
else:
logging.debug('Abort TXN %s', dump(ttid))
dm = self._app.dm
dm.abortTransaction(ttid)
dm.releaseData([x[1] for x in transaction.store_dict.itervalues()],
True)
dm.commit()
# unlock any object
for oid in transaction.serial_dict:
if locked:
lock_ttid = self._load_lock_dict.pop(oid, None)
assert lock_ttid in (ttid, None), ('Transaction %s tried'
' to release the lock on oid %s, but it was held by %s'
% (dump(ttid), dump(oid), dump(lock_ttid)))
try:
write_locking_tid = self._store_lock_dict[oid]
except KeyError:
# Lockless store (we are replicating this partition),
# or unresolved deadlock.
continue
if ttid == write_locking_tid:
del self._store_lock_dict[oid]
elif __debug__:
other = self._transaction_dict[write_locking_tid]
x = (oid, ttid, write_locking_tid,
self._replicated, transaction.lockless)
assert oid in other.serial_dict, x
if oid in transaction.lockless:
# Several lockless stores for this oid and among them,
# a higher ttid is still pending.
assert transaction < other, x
# There may remain a single lockless store so we'll need
# this partition to be checked in _notifyReplicated.
assert self._replicated.get(self.getPartition(oid)), x
else: # unresolved deadlock
assert not locked, x
# remove the transaction
del self._transaction_dict[ttid]
if self._replicated:
self._notifyReplicated()
# some locks were released, some pending locks may now succeed
self.read_queue.executeQueuedEvents()
self.executeQueuedEvents()
def abortFor(self, uuid, even_if_voted=False):
"""
Abort any non-locked transaction of a node
"""
# abort any non-locked transaction of this node
for ttid, transaction in self._transaction_dict.items():
if transaction.uuid == uuid and (
even_if_voted or not transaction.voted):
self.abort(ttid)
def isLockedTid(self, tid):
return any(None is not t.tid <= tid
for t in self._transaction_dict.itervalues())
def loadLocked(self, oid):
return oid in self._load_lock_dict
def log(self):
logging.info("Transactions:")
for ttid, txn in self._transaction_dict.iteritems():
logging.info(' %s %r', dump(ttid), txn)
logging.info(' Read locks:')
for oid, ttid in self._load_lock_dict.iteritems():
logging.info(' %s by %s', dump(oid), dump(ttid))
logging.info(' Write locks:')
for oid, ttid in self._store_lock_dict.iteritems():
logging.info(' %s by %s', dump(oid), dump(ttid))
self.logQueuedEvents()
self.read_queue.logQueuedEvents()
def updateObjectDataForPack(self, oid, orig_serial, new_serial, data_id):
lock_tid = self.getLockingTID(oid)
if lock_tid is not None:
transaction = self._transaction_dict[lock_tid]
if transaction.store_dict[oid][2] == orig_serial:
if new_serial:
data_id = None
else:
self._app.dm.holdData(data_id)
transaction.store(oid, data_id, new_serial)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/ 0000775 0000000 0000000 00000000000 13465024610 0023166 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/__init__.py 0000664 0000000 0000000 00000051257 13465024610 0025311 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import __builtin__
import errno
import functools
import gc
import os
import random
import socket
import subprocess
import sys
import tempfile
import unittest
import weakref
import MySQLdb
import transaction
from contextlib import contextmanager
from ConfigParser import SafeConfigParser
from cStringIO import StringIO
try:
from ZODB._compat import Unpickler
except ImportError:
from cPickle import Unpickler
from functools import wraps
from inspect import isclass
from .mock import Mock
from neo.lib import debug, logging, protocol
from neo.lib.protocol import NodeTypes, Packets, UUID_NAMESPACES
from neo.lib.util import cached_property
from time import time, sleep
from struct import pack, unpack
from unittest.case import _ExpectedFailure, _UnexpectedSuccess
try:
from transaction.interfaces import IDataManager
from ZODB.utils import newTid
from ZODB.ConflictResolution import PersistentReferenceFactory
except ImportError:
pass
def expectedFailure(exception=AssertionError):
def decorator(func):
def wrapper(*args, **kw):
try:
func(*args, **kw)
except exception, e:
# XXX: passing sys.exc_info() causes deadlocks
raise _ExpectedFailure((type(e), None, None))
raise _UnexpectedSuccess
return wraps(func)(wrapper)
if callable(exception) and not isinstance(exception, type):
func = exception
exception = Exception
return decorator(func)
return decorator
DB_PREFIX = os.getenv('NEO_DB_PREFIX', 'test_neo')
DB_ADMIN = os.getenv('NEO_DB_ADMIN', 'root')
DB_PASSWD = os.getenv('NEO_DB_PASSWD', '')
DB_USER = os.getenv('NEO_DB_USER', 'test')
DB_SOCKET = os.getenv('NEO_DB_SOCKET', '')
DB_INSTALL = os.getenv('NEO_DB_INSTALL', 'mysql_install_db')
DB_MYSQLD = os.getenv('NEO_DB_MYSQLD', '/usr/sbin/mysqld')
DB_MYCNF = os.getenv('NEO_DB_MYCNF')
IP_VERSION_FORMAT_DICT = {
socket.AF_INET: '127.0.0.1',
socket.AF_INET6: '::1',
}
ADDRESS_TYPE = socket.AF_INET
SSL = os.path.dirname(__file__) + os.sep
SSL = SSL + "ca.crt", SSL + "node.crt", SSL + "node.key"
logging.default_root_handler.handle = lambda record: None
debug.register()
def mockDefaultValue(name, function):
def method(self, *args, **kw):
if name in self.mockReturnValues:
return self.__getattr__(name)(*args, **kw)
return function(self, *args, **kw)
method.__name__ = name
setattr(Mock, name, method)
mockDefaultValue('__nonzero__', lambda self: self.__len__() != 0)
mockDefaultValue('__repr__', lambda self:
'<%s object at 0x%x>' % (self.__class__.__name__, id(self)))
mockDefaultValue('__str__', repr)
def buildUrlFromString(address):
try:
socket.inet_pton(socket.AF_INET6, address)
address = '[%s]' % address
except Exception:
pass
return address
def getTempDirectory():
"""get the current temp directory or a new one"""
try:
temp_dir = os.environ['TEMP']
except KeyError:
neo_dir = os.path.join(tempfile.gettempdir(), 'neo_tests')
while True:
temp_name = repr(time())
temp_dir = os.path.join(neo_dir, temp_name)
try:
os.makedirs(temp_dir)
break
except OSError, e:
if e.errno != errno.EEXIST:
raise
last = os.path.join(neo_dir, "last")
try:
os.remove(last)
except OSError, e:
if e.errno != errno.ENOENT:
raise
os.symlink(temp_name, last)
os.environ['TEMP'] = temp_dir
print 'Using temp directory %r.' % temp_dir
return temp_dir
def setupMySQLdb(db_list, clear_databases=True):
if mysql_pool:
return mysql_pool.setup(db_list, clear_databases)
from MySQLdb.constants.ER import BAD_DB_ERROR
user = DB_USER
password = ''
kw = {'unix_socket': os.path.expanduser(DB_SOCKET)} if DB_SOCKET else {}
conn = MySQLdb.connect(user=DB_ADMIN, passwd=DB_PASSWD, **kw)
cursor = conn.cursor()
for database in db_list:
try:
conn.select_db(database)
if not clear_databases:
continue
cursor.execute('DROP DATABASE `%s`' % database)
except MySQLdb.OperationalError, (code, _):
if code != BAD_DB_ERROR:
raise
cursor.execute('GRANT ALL ON `%s`.* TO "%s"@"localhost" IDENTIFIED'
' BY "%s"' % (database, user, password))
cursor.execute('CREATE DATABASE `%s`' % database)
cursor.close()
conn.commit()
conn.close()
return '{}:{}@%s{}'.format(user, password, DB_SOCKET).__mod__
class MySQLPool(object):
def __init__(self, pool_dir=None):
self._args = {}
self._mysqld_dict = {}
if not pool_dir:
pool_dir = getTempDirectory()
self._base = pool_dir + os.sep
self._sock_template = os.path.join(pool_dir, '%s', 'mysql.sock')
def __del__(self):
self.kill(*self._mysqld_dict)
def setup(self, db_list, clear_databases):
start_list = set(db_list).difference(self._mysqld_dict)
if start_list:
start_list = sorted(start_list)
x = []
with open(os.devnull, 'wb') as f:
for db in start_list:
base = self._base + db
datadir = os.path.join(base, 'datadir')
sock = self._sock_template % db
tmpdir = os.path.join(base, 'tmp')
args = [DB_INSTALL,
'--defaults-file=' + DB_MYCNF,
'--datadir=' + datadir,
'--socket=' + sock,
'--tmpdir=' + tmpdir,
'--log_error=' + os.path.join(base, 'error.log')]
if os.path.exists(datadir):
try:
os.remove(sock)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
os.makedirs(tmpdir)
x.append(subprocess.Popen(args,
stdout=f, stderr=subprocess.STDOUT))
args[0] = DB_MYSQLD
self._args[db] = args
for x in x:
x = x.wait()
if x:
raise subprocess.CalledProcessError(x, DB_INSTALL)
self.start(*start_list)
for db in start_list:
sock = self._sock_template % db
p = self._mysqld_dict[db]
while not os.path.exists(sock):
sleep(1)
x = p.poll()
if x is not None:
raise subprocess.CalledProcessError(x, DB_MYSQLD)
for db in db_list:
db = MySQLdb.connect(unix_socket=self._sock_template % db,
user='root')
if clear_databases:
db.query('DROP DATABASE IF EXISTS neo')
db.query('CREATE DATABASE IF NOT EXISTS neo')
db.close()
return ('root@neo' + self._sock_template).__mod__
def start(self, *db, **kw):
assert set(db).isdisjoint(self._mysqld_dict)
for db in db:
self._mysqld_dict[db] = subprocess.Popen(self._args[db], **kw)
def kill(self, *db):
processes = []
for db in db:
p = self._mysqld_dict.pop(db)
processes.append(p)
p.kill()
for p in processes:
p.wait()
mysql_pool = MySQLPool() if DB_MYCNF else None
def ImporterConfigParser(adapter, zodb, **kw):
cfg = SafeConfigParser()
cfg.add_section("neo")
cfg.set("neo", "adapter", adapter)
for x in kw.iteritems():
cfg.set("neo", *x)
for name, zodb in zodb:
cfg.add_section(name)
for x in zodb.iteritems():
cfg.set(name, *x)
return cfg
class NeoTestBase(unittest.TestCase):
maxDiff = None
def setUp(self):
logging.name = self.setupLog()
unittest.TestCase.setUp(self)
def setupLog(self):
test_case, logging.name = self.id().rsplit('.', 1)
logging.setup(os.path.join(getTempDirectory(), test_case + '.log'))
def tearDown(self):
assert self.tearDown.im_func is NeoTestBase.tearDown.im_func
self._tearDown(sys._getframe(1).f_locals['success'])
assert not gc.garbage, gc.garbage
def _tearDown(self, success):
# Kill all unfinished transactions for next test.
# Note we don't even abort them because it may require a valid
# connection to a master node (see Storage.sync()).
transaction.manager.__init__()
if logging._max_size is not None:
logging.flush()
class failureException(AssertionError):
def __init__(self, msg=None):
logging.error(msg)
AssertionError.__init__(self, msg)
failIfEqual = failUnlessEqual = assertEquals = assertNotEquals = None
def assertNotEqual(self, first, second, msg=None):
assert not (isinstance(first, Mock) or isinstance(second, Mock)), \
"Mock objects can't be compared with '==' or '!='"
return super(NeoTestBase, self).assertNotEqual(first, second, msg=msg)
def assertEqual(self, first, second, msg=None):
assert not (isinstance(first, Mock) or isinstance(second, Mock)), \
"Mock objects can't be compared with '==' or '!='"
return super(NeoTestBase, self).assertEqual(first, second, msg=msg)
def assertPartitionTable(self, pt, expected, key=None):
self.assertEqual(
expected if isinstance(expected, str) else '|'.join(expected),
'|'.join(pt._formatRows(sorted(pt.count_dict, key=key))))
@contextmanager
def expectedFailure(self, exception=AssertionError, regex=None):
with self.assertRaisesRegexp(exception, regex) as cm:
yield
raise _UnexpectedSuccess
# XXX: passing sys.exc_info() causes deadlocks
raise _ExpectedFailure((type(cm.exception), None, None))
class NeoUnitTestBase(NeoTestBase):
""" Base class for neo tests, implements common checks """
local_ip = IP_VERSION_FORMAT_DICT[ADDRESS_TYPE]
def setUp(self):
self.uuid_dict = {}
NeoTestBase.setUp(self)
@cached_property
def nm(self):
from neo.lib import node
return node.NodeManager()
def createStorage(self, *args):
return self.nm.createStorage(**dict(zip(
('address', 'uuid', 'state'), args)))
def prepareDatabase(self, number, prefix=DB_PREFIX):
""" create empty databases """
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
if adapter == 'MySQL':
db_template = setupMySQLdb(
[prefix + str(i) for i in xrange(number)])
self.db_template = lambda i: db_template(prefix + str(i))
elif adapter == 'SQLite':
self.db_template = os.path.join(getTempDirectory(),
prefix + '%s.sqlite').__mod__
for i in xrange(number):
try:
os.remove(self.db_template(i))
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
assert False, adapter
def getMasterConfiguration(self, cluster='main', master_number=2,
replicas=2, partitions=1009, uuid=None):
assert master_number >= 1 and master_number <= 10
masters = ([(self.local_ip, 10010 + i)
for i in xrange(master_number)])
return {
'cluster': cluster,
'bind': masters[0],
'masters': masters,
'replicas': replicas,
'partitions': partitions,
'uuid': uuid,
}
def getStorageConfiguration(self, cluster='main', master_number=2,
index=0, prefix=DB_PREFIX, uuid=None):
assert master_number >= 1 and master_number <= 10
masters = [(buildUrlFromString(self.local_ip),
10010 + i) for i in xrange(master_number)]
adapter = os.getenv('NEO_TESTS_ADAPTER', 'MySQL')
return {
'cluster': cluster,
'bind': (masters[0], 10020 + index),
'masters': masters,
'database': self.db_template(index),
'uuid': uuid,
'adapter': adapter,
'wait': 0,
}
def getNewUUID(self, node_type):
"""
Retuns a 16-bytes UUID according to namespace 'prefix'
"""
if node_type is None:
node_type = random.choice(NodeTypes)
self.uuid_dict[node_type] = uuid = 1 + self.uuid_dict.get(node_type, 0)
return uuid + (UUID_NAMESPACES[node_type] << 24)
def getClientUUID(self):
return self.getNewUUID(NodeTypes.CLIENT)
def getMasterUUID(self):
return self.getNewUUID(NodeTypes.MASTER)
def getStorageUUID(self):
return self.getNewUUID(NodeTypes.STORAGE)
def getAdminUUID(self):
return self.getNewUUID(NodeTypes.ADMIN)
def getNextTID(self, ltid=None):
return newTid(ltid)
def getFakeConnector(self, descriptor=None):
return Mock({
'__repr__': 'FakeConnector',
'getDescriptor': descriptor,
'getAddress': ('', 0),
})
def getFakeConnection(self, uuid=None, address=('127.0.0.1', 10000),
is_server=False, connector=None, peer_id=None):
if connector is None:
connector = self.getFakeConnector()
conn = Mock({
'getUUID': uuid,
'getAddress': address,
'isServer': is_server,
'__repr__': 'FakeConnection',
'__nonzero__': 0,
'getConnector': connector,
'getPeerId': peer_id,
})
conn.mockAddReturnValues(__hash__ = id(conn))
conn.connecting = False
return conn
def checkProtocolErrorRaised(self, method, *args, **kwargs):
""" Check if the ProtocolError exception was raised """
self.assertRaises(protocol.ProtocolError, method, *args, **kwargs)
def checkAborted(self, conn):
""" Ensure the connection was aborted """
self.assertEqual(len(conn.mockGetNamedCalls('abort')), 1)
def checkClosed(self, conn):
""" Ensure the connection was closed """
self.assertEqual(len(conn.mockGetNamedCalls('close')), 1)
def _checkNoPacketSend(self, conn, method_id):
self.assertEqual([], conn.mockGetNamedCalls(method_id))
def checkNoPacketSent(self, conn):
""" check if no packet were sent """
self._checkNoPacketSend(conn, 'send')
self._checkNoPacketSend(conn, 'answer')
self._checkNoPacketSend(conn, 'ask')
# in check(Ask|Answer|Notify)Packet we return the packet so it can be used
# in tests if more accurate checks are required
def checkErrorPacket(self, conn):
""" Check if an error packet was answered """
calls = conn.mockGetNamedCalls("answer")
self.assertEqual(len(calls), 1)
packet = calls.pop().getParam(0)
self.assertTrue(isinstance(packet, protocol.Packet))
self.assertEqual(type(packet), Packets.Error)
return packet
def checkAskPacket(self, conn, packet_type):
""" Check if an ask-packet with the right type is sent """
calls = conn.mockGetNamedCalls('ask')
self.assertEqual(len(calls), 1)
packet = calls.pop().getParam(0)
self.assertTrue(isinstance(packet, protocol.Packet))
self.assertEqual(type(packet), packet_type)
return packet
def checkAnswerPacket(self, conn, packet_type):
""" Check if an answer-packet with the right type is sent """
calls = conn.mockGetNamedCalls('answer')
self.assertEqual(len(calls), 1)
packet = calls.pop().getParam(0)
self.assertTrue(isinstance(packet, protocol.Packet))
self.assertEqual(type(packet), packet_type)
return packet
def checkNotifyPacket(self, conn, packet_type, packet_number=0):
""" Check if a notify-packet with the right type is sent """
calls = conn.mockGetNamedCalls('send')
packet = calls.pop(packet_number).getParam(0)
self.assertTrue(isinstance(packet, protocol.Packet))
self.assertEqual(type(packet), packet_type)
return packet
class TransactionalResource(object):
class _sortKey(object):
def __init__(self, last):
self._last = last
def __cmp__(self, other):
assert type(self) is not type(other), other
return 1 if self._last else -1
def __init__(self, txn, last, **kw):
self.sortKey = lambda: self._sortKey(last)
for k in kw:
assert callable(IDataManager.get(k)), k
self.__dict__.update(kw)
txn.get().join(self)
def __call__(self, func):
name = func.__name__
assert callable(IDataManager.get(name)), name
setattr(self, name, func)
return func
def __getattr__(self, attr):
if callable(IDataManager.get(attr)):
return lambda *_: None
return self.__getattribute__(attr)
class Patch(object):
"""
Patch attributes and revert later automatically.
Usage:
with Patch(someObject, [new,] attrToPatch=newValue) as patch:
[... code that runs with patches ...]
[... code that runs without patch ...]
The 'new' positional parameter defaults to False and it must be equal to
not hasattr(someObject, 'attrToPatch')
It is an assertion to detect when a Patch is obsolete.
' as patch' is optional: 'patch.revert()' can be used to revert patches
in the middle of the 'with' clause.
Or:
patch = Patch(...)
patch.apply()
In this case, patches are automatically reverted when 'patch' is deleted.
For patched callables, the new one receives the original value as first
argument if 'new' is True.
Alternative usage:
@Patch(someObject)
def funcToPatch(orig, ...):
...
...
funcToPatch.revert()
The decorator applies the patch immediately.
"""
applied = False
def __new__(cls, patched, *args, **patch):
if patch:
return object.__new__(cls)
def patch(func):
self = cls(patched, *args, **{func.__name__: func})
self.apply()
return self
return patch
def __init__(self, patched, *args, **patch):
new, = args or (0,)
(name, patch), = patch.iteritems()
self._patched = patched
self._name = name
try:
wrapped = getattr(patched, name)
except AttributeError:
assert new, (patched, name)
else:
assert not new, (patched, name)
if callable(patch):
func = patch
patch = lambda *args, **kw: func(wrapped, *args, **kw)
if callable(wrapped):
patch = wraps(wrapped)(patch)
self._patch = patch
try:
orig = patched.__dict__[name]
except KeyError:
if new or isclass(patched):
self._revert = lambda: delattr(patched, name)
return
orig = getattr(patched, name)
self._revert = lambda: setattr(patched, name, orig)
def apply(self):
assert not self.applied
setattr(self._patched, self._name, self._patch)
self.applied = True
def revert(self):
del self.applied
self._revert()
def __del__(self):
if self.applied:
self.revert()
def __enter__(self):
self.apply()
return weakref.proxy(self)
def __exit__(self, t, v, tb):
self.__del__()
def unpickle_state(data):
unpickler = Unpickler(StringIO(data))
unpickler.persistent_load = PersistentReferenceFactory().persistent_load
unpickler.load() # skip the class tuple
return unpickler.load()
__builtin__.pdb = lambda depth=0: \
debug.getPdb().set_trace(sys._getframe(depth+1))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/benchmark.py 0000664 0000000 0000000 00000006770 13465024610 0025504 0 ustar 00root root 0000000 0000000 from __future__ import print_function
import argparse
import sys
import smtplib
import platform
import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
MAIL_SERVER = '127.0.0.1:25'
class AttributeDict(dict):
def __getattr__(self, item):
return self.__getitem__(item)
class BenchmarkRunner(object):
"""
Base class for a command-line benchmark test runner.
"""
def __init__(self):
self._successful = True
self._status = []
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter)
# register common options
_ = parser.add_argument
_('--title')
_('--mail-to', action='append')
_('--mail-from')
_('--mail-server')
self.add_options(parser)
# check common arguments
args = parser.parse_args()
if bool(args.mail_to) ^ bool(args.mail_from):
sys.exit('Need a sender and recipients to mail report')
mail_server = args.mail_server or MAIL_SERVER
# check specifics arguments
self._config = AttributeDict()
self._config.update(self.load_options(args))
self._config.update(
title = args.title or self.__class__.__name__,
mail_from = args.mail_from,
mail_to = args.mail_to,
mail_server = mail_server.split(':'),
)
def add_status(self, key, value):
self._status.append((key, value))
def build_report(self, content):
fmt = "%-25s : %s"
status = "\n".join([fmt % item for item in [
('Title', self._config.title),
('Date', datetime.date.today().isoformat()),
('Node', platform.node()),
('Machine', platform.machine()),
('System', platform.system()),
('Python', platform.python_version()),
]])
status += '\n\n'
status += "\n".join([fmt % item for item in self._status])
return "%s\n\n%s" % (status, content)
def send_report(self, subject, report):
# build report
# build email
msg = MIMEMultipart()
msg['Subject'] = '%s: %s' % (self._config.title, subject)
msg['From'] = self._config.mail_from
msg['To'] = ', '.join(self._config.mail_to)
msg['X-ERP5-Tests'] = 'NEO'
if self._successful:
msg['X-ERP5-Tests-Status'] = 'OK'
msg.epilogue = ''
msg.attach(MIMEText(report))
# send it
s = smtplib.SMTP()
s.connect(*self._config.mail_server)
mail = msg.as_string()
for recipient in self._config.mail_to:
try:
s.sendmail(self._config.mail_from, recipient, mail)
except smtplib.SMTPRecipientsRefused:
print("Mail for %s fails" % recipient)
s.close()
def run(self):
subject, report = self.start()
report = self.build_report(report)
if self._config.mail_to:
self.send_report(subject, report)
print(subject)
if report:
print()
print(report, end='')
def was_successful(self):
return self._successful
def add_options(self, parser):
""" Append options to command line parser """
raise NotImplementedError
def load_options(self, args):
""" Check options and return a configuration dict """
raise NotImplementedError
def start(self):
""" Run the test """
raise NotImplementedError
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/ca.crt 0000664 0000000 0000000 00000002062 13465024610 0024263 0 ustar 00root root 0000000 0000000 -----BEGIN CERTIFICATE-----
MIIC7TCCAdWgAwIBAgIJAL8e44sA7PDMMA0GCSqGSIb3DQEBCwUAMA0xCzAJBgNV
BAMMAkNBMB4XDTE1MDkzMDEzNTQzMFoXDTIxMDMyMjEzNTQzMFowDTELMAkGA1UE
AwwCQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCgT7DVKM4ViQt3
B0oJC4RGi10dNfpNZZpgA5iC2UJ1u6AqqCf0PCQkrmuIzW3l1TenlOiLNdVASkkT
wf1lekIgg4tR8/22oGTAnfY6R9r1C6jAMV72v1sffz8D6qfkMPzKchJt55zywdhm
KscUsMGzXPGIeKrG20m83dSIO4RmCmq/f4BcuWJu6Kkq4n9Wc2IsvpKk+lqEUxI/
QoqdT6OvMXooGs3t892uvKDu++muBj2Y/yyaXt1tCCjDFsRMLWl3Skks+4PeMCZ4
wugyXEBk3d5Yzdv5NsFzFBjAuRCGxJXEOEcfHj4Xj9qTCErZ1jKzgnuxJCtgdqRC
r4beX1U3AgMBAAGjUDBOMB0GA1UdDgQWBBSFY/jKvo0iSTEzzOIcZZUZCT8JfTAf
BgNVHSMEGDAWgBSFY/jKvo0iSTEzzOIcZZUZCT8JfTAMBgNVHRMEBTADAQH/MA0G
CSqGSIb3DQEBCwUAA4IBAQAB0LKDAuhodpyNVwEE9Yl+Q/IiPEPCaix6URJnRn1O
gQnXuZLo1xtJh6wJh1faG1/qNCFMxWEJ+0VkJ7r6v38cNXfYG9OcmD0S6YnNjSuO
VliAtqVVtj8MppJ4vMatLrNi4cvyYucebtNyBCzSIAi+6bkkHeaVgi1EtxXvq+AS
iZp3gl84oXv/gV7Bz4SXmVpFJnhsDMoQZG2KAULAgfZ2Am2I+ffG90cD/oEnS/3O
k3btqTvgIO8MWt8PY3sUOhJEoJYKnC9DppmhOhUTn4zzIIDSluKEOBHZiFb9AcmF
PvzL+8xiORCdUe1d6ANQQlUd0MM810BXZFYEXFbgKg8o
-----END CERTIFICATE-----
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/client/ 0000775 0000000 0000000 00000000000 13465024610 0024444 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/client/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0026543 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/client/testClientApp.py 0000664 0000000 0000000 00000006403 13465024610 0027600 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from ZODB.POSException import StorageTransactionError
from .. import NeoUnitTestBase, buildUrlFromString
from neo.client.app import Application
from neo.client.cache import test as testCache
from neo.client.exception import NEOStorageError
from neo.lib.util import p64
class ClientApplicationTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
self._to_stop_list = []
def _tearDown(self, success):
# stop threads
for app in self._to_stop_list:
app.close()
NeoUnitTestBase._tearDown(self, success)
# some helpers
def _begin(self, app, txn, tid):
txn_context = app._txn_container.new(txn)
txn_context.ttid = tid
return txn_context
def getApp(self, master_nodes=None, name='test', **kw):
if master_nodes is None:
master_nodes = '%s:10010' % buildUrlFromString(self.local_ip)
app = Application(master_nodes, name, **kw)
self._to_stop_list.append(app)
app.dispatcher = Mock({ })
return app
def makeOID(self, value=None):
from random import randint
return p64(randint(1, 255) if value is None else value)
makeTID = makeOID
def makeTransactionObject(self, user='u', description='d', _extension='e'):
class Transaction(object):
pass
txn = Transaction()
txn.user = user
txn.description = description
txn._extension = _extension
return txn
# common checks
testCache = testCache
def test_store1(self):
app = self.getApp()
oid = self.makeOID(11)
tid = self.makeTID()
txn = self.makeTransactionObject()
# invalid transaction > StorageTransactionError
self.assertRaises(StorageTransactionError, app.store, oid, tid, '',
None, txn)
# check partition_id and an empty cell list -> NEOStorageError
self._begin(app, txn, self.makeTID())
app.pt = Mock({'getCellList': ()})
app.num_partitions = 2
self.assertRaises(NEOStorageError, app.store, oid, tid, '', None,
txn)
calls = app.pt.mockGetNamedCalls('getCellList')
self.assertEqual(len(calls), 1)
def test_undo1(self):
# invalid transaction
app = self.getApp()
tid = self.makeTID()
txn = self.makeTransactionObject()
app.master_conn = Mock()
self.assertRaises(StorageTransactionError, app.undo, tid, txn)
# no packet sent
self.checkNoPacketSent(app.master_conn)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/client/testMasterHandler.py 0000664 0000000 0000000 00000002413 13465024610 0030447 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.client.handlers.master import PrimaryAnswersHandler
from neo.client.exception import NEOStorageError
class MasterHandlerTests(NeoUnitTestBase):
def setUp(self):
super(MasterHandlerTests, self).setUp()
self.app = Mock()
self.handler = PrimaryAnswersHandler(self.app)
def test_answerPack(self):
self.assertRaises(NEOStorageError, self.handler.answerPack, None, False)
# Check it doesn't raise
self.handler.answerPack(None, True)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/client/testZODBURI.py 0000664 0000000 0000000 00000004642 13465024610 0027042 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2017-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from neo.client.zodburi import _resolve_uri
testv = [
# [] of (uri, zconf_ok, dbkw_ok)
("neo://dbname@master",
"""\
master_nodes\tmaster
name\tdbname
""",
{}),
("neo://db2@master1:port1,master2:port2,master3:port3",
"""\
master_nodes\tmaster1:port1 master2:port2 master3:port3
name\tdb2
""",
{}),
("neo://db3@master1,master2:port2?read_only=true",
"""\
master_nodes\tmaster1 master2:port2
name\tdb3
read-only\ttrue
""",
{}),
("neo://db4@[2001:67c:1254:2a::1]:1234,master2:port2?read_only=false"
"&compress=true&logfile=xxx&alpha=111&dynamic_master_list=zzz&ca=qqq"
"&cert=rrr&key=sss&beta=222",
"""\
master_nodes\t[2001:67c:1254:2a::1]:1234 master2:port2
name\tdb4
read-only\tfalse
compress\ttrue
logfile\txxx
dynamic_master_list\tzzz
ca\tqqq
cert\trrr
key\tsss
""",
{"alpha": "111", "beta": "222"}),
]
class ZODBURITests(unittest.TestCase):
def test_zodburi(self):
# invalid schema / path / fragment
self.assertRaises(ValueError, _resolve_uri, "http://db@master")
self.assertRaises(ValueError, _resolve_uri, "neo://db@master/path")
self.assertRaises(ValueError, _resolve_uri, "neo://db@master#frag")
# db @ master not fully specified
self.assertRaises(ValueError, _resolve_uri, "neo://master")
# verify zodburi resolver produces expected zconfig
for uri, zconf_ok, dbkw_ok in testv:
zconf_ok = "%import neo.client\n\n" + zconf_ok + \
"\n"
zconf, dbkw = _resolve_uri(uri)
self.assertMultiLineEqual(zconf, zconf_ok)
self.assertEqual(dbkw, dbkw_ok)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/cluster.py 0000664 0000000 0000000 00000017676 13465024610 0025242 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2011-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import __builtin__
import errno
import mmap
import os
import psutil
import signal
import sys
import tempfile
from cPickle import dumps, loads
from functools import wraps
from time import time, sleep
from neo.lib import debug
class ClusterDict(dict):
"""Simple storage (dict), shared with forked processes"""
_acquired = 0
def __init__(self, *args, **kw):
dict.__init__(self, *args, **kw)
self._r, self._w = os.pipe()
# shm_open(3) would be better but Python doesn't provide it.
# See also http://nikitathespider.com/python/shm/
with tempfile.TemporaryFile() as f:
f.write(dumps(self.copy(), -1))
f.flush()
self._shared = mmap.mmap(f.fileno(), f.tell())
self.release()
def __del__(self):
try:
os.close(self._r)
os.close(self._w)
except TypeError: # if os.close is None
pass
def acquire(self):
self._acquired += 1
if not self._acquired:
os.read(self._r, 1)
try:
self.clear()
shared = self._shared
shared.resize(shared.size())
self.update(loads(shared[:]))
except:
self.release()
raise
def release(self, commit=False):
if not self._acquired:
if commit:
self.commit()
os.write(self._w, '\0')
self._acquired -= 1
def commit(self):
shared = self._shared
p = dumps(self.copy(), -1)
shared.resize(len(p))
shared[:] = p
cluster_dict = ClusterDict()
class ClusterPdb(object):
"""Multiprocess-aware wrapper around console and winpdb debuggers
__call__ is the method to break.
TODO: monkey-patch normal code not to timeout
if another node is being debugged
"""
def __init__(self):
self._count_dict = {}
def __setattr__(self, name, value):
try:
hook = getattr(self, name)
setattr(value.im_self, value.__name__, wraps(value)(
lambda *args, **kw: hook(value, *args, **kw)))
except AttributeError:
object.__setattr__(self, name, value)
@property
def broken_peer(self):
return self._getLastPdb(os.getpid()) is None
def __call__(self, depth=0, max_count=None, gui=False):
depth += 1
if max_count:
frame = sys._getframe(depth)
key = id(frame.f_code), frame.f_lineno
del frame
self._count_dict[key] = count = 1 + self._count_dict.get(key, 0)
if max_count < count:
return
if gui:
import rpdb2
if rpdb2.g_debugger is None:
rpdb2_CStateManager = rpdb2.CStateManager
def CStateManager(*args, **kw):
rpdb2.CStateManager = rpdb2_CStateManager
state_manager = rpdb2.CStateManager(*args, **kw)
self._rpdb2_set_state = state_manager.set_state
return state_manager
rpdb2.CStateManager = CStateManager
return debug.winpdb(depth)
try:
debugger = self.__dict__['_debugger']
except KeyError:
assert 'rpdb2' not in sys.modules
self._debugger = debugger = debug.getPdb()
self._bdb_interaction = debugger.interaction
return debugger.set_trace(sys._getframe(depth))
def kill(self, pid, sig):
force = []
sigint_handler = None
try:
while 1:
cluster_dict.acquire()
try:
last_pdb = cluster_dict.get('last_pdb', {})
if force or pid not in last_pdb:
os.kill(pid, sig)
last_pdb.pop(pid, None)
cluster_dict.commit()
break
try:
if psutil.Process(pid).status() == psutil.STATUS_ZOMBIE:
break
except psutil.NoSuchProcess:
raise OSError(errno.ESRCH, 'No such process')
finally:
cluster_dict.release()
if sigint_handler is None:
sigint_handler = signal.signal(signal.SIGINT,
lambda *args: force.append(None))
sys.stderr.write('Pid %u is/was debugged.'
' Press ^C to kill it...' % pid)
sleep(1)
finally:
if sigint_handler is not None:
signal.signal(signal.SIGINT, sigint_handler)
if force:
sys.stderr.write('\n')
def _lock_console(self):
while 1:
cluster_dict.acquire()
try:
if 'text_pdb' not in cluster_dict:
cluster_dict['text_pdb'] = pid = os.getpid()
cluster_dict.setdefault('last_pdb', {})[pid] = None
cluster_dict.commit()
break
finally:
cluster_dict.release()
sleep(0.5)
def _unlock_console(self):
cluster_dict.acquire()
try:
pid = cluster_dict.pop('text_pdb')
cluster_dict['last_pdb'][pid] = time()
cluster_dict.commit()
finally:
cluster_dict.release()
def _bdb_interaction(self, hooked, *args, **kw):
self._lock_console()
try:
return hooked(*args, **kw)
finally:
self._unlock_console()
def _rpdb2_set_state(self, hooked, state=None, *args, **kw):
from rpdb2 import STATE_BROKEN, STATE_DETACHED
cluster_dict.acquire()
try:
if state is None:
state = hooked.im_self.get_state()
last_pdb = cluster_dict.setdefault('last_pdb', {})
pid = os.getpid()
if state == STATE_DETACHED:
last_pdb.pop(pid, None)
else:
last_pdb[pid] = state != STATE_BROKEN and time() or None
return hooked(state=state, *args, **kw)
finally:
cluster_dict.release(True)
def _getLastPdb(self, *exclude):
result = 0
for pid, last_pdb in cluster_dict.get('last_pdb', {}).iteritems():
if pid not in exclude:
if last_pdb is None:
return
if result < last_pdb:
result = last_pdb
return result
def wait(self, test, timeout):
end_time = time() + timeout
period = 0.01
while not test():
cluster_dict.acquire()
try:
last_pdb = self._getLastPdb()
if last_pdb is None:
next_sleep = 1
else:
next_sleep = max(last_pdb + timeout, end_time) - time()
if next_sleep > period:
next_sleep = period
elif next_sleep < 0:
return False
finally:
cluster_dict.release()
sleep(next_sleep)
return True
__builtin__.pdb = ClusterPdb()
signal.signal(signal.SIGUSR1, debug.safe_handler(
lambda sig, frame: pdb(depth=2)))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/create-certs 0000775 0000000 0000000 00000000467 13465024610 0025504 0 ustar 00root root 0000000 0000000 #!/bin/sh -e
DAYS=2000
at_exit() { rm -f "$CAkey"; }
trap at_exit 0
CAkey=`mktemp`
openssl req -new -x509 -nodes -keyout "$CAkey" -out ca.crt -subj /CN=CA -days $DAYS
openssl req -new -nodes -keyout node.key -subj /CN=node |
openssl x509 -CA ca.crt -CAkey "$CAkey" -req -out node.crt -set_serial 1 -days $DAYS
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/ 0000775 0000000 0000000 00000000000 13465024610 0025330 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/__init__.py 0000664 0000000 0000000 00000065761 13465024610 0027460 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import errno
import os
import sys
import time
import ZODB
import socket
import signal
import random
import MySQLdb
import sqlite3
import unittest
import tempfile
import traceback
import threading
import psutil
import neo.scripts
from neo.neoctl.neoctl import NeoCTL, NotReadyException
from neo.lib import logging
from neo.lib.protocol import ClusterStates, NodeTypes, CellStates, NodeStates, \
UUID_NAMESPACES
from neo.lib.util import dump, setproctitle
from .. import (ADDRESS_TYPE, IP_VERSION_FORMAT_DICT, SSL,
buildUrlFromString, cluster, getTempDirectory, setupMySQLdb,
ImporterConfigParser, NeoTestBase, Patch)
from neo.client.Storage import Storage
from neo.storage.database import manager, buildDatabaseManager
try:
coverage = sys.modules['neo.scripts.runner'].coverage
except (AttributeError, KeyError):
coverage = None
command_dict = {
NodeTypes.MASTER: 'neomaster',
NodeTypes.STORAGE: 'neostorage',
NodeTypes.ADMIN: 'neoadmin',
}
DELAY_SAFETY_MARGIN = 10
MAX_START_TIME = 30
class NodeProcessError(Exception):
pass
class AlreadyRunning(Exception):
pass
class AlreadyStopped(Exception):
pass
class NotFound(Exception):
pass
class PortAllocator(object):
def __init__(self):
self.socket_list = []
self.tried_port_set = set()
def allocate(self, address_type, local_ip):
min_port = n = 16384
max_port = min_port + n
tried_port_set = self.tried_port_set
while True:
s = socket.socket(address_type, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# Find an unreserved port.
while True:
# Do not let the system choose the port to avoid conflicts
# with other software. IOW, use a range different than:
# - /proc/sys/net/ipv4/ip_local_port_range on Linux
# - what IANA recommends (49152 to 65535)
port = random.randrange(min_port, max_port)
if port not in tried_port_set:
tried_port_set.add(port)
try:
s.bind((local_ip, port))
break
except socket.error, e:
if e.errno != errno.EADDRINUSE:
raise
elif len(tried_port_set) >= n:
raise RuntimeError("No free port")
# Reserve port.
try:
s.listen(1)
self.socket_list.append(s)
return port
except socket.error, e:
if e.errno != errno.EADDRINUSE:
raise
def release(self):
for s in self.socket_list:
s.close()
self.__init__()
__del__ = release
class Process(object):
_coverage_fd = None
_coverage_prefix = None
_coverage_index = 0
on_fork = [logging.resetNids]
pid = 0
def __init__(self, command, *args, **kw):
self.command = command
self.args = args
self.arg_dict = kw
def _args(self):
args = []
for arg, param in self.arg_dict.iteritems():
args.append('--' + arg)
if param is not None:
args.append(str(param))
args += self.args
return args
def start(self):
# Prevent starting when already forked and wait wasn't called.
if self.pid != 0:
raise AlreadyRunning('Already running with PID %r' % self.pid)
command = self.command
args = self._args()
global coverage
if coverage:
cls = self.__class__
cls._coverage_index += 1
if not cls._coverage_prefix:
cls._coverage_prefix = os.path.join(
getTempDirectory(), 'coverage-')
coverage_data_path = cls._coverage_prefix + str(cls._coverage_index)
self._coverage_fd, w = os.pipe()
def save_coverage(*args):
if coverage:
coverage.stop()
coverage.save()
if args:
os.close(w)
os.kill(os.getpid(), signal.SIGSTOP)
self.pid = os.fork()
if self.pid:
# Wait that the signal to kill the child is set up.
os.close(w)
os.read(self._coverage_fd, 1)
if coverage:
coverage.neotestrunner.append(coverage_data_path)
else:
# Child
try:
signal.signal(signal.SIGTERM, lambda *args: sys.exit())
if coverage:
coverage.stop()
from coverage import Coverage
coverage = Coverage(coverage_data_path)
coverage.start()
# XXX: Sometimes, the handler is not called immediately.
# The process is stuck at an unknown place and the test
# never ends. strace unlocks:
# strace: Process 5520 attached
# close(25) = 0
# getpid() = 5520
# kill(5520, SIGSTOP) = 0
# ...
signal.signal(signal.SIGUSR2, save_coverage)
os.close(self._coverage_fd)
os.write(w, '\0')
sys.argv = [command] + args
setproctitle(self.command)
for on_fork in self.on_fork:
on_fork()
self.run()
status = 0
except SystemExit, e:
status = e.code
if status is None:
status = 0
except KeyboardInterrupt:
status = 1
except:
status = -1
traceback.print_exc()
finally:
# prevent child from killing anything (cf __del__), or
# running any other cleanup code normally done by the parent
try:
save_coverage()
os._exit(status)
except:
print >>sys.stderr, status
finally:
os._exit(1)
logging.info('pid %u: %s %s',
self.pid, command, ' '.join(map(repr, args)))
def run(self):
raise NotImplementedError
def child_coverage(self):
r = self._coverage_fd
if r is not None:
try:
os.read(r, 1)
finally:
os.close(r)
del self._coverage_fd
def kill(self, sig=signal.SIGTERM):
if self.pid:
logging.info('kill pid %u', self.pid)
try:
pdb.kill(self.pid, sig)
except OSError:
traceback.print_last()
else:
raise AlreadyStopped
def __del__(self):
# If we get killed, kill subprocesses aswell.
try:
self.kill(signal.SIGKILL)
self.wait()
except:
# We can ignore all exceptions at this point, since there is no
# guaranteed way to handle them (other objects we would depend on
# might already have been deleted).
pass
assert self._coverage_fd is None, self._coverage_fd
def wait(self):
if self.pid == 0:
raise AlreadyStopped
result = os.WEXITSTATUS(os.waitpid(self.pid, 0)[1])
self.pid = 0
self.child_coverage()
if result:
raise NodeProcessError('%r %r %r exited with status %r' % (
self.command, self.args, self.arg_dict, result))
return result
def stop(self):
self.kill()
self.wait()
def isAlive(self):
try:
return psutil.Process(self.pid).status() != psutil.STATUS_ZOMBIE
except psutil.NoSuchProcess:
return False
class NEOProcess(Process):
def __init__(self, command, *args, **kw):
try:
__import__('neo.scripts.' + command, level=0)
except ImportError:
raise NotFound(command + ' not found')
self.setUUID(kw.pop('uuid', None))
super(NEOProcess, self).__init__(command, *args, **kw)
def _args(self):
args = super(NEOProcess, self)._args()
if self.uuid:
args[:0] = '--nid', str(self.uuid)
return args
def run(self):
getattr(neo.scripts, self.command).main()
def getUUID(self):
return self.uuid
def setUUID(self, uuid):
"""
Note: for this change to take effect, the node must be restarted.
"""
self.uuid = uuid
@property
def logfile(self):
return self.arg_dict['logfile']
class NEOCluster(object):
SSL = None
def __init__(self, db_list, master_count=1, partitions=1, replicas=0,
name=None,
cleanup_on_delete=False, temp_dir=None, clear_databases=True,
adapter=os.getenv('NEO_TESTS_ADAPTER'),
address_type=ADDRESS_TYPE, bind_ip=None, logger=True,
importer=None, storage_kw={}):
if not adapter:
adapter = 'MySQL'
self.adapter = adapter
self.zodb_storage_list = []
self.cleanup_on_delete = cleanup_on_delete
self.uuid_dict = {}
self.db_list = db_list
if temp_dir is None:
temp_dir = tempfile.mkdtemp(prefix='neo_')
print 'Using temp directory ' + temp_dir
if adapter == 'MySQL':
self.db_template = setupMySQLdb(db_list, clear_databases)
elif adapter == 'SQLite':
self.db_template = (lambda t: lambda db:
':memory:' if db is None else db if os.sep in db else t % db
)(os.path.join(temp_dir, '%s.sqlite'))
if clear_databases:
for db in self.db_list:
if db is None:
continue
db = self.db_template(db)
try:
os.remove(db)
except OSError, e:
if e.errno != errno.ENOENT:
raise
else:
logging.debug('%r deleted', db)
else:
assert False, adapter
self.address_type = address_type
self.local_ip = local_ip = bind_ip or \
IP_VERSION_FORMAT_DICT[self.address_type]
if importer:
cfg = ImporterConfigParser(adapter, **importer)
cfg.set("neo", "database", self.db_template(*db_list))
importer_conf = os.path.join(temp_dir, 'importer.cfg')
with open(importer_conf, 'w') as f:
cfg.write(f)
adapter = "Importer"
self.db_template = str
db_list = importer_conf,
self.process_dict = {}
self.temp_dir = temp_dir
self.port_allocator = PortAllocator()
admin_port = self.port_allocator.allocate(address_type, local_ip)
self.cluster_name = name or 'neo_%s' % random.randint(0, 100)
master_node_list = [self.port_allocator.allocate(address_type, local_ip)
for i in xrange(master_count)]
self.master_nodes = ' '.join('%s:%s' % (
buildUrlFromString(self.local_ip), x, )
for x in master_node_list)
# create admin node
self._newProcess(NodeTypes.ADMIN, logger and 'admin', admin_port)
# create master nodes
for i, port in enumerate(master_node_list):
self._newProcess(NodeTypes.MASTER, logger and 'master_%u' % i,
port, partitions=partitions, replicas=replicas)
# create storage nodes
for i, db in enumerate(db_list):
self._newProcess(NodeTypes.STORAGE, logger and 'storage_%u' % i,
0, adapter=adapter, database=self.db_template(db),
**storage_kw)
# create neoctl
self.neoctl = NeoCTL((self.local_ip, admin_port), ssl=self.SSL)
def _newProcess(self, node_type, logfile=None, port=None, **kw):
self.uuid_dict[node_type] = uuid = 1 + self.uuid_dict.get(node_type, 0)
uuid += UUID_NAMESPACES[node_type] << 24
kw['cluster'] = self.cluster_name
kw['masters'] = self.master_nodes
if logfile:
kw['logfile'] = os.path.join(self.temp_dir, logfile + '.log')
if port is not None:
kw['bind'] = '%s:%u' % (buildUrlFromString(self.local_ip), port)
if self.SSL:
kw['ca'], kw['cert'], kw['key'] = self.SSL
self.process_dict.setdefault(node_type, []).append(
NEOProcess(command_dict[node_type], uuid=uuid, **kw))
def resetDB(self):
for db in self.db_list:
dm = buildDatabaseManager(self.adapter, (self.db_template(db),))
dm.setup(True)
def run(self, except_storages=()):
""" Start cluster processes except some storage nodes """
assert len(self.process_dict)
self.port_allocator.release()
for process_list in self.process_dict.itervalues():
for process in process_list:
if process not in except_storages:
process.start()
# wait for the admin node availability
def test():
try:
self.neoctl.getClusterState()
except NotReadyException:
return False
return True
if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster')
def start(self, except_storages=()):
""" Do a complete start of a cluster """
self.run(except_storages=except_storages)
neoctl = self.neoctl
target = [len(self.db_list) - len(except_storages)]
def test():
try:
state = neoctl.getClusterState()
if state == ClusterStates.RUNNING:
return True
if state == ClusterStates.RECOVERING and target[0]:
pending_count = 0
for x in neoctl.getNodeList(node_type=NodeTypes.STORAGE):
if x[3] != NodeStates.PENDING:
target[0] = None # cluster must start automatically
break
pending_count += 1
if pending_count == target[0]:
neoctl.startCluster()
except (NotReadyException, SystemExit):
pass
if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster')
def startCluster(self):
# Even if the storage nodes are in the expected state, there may still
# be activity between them and the master, preventing the cluster to
# start.
def start(last_try):
try:
self.neoctl.startCluster()
except (NotReadyException, SystemExit), e:
return False, e
return True, None
self.expectCondition(start)
def stop(self, clients=True):
# Suspend all processes to kill before actually killing them, so that
# nodes don't log errors because they get disconnected from other nodes:
# otherwise, storage nodes would often flush MB of logs just because we
# killed the master first, and waste much file system space.
stopped_list = []
for process_list in self.process_dict.itervalues():
for process in process_list:
try:
process.kill(signal.SIGUSR2)
stopped_list.append(process)
except AlreadyStopped:
pass
for process in stopped_list:
process.child_coverage()
error_list = []
for process in stopped_list:
try:
process.kill(signal.SIGKILL)
process.wait()
except NodeProcessError, e:
error_list += e.args
if clients:
for zodb_storage in self.zodb_storage_list:
zodb_storage.close()
self.zodb_storage_list = []
time.sleep(0.5)
if error_list:
raise NodeProcessError('\n'.join(error_list))
def waitAll(self):
for process_list in self.process_dict.itervalues():
for process in process_list:
try:
process.wait()
except (AlreadyStopped, NodeProcessError):
pass
def getClientConfig(self, **kw):
kw['name'] = self.cluster_name
kw['master_nodes'] = self.master_nodes.replace('/', ' ')
if self.SSL:
kw['ca'], kw['cert'], kw['key'] = self.SSL
return kw
def getZODBStorage(self, **kw):
result = Storage(**self.getClientConfig(**kw))
result.app.max_reconnection_to_master = 10
self.zodb_storage_list.append(result)
return result
def getZODBConnection(self, **kw):
""" Return a tuple with the database and a connection """
db = ZODB.DB(storage=self.getZODBStorage(**kw))
return (db, db.open())
def getSQLConnection(self, db):
assert db is not None and db in self.db_list
with Patch(manager.DatabaseManager, LOCK=None):
return buildDatabaseManager(self.adapter, (self.db_template(db),))
def getMasterProcessList(self):
return self.process_dict.get(NodeTypes.MASTER)
def getStorageProcessList(self):
return self.process_dict.get(NodeTypes.STORAGE)
def getAdminProcessList(self):
return self.process_dict.get(NodeTypes.ADMIN)
def _killMaster(self, primary=False, all=False):
killed_uuid_list = []
primary_uuid = self.neoctl.getPrimary()
for master in self.getMasterProcessList():
master_uuid = master.getUUID()
is_primary = master_uuid == primary_uuid
if primary and is_primary or not (primary or is_primary):
killed_uuid_list.append(master_uuid)
master.kill()
master.wait()
if not all:
break
return killed_uuid_list
def killPrimary(self):
return self._killMaster(primary=True)
def killSecondaryMaster(self, all=False):
return self._killMaster(primary=False, all=all)
def killMasters(self):
secondary_list = self.killSecondaryMaster(all=True)
primary_list = self.killPrimary()
return secondary_list + primary_list
def killStorage(self, all=False):
killed_uuid_list = []
for storage in self.getStorageProcessList():
killed_uuid_list.append(storage.getUUID())
storage.kill()
storage.wait()
if not all:
break
return killed_uuid_list
def __getNodeList(self, node_type, state=None):
return [x for x in self.neoctl.getNodeList(node_type)
if state is None or x[3] == state]
def getMasterList(self, state=None):
return self.__getNodeList(NodeTypes.MASTER, state)
def getStorageList(self, state=None):
return self.__getNodeList(NodeTypes.STORAGE, state)
def getClientlist(self, state=None):
return self.__getNodeList(NodeTypes.CLIENT, state)
def __getNodeState(self, node_type, uuid):
for node in self.__getNodeList(node_type):
if node[2] == uuid:
return node[3]
def getMasterNodeState(self, uuid):
return self.__getNodeState(NodeTypes.MASTER, uuid)
def getPrimary(self):
try:
current_try = self.neoctl.getPrimary()
except NotReadyException:
current_try = None
return current_try
def expectCondition(self, condition, timeout=0, on_fail=None):
end = time.time() + timeout + DELAY_SAFETY_MARGIN
opaque_history = [None]
def test():
reached, opaque = condition(opaque_history[-1])
if not reached:
opaque_history.append(opaque)
return reached
if not pdb.wait(test, timeout + DELAY_SAFETY_MARGIN):
del opaque_history[0]
if on_fail is not None:
on_fail(opaque_history)
raise AssertionError('Timeout while expecting condition. '
'History: %s' % opaque_history)
def expectAllMasters(self, node_count, state=None, *args, **kw):
def callback(last_try):
try:
current_try = len(self.getMasterList(state=state))
except NotReadyException:
current_try = 0
if last_try is not None and current_try < last_try:
raise AssertionError, 'Regression: %s became %s' % \
(last_try, current_try)
return (current_try == node_count, current_try)
self.expectCondition(callback, *args, **kw)
def __expectNodeState(self, node_type, uuid, state, *args, **kw):
if not isinstance(state, (tuple, list)):
state = (state, )
def callback(last_try):
try:
current_try = self.__getNodeState(node_type, uuid)
except NotReadyException:
current_try = None
return current_try in state, current_try
self.expectCondition(callback, *args, **kw)
def expectMasterState(self, uuid, state, *args, **kw):
self.__expectNodeState(NodeTypes.MASTER, uuid, state, *args, **kw)
def expectStorageState(self, uuid, state, *args, **kw):
self.__expectNodeState(NodeTypes.STORAGE, uuid, state, *args, **kw)
def expectRunning(self, process, *args, **kw):
self.expectStorageState(process.getUUID(), NodeStates.RUNNING,
*args, **kw)
def expectPending(self, process, *args, **kw):
self.expectStorageState(process.getUUID(), NodeStates.PENDING,
*args, **kw)
def expectDown(self, process, *args, **kw):
self.expectStorageState(process.getUUID(), NodeStates.DOWN, *args, **kw)
def expectPrimary(self, uuid=None, *args, **kw):
def callback(last_try):
current_try = self.getPrimary()
if None not in (uuid, current_try) and uuid != current_try:
raise AssertionError, 'An unexpected primary arose: %r, ' \
'expected %r' % (dump(current_try), dump(uuid))
return uuid is None or uuid == current_try, current_try
self.expectCondition(callback, *args, **kw)
def expectOudatedCells(self, number, *args, **kw):
def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[2]
number_of_outdated = 0
for row in row_list:
for cell in row:
if cell[1] == CellStates.OUT_OF_DATE:
number_of_outdated += 1
return number_of_outdated == number, number_of_outdated
self.expectCondition(callback, *args, **kw)
def expectAssignedCells(self, process, number, *args, **kw):
def callback(last_try):
row_list = self.neoctl.getPartitionRowList()[2]
assigned_cells_number = 0
for row in row_list:
for cell in row:
if cell[0] == process.getUUID():
assigned_cells_number += 1
return assigned_cells_number == number, assigned_cells_number
self.expectCondition(callback, *args, **kw)
def expectClusterState(self, state, *args, **kw):
def callback(last_try):
try:
current_try = self.neoctl.getClusterState()
except NotReadyException:
current_try = None
return current_try == state, current_try
self.expectCondition(callback, *args, **kw)
def expectClusterRecovering(self, *args, **kw):
self.expectClusterState(ClusterStates.RECOVERING, *args, **kw)
def expectClusterVerifying(self, *args, **kw):
self.expectClusterState(ClusterStates.VERIFYING, *args, **kw)
def expectClusterRunning(self, *args, **kw):
self.expectClusterState(ClusterStates.RUNNING, *args, **kw)
def expectAlive(self, process, *args, **kw):
def callback(last_try):
current_try = process.isAlive()
return current_try, current_try
self.expectCondition(callback, *args, **kw)
def expectDead(self, process, *args, **kw):
def callback(last_try):
current_try = not process.isAlive()
return current_try, current_try
self.expectCondition(callback, *args, **kw)
def expectStorageUnknown(self, process, *args, **kw):
process_uuid = process.getUUID()
def expected_storage_not_known(last_try):
for storage in self.getStorageList():
if storage[2] == process_uuid:
return False, storage
return True, None
self.expectCondition(expected_storage_not_known, *args, **kw)
def __del__(self):
self.neoctl.close()
if self.cleanup_on_delete:
os.removedirs(self.temp_dir)
class NEOFunctionalTest(NeoTestBase):
def setUp(self):
if random.randint(0, 1):
NEOCluster.SSL = SSL
super(NEOFunctionalTest, self).setUp()
def _tearDown(self, success):
super(NEOFunctionalTest, self)._tearDown(success)
NEOCluster.SSL = None
def setupLog(self):
logging.setup(os.path.join(self.getTempDirectory(), 'test.log'))
logging.resetNids()
def getTempDirectory(self):
# build the full path based on test case and current test method
temp_dir = os.path.join(getTempDirectory(), self.id())
# build the path if needed
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
return temp_dir
def runWithTimeout(self, timeout, method, args=(), kwargs=None):
if kwargs is None:
kwargs = {}
exc_list = []
def excWrapper(*args, **kw):
try:
method(*args, **kw)
except:
exc_list.append(sys.exc_info())
thread = threading.Thread(None, excWrapper, args=args, kwargs=kwargs)
thread.daemon = True
thread.start()
thread.join(timeout)
self.assertFalse(thread.is_alive(), 'Run timeout')
if exc_list:
assert len(exc_list) == 1, exc_list
exc = exc_list[0]
raise exc[0], exc[1], exc[2]
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/testClient.py 0000664 0000000 0000000 00000021735 13465024610 0030030 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from __future__ import print_function
import os
import unittest
import transaction
import ZODB
import socket
from struct import pack
from neo.lib.util import makeChecksum, u64
from ZODB.FileStorage import FileStorage
from ZODB.tests.StorageTestBase import zodb_pickle
from persistent import Persistent
from . import NEOCluster, NEOFunctionalTest, NEOProcess
TREE_SIZE = 6
class Tree(Persistent):
""" A simple binary tree """
def __init__(self, depth):
self.depth = depth
if depth <= 0:
return
depth -= 1
self.right = Tree(depth)
self.left = Tree(depth)
class PObject(Persistent):
pass
class ClientTests(NEOFunctionalTest):
def setUp(self):
NEOFunctionalTest.setUp(self)
self.neo = NEOCluster(
['test_neo1', 'test_neo2', 'test_neo3', 'test_neo4'],
partitions=3,
replicas=2,
master_count=1,
temp_dir=self.getTempDirectory()
)
def _tearDown(self, success):
self.neo.stop()
del self.neo
NEOFunctionalTest._tearDown(self, success)
def __setup(self):
self.neo.start()
self.neo.expectClusterRunning()
self.db = ZODB.DB(self.neo.getZODBStorage())
def makeTransaction(self):
# create a transaction a get the root object
txn = transaction.TransactionManager()
conn = self.db.open(transaction_manager=txn)
return (txn, conn)
def testIsolationAtZopeLevel(self):
""" Check transaction isolation within zope connection """
self.__setup()
t, c = self.makeTransaction()
root = c.root()
root['item'] = 0
root['other'] = 'bla'
t.commit()
t1, c1 = self.makeTransaction()
t2, c2 = self.makeTransaction()
# Makes c2 take a snapshot of database state
c2.root()['other']
c1.root()['item'] = 1
t1.commit()
# load object from zope cache
self.assertEqual(c1.root()['item'], 1)
self.assertEqual(c2.root()['item'], 0)
def testIsolationWithoutZopeCache(self):
""" Check isolation with zope cache cleared """
self.__setup()
t, c = self.makeTransaction()
root = c.root()
root['item'] = 0
root['other'] = 'bla'
t.commit()
t1, c1 = self.makeTransaction()
t2, c2 = self.makeTransaction()
# Makes c2 take a snapshot of database state
c2.root()['other']
c1.root()['item'] = 1
t1.commit()
# clear zope cache to force re-ask NEO
c1.cacheMinimize()
c2.cacheMinimize()
self.assertEqual(c1.root()['item'], 1)
self.assertEqual(c2.root()['item'], 0)
def __checkTree(self, tree, depth=TREE_SIZE):
self.assertTrue(isinstance(tree, Tree))
self.assertEqual(depth, tree.depth)
depth -= 1
if depth <= 0:
return
self.__checkTree(tree.right, depth)
self.__checkTree(tree.left, depth)
def __getDataFS(self):
name = os.path.join(self.getTempDirectory(), 'data.fs')
if os.path.exists(name):
os.remove(name)
return FileStorage(file_name=name)
def __populate(self, db, tree_size=TREE_SIZE, with_undo=True):
if isinstance(db.storage, FileStorage):
from base64 import b64encode as undo_tid
else:
undo_tid = lambda x: x
def undo(tid=None):
db.undo(undo_tid(tid or db.lastTransaction()))
transaction.commit()
conn = db.open()
root = conn.root()
root['trees'] = Tree(tree_size)
ob = root['trees'].right
left = ob.left
del ob.left
transaction.commit()
ob._p_changed = 1
transaction.commit()
t2 = db.lastTransaction()
ob.left = left
transaction.commit()
if with_undo:
undo()
t4 = db.lastTransaction()
undo(t2)
undo()
undo(t4)
undo()
undo()
conn.close()
def testImport(self):
# source database
dfs_storage = self.__getDataFS()
dfs_db = ZODB.DB(dfs_storage)
self.__populate(dfs_db)
# create a neo storage
self.neo.start()
neo_storage = self.neo.getZODBStorage()
# copy data fs to neo
neo_storage.copyTransactionsFrom(dfs_storage, verbose=0)
dfs_db.close()
# check neo content
(neo_db, neo_conn) = self.neo.getZODBConnection()
self.__checkTree(neo_conn.root()['trees'])
def testMigrationTool(self):
dfs_storage = self.__getDataFS()
dfs_db = ZODB.DB(dfs_storage)
self.__populate(dfs_db, with_undo=False)
dump = self.__dump(dfs_storage)
fs_path = dfs_storage.__name__
dfs_db.close()
neo = self.neo
neo.start()
kw = {'cluster': neo.cluster_name, 'quiet': None}
master_nodes = neo.master_nodes.replace('/', ' ')
if neo.SSL:
kw['ca'], kw['cert'], kw['key'] = neo.SSL
p = NEOProcess('neomigrate', fs_path, master_nodes, **kw)
p.start()
p.wait()
os.remove(fs_path)
p = NEOProcess('neomigrate', master_nodes, fs_path, **kw)
p.start()
p.wait()
self.assertEqual(dump, self.__dump(FileStorage(fs_path)))
def __dump(self, storage, sorted=sorted):
return {u64(t.tid): sorted((u64(o.oid), o.data_txn and u64(o.data_txn),
None if o.data is None else makeChecksum(o.data))
for o in t)
for t in storage.iterator()}
def testExport(self):
# create a neo storage
self.neo.start()
(neo_db, neo_conn) = self.neo.getZODBConnection()
self.__populate(neo_db)
dump = self.__dump(neo_db.storage, list)
# copy neo to data fs
dfs_storage = self.__getDataFS()
neo_storage = self.neo.getZODBStorage()
dfs_storage.copyTransactionsFrom(neo_storage)
# check data fs content
dfs_db = ZODB.DB(dfs_storage)
root = dfs_db.open().root()
self.__checkTree(root['trees'])
dfs_db.close()
self.neo.stop()
self.neo = NEOCluster(db_list=['test_neo1'], partitions=3,
importer={"zodb": [("root", {
"storage": "\npath %s\n"
% dfs_storage.getName()})]},
temp_dir=self.getTempDirectory())
self.neo.start()
neo_db, neo_conn = self.neo.getZODBConnection()
self.__checkTree(neo_conn.root()['trees'])
# BUG: The following check is sometimes done whereas the import is not
# finished, resulting in a failure because getReplicationTIDList
# is not implemented by the Importer backend.
self.assertEqual(dump, self.__dump(neo_db.storage, list))
def testIPv6Client(self):
""" Test the connectivity of an IPv6 connection for neo client """
def test():
"""
Implement the IPv6Client test
"""
self.neo = NEOCluster(['test_neo1'], replicas=0,
temp_dir = self.getTempDirectory(),
address_type = socket.AF_INET6
)
self.neo.start()
db1, conn1 = self.neo.getZODBConnection()
db2, conn2 = self.neo.getZODBConnection()
self.runWithTimeout(40, test)
def testGreaterOIDSaved(self):
"""
Store an object with an OID greater than the last generated by the
master. This OID must be intercepted at commit, used for next OID
generations and persistently saved on storage nodes.
"""
self.neo.start()
db1, conn1 = self.neo.getZODBConnection()
st1 = conn1._storage
t1 = transaction.Transaction()
rev = '\0' * 8
data = zodb_pickle(PObject())
my_oid = pack('!Q', 100000)
# store an object with this OID
st1.tpc_begin(t1)
st1.store(my_oid, rev, data, '', t1)
st1.tpc_vote(t1)
st1.tpc_finish(t1)
# request an oid, should be greater than mine
oid = st1.new_oid()
self.assertTrue(oid > my_oid)
def test_suite():
return unittest.makeSuite(ClientTests)
if __name__ == "__main__":
unittest.main(defaultTest="test_suite")
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/testCluster.py 0000664 0000000 0000000 00000014530 13465024610 0030226 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import transaction
from neo.lib.protocol import NodeStates
from . import NEOCluster, NEOFunctionalTest
class ClusterTests(NEOFunctionalTest):
def _tearDown(self, success):
if hasattr(self, "neo"):
self.neo.stop()
del self.neo
NEOFunctionalTest._tearDown(self, success)
def testClusterStartup(self):
neo = self.neo = NEOCluster(['test_neo1', 'test_neo2'], replicas=1,
temp_dir=self.getTempDirectory())
neo.run()
# Runing a new cluster doesn't exit Recovery state.
s1, s2 = neo.getStorageProcessList()
neo.expectPending(s1)
neo.expectPending(s2)
neo.expectClusterRecovering()
# When allowing cluster to exit Recovery, it reaches Running state and
# all present storage nodes reach running state.
neo.startCluster()
neo.expectRunning(s1)
neo.expectRunning(s2)
neo.expectClusterRunning()
# Re-running cluster with a missing storage doesn't exit Recovery
# state.
neo.stop()
neo.run(except_storages=(s2, ))
neo.expectPending(s1)
neo.expectDown(s2)
neo.expectClusterRecovering()
# Starting missing storage allows cluster to exit Recovery without
# neoctl action.
s2.start()
neo.expectRunning(s1)
neo.expectRunning(s2)
neo.expectClusterRunning()
# Re-running cluster with a missing storage and allowing startup exits
# recovery.
neo.stop()
neo.run(except_storages=(s2, ))
neo.expectPending(s1)
neo.expectDown(s2)
neo.expectClusterRecovering()
neo.startCluster()
neo.expectRunning(s1)
neo.expectDown(s2)
neo.expectClusterRunning()
def testClusterBreaks(self):
self.neo = NEOCluster(['test_neo1'],
master_count=1, temp_dir=self.getTempDirectory())
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
self.neo.killStorage()
self.neo.expectClusterRecovering()
def testClusterBreaksWithTwoNodes(self):
self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, master_count=1, replicas=0,
temp_dir=self.getTempDirectory())
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
self.neo.killStorage()
self.neo.expectClusterRecovering()
def testClusterDoesntBreakWithTwoNodesOneReplica(self):
self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=2, replicas=1, master_count=1,
temp_dir=self.getTempDirectory())
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
self.neo.killStorage()
self.neo.expectClusterRunning()
def testElectionWithManyMasters(self):
MASTER_COUNT = 20
self.neo = NEOCluster(['test_neo1', 'test_neo2'],
partitions=10, replicas=0, master_count=MASTER_COUNT,
temp_dir=self.getTempDirectory())
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectAllMasters(MASTER_COUNT, NodeStates.RUNNING)
self.neo.expectOudatedCells(0)
def testLeavingOperationalStateDropClientNodes(self):
"""
Check that client nodes are dropped where the cluster leaves the
operational state.
"""
# start a cluster
self.neo = NEOCluster(['test_neo1'], replicas=0,
temp_dir=self.getTempDirectory())
self.neo.start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(0)
# connect a client a check it's known
db, conn = self.neo.getZODBConnection()
self.assertEqual(len(self.neo.getClientlist()), 1)
# drop the storage, the cluster is no more operational...
self.neo.getStorageProcessList()[0].stop()
self.neo.expectClusterRecovering()
# ...and the client gets disconnected
self.assertEqual(len(self.neo.getClientlist()), 0)
# restart storage so that the cluster is operational again
self.neo.getStorageProcessList()[0].start()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(0)
# and reconnect the client, there must be only one known by the admin
conn.root()['plop'] = 1
transaction.commit()
self.assertEqual(len(self.neo.getClientlist()), 1)
def testStorageLostDuringRecovery(self):
"""
Check that admin node receive notifications of storage
connection and disconnection during recovery
"""
self.neo = NEOCluster(['test_neo%d' % i for i in xrange(2)],
master_count=1, partitions=10, replicas=1,
temp_dir=self.getTempDirectory(), clear_databases=True,
)
storages = self.neo.getStorageProcessList()
self.neo.run(except_storages=storages)
self.neo.expectStorageUnknown(storages[0])
self.neo.expectStorageUnknown(storages[1])
storages[0].start()
self.neo.expectPending(storages[0])
self.neo.expectStorageUnknown(storages[1])
storages[1].start()
self.neo.expectPending(storages[0])
self.neo.expectPending(storages[1])
storages[0].stop()
self.neo.expectDown(storages[0])
self.neo.expectPending(storages[1])
storages[1].stop()
self.neo.expectDown(storages[0])
self.neo.expectDown(storages[1])
def test_suite():
return unittest.makeSuite(ClusterTests)
if __name__ == "__main__":
unittest.main(defaultTest="test_suite")
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/testMaster.py 0000664 0000000 0000000 00000012621 13465024610 0030037 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from . import NEOCluster, NEOFunctionalTest
from neo.lib.protocol import NodeStates
MASTER_NODE_COUNT = 3
class MasterTests(NEOFunctionalTest):
def setUp(self):
NEOFunctionalTest.setUp(self)
self.neo = NEOCluster([], master_count=MASTER_NODE_COUNT,
temp_dir=self.getTempDirectory())
self.neo.stop()
self.neo.run()
def _tearDown(self, success):
self.neo.stop()
NEOFunctionalTest._tearDown(self, success)
def testStoppingSecondaryMaster(self):
# Wait for masters to stabilize
self.neo.expectAllMasters(MASTER_NODE_COUNT, NodeStates.RUNNING)
# Kill
neoctl = self.neo.neoctl
primary_uuid = neoctl.getPrimary()
for master in self.neo.getMasterProcessList():
uuid = master.getUUID()
if uuid != primary_uuid:
break
neoctl.killNode(uuid)
self.neo.expectDead(master)
self.assertRaises(SystemExit, neoctl.killNode, primary_uuid)
def testStoppingPrimaryWithTwoSecondaries(self):
# Wait for masters to stabilize
self.neo.expectAllMasters(MASTER_NODE_COUNT)
# Kill
killed_uuid_list = self.neo.killPrimary()
# Test sanity check.
self.assertEqual(len(killed_uuid_list), 1)
uuid = killed_uuid_list[0]
# Check the state of the primary we just killed
self.neo.expectMasterState(uuid, (None, NodeStates.DOWN))
# BUG: The following check expects neoctl to reconnect before
# the election finishes.
self.assertEqual(self.neo.getPrimary(), None)
# Check that a primary master arose.
self.neo.expectPrimary(timeout=10)
# Check that the uuid really changed.
new_uuid = self.neo.getPrimary()
self.assertNotEqual(new_uuid, uuid)
def testStoppingPrimaryWithOneSecondary(self):
self.neo.expectAllMasters(MASTER_NODE_COUNT,
state=NodeStates.RUNNING)
# Kill one secondary master.
killed_uuid_list = self.neo.killSecondaryMaster()
# Test sanity checks.
self.assertEqual(len(killed_uuid_list), 1)
self.neo.expectMasterState(killed_uuid_list[0],
NodeStates.DOWN)
self.assertEqual(len(self.neo.getMasterList()), MASTER_NODE_COUNT)
uuid, = self.neo.killPrimary()
# Check the state of the primary we just killed
self.neo.expectMasterState(uuid, NodeStates.DOWN)
# Check that a primary master arose.
self.neo.expectPrimary(timeout=10)
# Check that the uuid really changed.
self.assertNotEqual(self.neo.getPrimary(), uuid)
def testMasterSequentialStart(self):
self.neo.expectAllMasters(MASTER_NODE_COUNT,
state=NodeStates.RUNNING)
master_list = self.neo.getMasterProcessList()
# Stop the cluster (so we can start processes manually)
self.neo.killMasters()
# Restart admin to make sure it knows all masters.
admin, = self.neo.getAdminProcessList()
admin.kill()
admin.wait()
admin.start()
# Start the first master.
first_master = master_list[0]
first_master.start()
first_master_uuid = first_master.getUUID()
# Check that the master node we started elected itself.
self.neo.expectPrimary(first_master_uuid, timeout=30)
# Check that no other node is known as running.
self.assertEqual(len(self.neo.getMasterList(
state=NodeStates.RUNNING)), 1)
# Start a second master.
second_master = master_list[1]
# Check that the second master is known as being down.
self.assertEqual(self.neo.getMasterNodeState(second_master.getUUID()),
None)
second_master.start()
# Check that the second master is running under his known UUID.
self.neo.expectMasterState(second_master.getUUID(),
NodeStates.RUNNING)
# Check that the primary master didn't change.
self.assertEqual(self.neo.getPrimary(), first_master_uuid)
# Start a third master.
third_master = master_list[2]
# Check that the third master is known as being down.
self.assertEqual(self.neo.getMasterNodeState(third_master.getUUID()),
None)
third_master.start()
# Check that the third master is running under his known UUID.
self.neo.expectMasterState(third_master.getUUID(),
NodeStates.RUNNING)
# Check that the primary master didn't change.
self.assertEqual(self.neo.getPrimary(), first_master_uuid)
def test_suite():
return unittest.makeSuite(MasterTests)
if __name__ == "__main__":
unittest.main(defaultTest="test_suite")
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/functional/testStorage.py 0000664 0000000 0000000 00000040570 13465024610 0030214 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
import transaction
from persistent import Persistent
from . import NEOCluster, NEOFunctionalTest
from neo.lib.protocol import ClusterStates, NodeStates
class PObject(Persistent):
def __init__(self, value):
self.value = value
OBJECT_NUMBER = 100
class StorageTests(NEOFunctionalTest):
def _tearDown(self, success):
if hasattr(self, "neo"):
self.neo.stop()
del self.neo
NEOFunctionalTest._tearDown(self, success)
def __setup(self, storage_number=2, pending_number=0, replicas=1,
partitions=10, master_count=2):
# create a neo cluster
self.neo = NEOCluster(['test_neo%d' % i for i in xrange(storage_number)],
master_count=master_count,
partitions=partitions, replicas=replicas,
temp_dir=self.getTempDirectory(),
clear_databases=True,
)
# too many pending storage nodes requested
assert pending_number <= storage_number
storage_processes = self.neo.getStorageProcessList()
start_storage_number = len(storage_processes) - pending_number
# return a tuple of storage processes lists
started_processes = storage_processes[:start_storage_number]
stopped_processes = storage_processes[start_storage_number:]
self.neo.start(except_storages=stopped_processes)
return (started_processes, stopped_processes)
def __populate(self):
db, conn = self.neo.getZODBConnection()
root = conn.root()
for i in xrange(OBJECT_NUMBER):
root[i] = PObject(i)
transaction.commit()
conn.close()
db.close()
def __checkDatabase(self, db_name):
db = self.neo.getSQLConnection(db_name)
# wait for the sql transaction to be committed
def callback(last_try):
db.commit() # to get a fresh view
# One revision per object and two for the root, before and after
(object_number,), = db.query('SELECT count(*) FROM obj')
return object_number == OBJECT_NUMBER + 2, object_number
self.neo.expectCondition(callback)
# no more temporarily objects
(t_objects,), = db.query('SELECT count(*) FROM tobj')
self.assertEqual(t_objects, 0)
# One object more for the root
query = 'SELECT count(*) FROM (SELECT * FROM obj GROUP BY oid) AS t'
(objects,), = db.query(query)
self.assertEqual(objects, OBJECT_NUMBER + 1)
# Check object content
db, conn = self.neo.getZODBConnection()
root = conn.root()
for i in xrange(OBJECT_NUMBER):
obj = root[i]
self.assertEqual(obj.value, i)
transaction.abort()
conn.close()
db.close()
def __checkReplicationDone(self):
# wait for replication to finish
def expect_all_storages(last_try):
storage_number = len(self.neo.getStorageList())
return storage_number == len(self.neo.db_list), storage_number
self.neo.expectCondition(expect_all_storages, timeout=10)
self.neo.expectOudatedCells(number=0, timeout=10)
# check databases
for db_name in self.neo.db_list:
self.__checkDatabase(db_name)
# check storages state
storage_list = self.neo.getStorageList(NodeStates.RUNNING)
self.assertEqual(len(storage_list), 2)
def testNewNodesInPendingState(self):
""" Check that new storage nodes are set as pending, the cluster remains
running """
# start with the first storage
processes = self.__setup(storage_number=3, replicas=1, pending_number=2)
started, stopped = processes
self.neo.expectRunning(started[0])
self.neo.expectClusterRunning()
# start the second then the third
stopped[0].start()
self.neo.expectPending(stopped[0])
self.neo.expectClusterRunning()
stopped[1].start()
self.neo.expectPending(stopped[1])
self.neo.expectClusterRunning()
def testReplicationWithNewStorage(self):
""" create a cluster with one storage, populate it, add a new storage
then check the database content to ensure the replication process is
well done """
# populate one storage
processes = self.__setup(storage_number=2, replicas=1, pending_number=1,
partitions=10)
started, stopped = processes
self.neo.expectOudatedCells(number=0)
self.__populate()
self.neo.expectClusterRunning()
self.neo.expectAssignedCells(started[0], number=10)
# start the second
stopped[0].start()
self.neo.expectPending(stopped[0])
self.neo.expectClusterRunning()
# add it to the partition table
self.neo.neoctl.enableStorageList([stopped[0].getUUID()])
self.neo.expectRunning(stopped[0])
self.neo.neoctl.tweakPartitionTable()
self.neo.expectAssignedCells(stopped[0], number=10)
self.neo.expectClusterRunning()
# wait for replication to finish then check
self.__checkReplicationDone()
self.neo.expectClusterRunning()
def testOudatedCellsOnDownStorage(self):
""" Check that the storage cells are set as outdated when the node is
down, the cluster remains up since there is a replica """
# populate the two storages
started, _ = self.__setup(partitions=3, replicas=1, storage_number=3)
self.neo.expectRunning(started[0])
self.neo.expectRunning(started[1])
self.neo.expectRunning(started[2])
self.neo.expectOudatedCells(number=0)
self.neo.neoctl.killNode(started[0].getUUID())
# Cluster still operational. All cells of first storage should be
# outdated.
self.neo.expectDown(started[0])
self.neo.expectOudatedCells(2)
self.neo.expectClusterRunning()
self.assertRaises(SystemExit, self.neo.neoctl.killNode,
started[1].getUUID())
started[1].stop()
# Cluster not operational anymore. Only cells of second storage that
# were shared with the third one should become outdated.
self.neo.expectDown(started[1])
self.neo.expectClusterRecovering()
self.neo.expectOudatedCells(3)
def testVerificationTriggered(self):
""" Check that the verification stage is executed when a storage node
required to be operational is lost, and the cluster come back in
running state when the storage is up again """
# start neo with one storages
(started, _) = self.__setup(replicas=0, storage_number=1)
self.neo.expectRunning(started[0])
self.neo.expectOudatedCells(number=0)
# add a client node
db, conn = self.neo.getZODBConnection()
root = conn.root()['test'] = 'ok'
transaction.commit()
self.assertEqual(len(self.neo.getClientlist()), 1)
# stop it, the cluster must switch to verification
started[0].stop()
self.neo.expectDown(started[0])
self.neo.expectClusterRecovering()
# client must have been disconnected
self.assertEqual(len(self.neo.getClientlist()), 0)
conn.close()
db.close()
# restart it, the cluster must come back to running state
started[0].start()
self.neo.expectRunning(started[0])
self.neo.expectClusterRunning()
def testSequentialStorageKill(self):
""" Check that the cluster remains running until the last storage node
died when all are replicas """
# start neo with three storages / two replicas
(started, _) = self.__setup(replicas=2, storage_number=3, partitions=10)
self.neo.expectRunning(started[0])
self.neo.expectRunning(started[1])
self.neo.expectRunning(started[2])
self.neo.expectOudatedCells(number=0)
self.neo.expectClusterRunning()
# stop one storage, cluster must remains running
started[0].stop()
self.neo.expectDown(started[0])
self.neo.expectRunning(started[1])
self.neo.expectRunning(started[2])
self.neo.expectOudatedCells(number=10)
self.neo.expectClusterRunning()
# stop a second storage, cluster is still running
started[1].stop()
self.neo.expectDown(started[0])
self.neo.expectDown(started[1])
self.neo.expectRunning(started[2])
self.neo.expectOudatedCells(number=20)
self.neo.expectClusterRunning()
# stop the last, cluster died
started[2].stop()
self.neo.expectDown(started[0])
self.neo.expectDown(started[1])
self.neo.expectDown(started[2])
self.neo.expectOudatedCells(number=20)
self.neo.expectClusterRecovering()
def testConflictingStorageRejected(self):
""" Check that a storage coming after the recovery process with the same
UUID as another already running is refused """
# start with one storage
(started, stopped) = self.__setup(storage_number=2, pending_number=1)
self.neo.expectRunning(started[0])
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
# start the second with the same UUID as the first
stopped[0].setUUID(started[0].getUUID())
stopped[0].start()
self.neo.expectOudatedCells(number=0)
# check the first and the cluster are still running
self.neo.expectRunning(started[0])
self.neo.expectClusterRunning()
# XXX: should wait for the storage rejection
# check that no node were added
storage_number = len(self.neo.getStorageList())
self.assertEqual(storage_number, 1)
def testPartitionTableReorganizedWithNewStorage(self):
""" Check if the partition change when adding a new storage to a cluster
with one storage and no replicas """
# start with one storage and no replicas
(started, stopped) = self.__setup(storage_number=2, pending_number=1,
partitions=10, replicas=0)
self.neo.expectRunning(started[0])
self.neo.expectClusterRunning()
self.neo.expectAssignedCells(started[0], 10)
self.neo.expectOudatedCells(number=0)
# start the second and add it to the partition table
stopped[0].start()
self.neo.expectPending(stopped[0])
self.neo.neoctl.enableStorageList([stopped[0].getUUID()])
self.neo.neoctl.tweakPartitionTable()
self.neo.expectRunning(stopped[0])
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
# the partition table must change, each node should be assigned to
# five partitions
self.neo.expectAssignedCells(started[0], 5)
self.neo.expectAssignedCells(stopped[0], 5)
def testPartitionTableReorganizedAfterDrop(self):
""" Check that the partition change when dropping a replicas from a
cluster with two storages """
# start with two storage / one replicas
(started, stopped) = self.__setup(storage_number=2, replicas=1,
partitions=10, pending_number=0)
self.neo.expectRunning(started[0])
self.neo.expectRunning(started[1])
self.neo.expectOudatedCells(number=0)
self.neo.expectAssignedCells(started[0], 10)
self.neo.expectAssignedCells(started[1], 10)
# kill one storage, it should be set as unavailable
started[0].stop()
self.neo.expectDown(started[0])
self.neo.expectRunning(started[1])
# and the partition table must not change
self.neo.expectAssignedCells(started[0], 10)
self.neo.expectAssignedCells(started[1], 10)
# ask neoctl to drop it
self.neo.neoctl.dropNode(started[0].getUUID())
self.neo.expectStorageUnknown(started[0])
self.neo.expectAssignedCells(started[0], 0)
self.neo.expectAssignedCells(started[1], 10)
self.assertRaises(SystemExit, self.neo.neoctl.dropNode,
started[1].getUUID())
self.neo.expectClusterRunning()
def testReplicationThenRunningWithReplicas(self):
""" Add a replicas to a cluster, wait for the replication to finish,
shutdown the first storage then check the new storage content """
# start with one storage
(started, stopped) = self.__setup(storage_number=2, replicas=1,
pending_number=1, partitions=10)
self.neo.expectRunning(started[0])
self.neo.expectStorageUnknown(stopped[0])
self.neo.expectOudatedCells(number=0)
# populate the cluster with some data
self.__populate()
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
self.neo.expectAssignedCells(started[0], 10)
self.__checkDatabase(self.neo.db_list[0])
# add a second storage
stopped[0].start()
self.neo.expectPending(stopped[0])
self.neo.neoctl.enableStorageList([stopped[0].getUUID()])
self.neo.neoctl.tweakPartitionTable()
self.neo.expectRunning(stopped[0])
self.neo.expectClusterRunning()
self.neo.expectAssignedCells(started[0], 10)
self.neo.expectAssignedCells(stopped[0], 10)
# wait for replication to finish
self.neo.expectOudatedCells(number=0)
self.neo.expectClusterRunning()
self.__checkReplicationDone()
# kill the first storage
started[0].stop()
self.neo.expectDown(started[0])
self.neo.expectOudatedCells(number=10)
self.neo.expectAssignedCells(started[0], 10)
self.neo.expectAssignedCells(stopped[0], 10)
self.neo.expectClusterRunning()
self.__checkDatabase(self.neo.db_list[0])
# drop it from partition table
self.neo.neoctl.dropNode(started[0].getUUID())
self.neo.expectStorageUnknown(started[0])
self.neo.expectRunning(stopped[0])
self.neo.expectAssignedCells(started[0], 0)
self.neo.expectAssignedCells(stopped[0], 10)
self.__checkDatabase(self.neo.db_list[1])
def testStartWithManyPartitions(self):
""" Just tests that cluster can start with more than 1000 partitions.
1000, because currently there is an arbitrary packet split at
every 1000 partition when sending a partition table. """
self.__setup(storage_number=2, partitions=5000, master_count=1)
self.neo.expectClusterState(ClusterStates.RUNNING)
def testRecoveryWithMultiplePT(self):
# start a cluster with 2 storages and a replica
(started, stopped) = self.__setup(storage_number=2, replicas=1,
pending_number=0, partitions=10)
self.neo.expectRunning(started[0])
self.neo.expectRunning(started[1])
self.neo.expectOudatedCells(number=0)
self.neo.expectClusterRunning()
# drop the first then the second storage
started[0].stop()
self.neo.expectDown(started[0])
self.neo.expectRunning(started[1])
self.neo.expectOudatedCells(number=10)
started[1].stop()
self.neo.expectDown(started[0])
self.neo.expectDown(started[1])
self.neo.expectOudatedCells(number=10)
self.neo.expectClusterRecovering()
# XXX: need to sync with storages first
self.neo.stop()
# restart the cluster with the first storage killed
self.neo.run(except_storages=[started[1]])
self.neo.expectPending(started[0])
self.neo.expectDown(started[1])
self.neo.expectClusterRecovering()
# Cluster doesn't know there are outdated cells
self.neo.expectOudatedCells(number=0)
started[1].start()
self.neo.expectRunning(started[0])
self.neo.expectRunning(started[1])
self.neo.expectClusterRunning()
self.neo.expectOudatedCells(number=0)
if __name__ == "__main__":
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/ 0000775 0000000 0000000 00000000000 13465024610 0024461 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0026560 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/testClientHandler.py 0000664 0000000 0000000 00000007061 13465024610 0030453 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.lib.util import p64
from neo.lib.protocol import NodeTypes, NodeStates, Packets
from neo.master.app import Application
from neo.master.handlers.client import ClientServiceHandler
class MasterClientHandlerTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
# create an application object
config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config)
self.app.em.close()
self.app.em = Mock()
self.app.loid = '\0' * 8
self.app.tm.setLastTID('\0' * 8)
self.service = ClientServiceHandler(self.app)
# define some variable to simulate client and storage node
self.client_port = 11022
self.storage_port = 10021
self.client_address = ('127.0.0.1', self.client_port)
self.storage_address = ('127.0.0.1', self.storage_port)
self.storage_uuid = self.getStorageUUID()
# register the storage
self.app.nm.createStorage(
uuid=self.storage_uuid,
address=self.storage_address,
)
def identifyToMasterNode(self, node_type=NodeTypes.STORAGE, ip="127.0.0.1",
port=10021):
"""Do first step of identification to MN """
# register the master itself
uuid = self.getNewUUID(node_type)
self.app.nm.createFromNodeType(
node_type,
address=(ip, port),
uuid=uuid,
state=NodeStates.RUNNING,
)
return uuid
def test_askPack(self):
self.assertEqual(self.app.packing, None)
self.app.nm.createClient()
tid = self.getNextTID()
peer_id = 42
conn = self.getFakeConnection(peer_id=peer_id)
storage_uuid = self.storage_uuid
storage_conn = self.getFakeConnection(storage_uuid,
self.storage_address, is_server=True)
self.app.nm.getByUUID(storage_uuid).setConnection(storage_conn)
self.service.askPack(conn, tid)
self.checkNoPacketSent(conn)
ptid = self.checkAskPacket(storage_conn, Packets.AskPack)._args[0]
self.assertEqual(ptid, tid)
self.assertTrue(self.app.packing[0] is conn)
self.assertEqual(self.app.packing[1], peer_id)
self.assertEqual(self.app.packing[2], {storage_uuid})
# Asking again to pack will cause an immediate error
storage_uuid = self.identifyToMasterNode(port=10022)
storage_conn = self.getFakeConnection(storage_uuid,
self.storage_address, is_server=True)
self.app.nm.getByUUID(storage_uuid).setConnection(storage_conn)
self.service.askPack(conn, tid)
self.checkNoPacketSent(storage_conn)
status = self.checkAnswerPacket(conn, Packets.AnswerPack)._args[0]
self.assertFalse(status)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/testMasterApp.py 0000664 0000000 0000000 00000006270 13465024610 0027634 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from .. import NeoUnitTestBase
from neo.lib.protocol import Packets
from neo.master.app import Application
class MasterAppTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
# create an application object
config = self.getMasterConfiguration()
self.app = Application(config)
def _tearDown(self, success):
self.app.close()
NeoUnitTestBase._tearDown(self, success)
def checkNotifyNodeInformation(self, conn):
return self.checkNotifyPacket(conn, Packets.NotifyNodeInformation)
def test_06_broadcastNodeInformation(self):
# defined some nodes to which data will be send
master_uuid = self.getMasterUUID()
master = self.app.nm.createMaster(uuid=master_uuid)
storage_uuid = self.getStorageUUID()
storage = self.app.nm.createStorage(uuid=storage_uuid)
client_uuid = self.getClientUUID()
client = self.app.nm.createClient(uuid=client_uuid)
# create conn and patch em
master_conn = self.getFakeConnection()
storage_conn = self.getFakeConnection(is_server=True)
client_conn = self.getFakeConnection(is_server=True)
master.setConnection(master_conn)
storage.setConnection(storage_conn)
client.setConnection(client_conn)
self.app.nm.add(storage)
self.app.nm.add(client)
# no address defined, not send to client node
c_node = self.app.nm.createClient(uuid=self.getClientUUID())
self.app.broadcastNodesInformation([c_node])
# check conn
self.checkNoPacketSent(client_conn)
self.checkNoPacketSent(master_conn)
self.checkNotifyNodeInformation(storage_conn)
# address defined and client type
s_node = self.app.nm.createClient(
uuid=self.getClientUUID(),
address=("127.1.0.1", 3361)
)
self.app.broadcastNodesInformation([c_node])
# check conn
self.checkNoPacketSent(client_conn)
self.checkNoPacketSent(master_conn)
self.checkNotifyNodeInformation(storage_conn)
# address defined and storage type
s_node = self.app.nm.createStorage(
uuid=self.getStorageUUID(),
address=("127.0.0.1", 1351)
)
self.app.broadcastNodesInformation([s_node])
# check conn
self.checkNotifyNodeInformation(client_conn)
self.checkNoPacketSent(master_conn)
self.checkNotifyNodeInformation(storage_conn)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/testMasterPT.py 0000664 0000000 0000000 00000036417 13465024610 0027445 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import random, time, unittest
from collections import Counter, defaultdict
from .. import NeoUnitTestBase
from neo.lib import logging
from neo.lib.protocol import NodeStates, CellStates
from neo.lib.pt import PartitionTableException
from neo.master.pt import PartitionTable
class MasterPartitionTableTests(NeoUnitTestBase):
def test_02_PartitionTable_creation(self):
num_partitions = 5
num_replicas = 3
pt = PartitionTable(num_partitions, num_replicas)
self.assertEqual(pt.np, num_partitions)
self.assertEqual(pt.nr, num_replicas)
self.assertEqual(pt.num_filled_rows, 0)
partition_list = pt.partition_list
self.assertEqual(len(partition_list), num_partitions)
for x in xrange(num_partitions):
part = partition_list[x]
self.assertTrue(isinstance(part, list))
self.assertEqual(len(part), 0)
self.assertEqual(len(pt.count_dict), 0)
# no nodes or cells for now
self.assertFalse(pt.getNodeSet())
for x in xrange(num_partitions):
self.assertEqual(len(pt.getCellList(x)), 0)
self.assertEqual(len(pt.getCellList(x, True)), 0)
self.assertEqual(len(pt.getRow(x)), 0)
self.assertFalse(pt.operational())
self.assertFalse(pt.filled())
self.assertRaises(AssertionError, pt.make, [])
self.assertFalse(pt.operational())
self.assertFalse(pt.filled())
def test_13_outdate(self):
# create nodes
uuid1 = self.getStorageUUID()
server1 = ("127.0.0.1", 19001)
sn1 = self.createStorage(server1, uuid1)
uuid2 = self.getStorageUUID()
server2 = ("127.0.0.2", 19002)
sn2 = self.createStorage(server2, uuid2)
uuid3 = self.getStorageUUID()
server3 = ("127.0.0.3", 19003)
sn3 = self.createStorage(server3, uuid3)
uuid4 = self.getStorageUUID()
server4 = ("127.0.0.4", 19004)
sn4 = self.createStorage(server4, uuid4)
# create partition table
num_partitions = 4
num_replicas = 3
pt = PartitionTable(num_partitions, num_replicas)
pt._setCell(0, sn1, CellStates.OUT_OF_DATE)
sn1.setState(NodeStates.RUNNING)
pt._setCell(1, sn2, CellStates.UP_TO_DATE)
sn2.setState(NodeStates.DOWN)
pt._setCell(2, sn3, CellStates.UP_TO_DATE)
sn3.setState(NodeStates.UNKNOWN)
pt._setCell(3, sn4, CellStates.UP_TO_DATE)
sn4.setState(NodeStates.RUNNING)
# outdate nodes
cells_outdated = pt.outdate()
self.assertEqual(len(cells_outdated), 2)
for offset, uuid, state in cells_outdated:
self.assertIn(offset, (1, 2))
self.assertIn(uuid, (uuid2, uuid3))
self.assertEqual(state, CellStates.OUT_OF_DATE)
# check each cell
# part 1, already outdated
cells = pt.getCellList(0)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 2, must be outdated
cells = pt.getCellList(1)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 3, must be outdated
cells = pt.getCellList(2)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.OUT_OF_DATE)
# part 4, remains running
cells = pt.getCellList(3)
self.assertEqual(len(cells), 1)
cell = cells[0]
self.assertEqual(cell.getState(), CellStates.UP_TO_DATE)
def test_15_dropNodeList(self):
sn = [self.createStorage(None, i + 1, NodeStates.RUNNING)
for i in xrange(3)]
pt = PartitionTable(3, 0)
pt._setCell(0, sn[0], CellStates.OUT_OF_DATE)
pt._setCell(1, sn[1], CellStates.FEEDING)
pt._setCell(1, sn[2], CellStates.OUT_OF_DATE)
pt._setCell(2, sn[0], CellStates.OUT_OF_DATE)
pt._setCell(2, sn[1], CellStates.FEEDING)
pt._setCell(2, sn[2], CellStates.UP_TO_DATE)
self.assertEqual(sorted(pt.dropNodeList(sn[:1], True)), [
(0, 1, CellStates.DISCARDED),
(2, 1, CellStates.DISCARDED),
(2, 2, CellStates.UP_TO_DATE)])
self.assertEqual(sorted(pt.dropNodeList(sn[2:], True)), [
(1, 2, CellStates.UP_TO_DATE),
(1, 3, CellStates.DISCARDED),
(2, 2, CellStates.UP_TO_DATE),
(2, 3, CellStates.DISCARDED)])
self.assertRaises(PartitionTableException, pt.dropNodeList, sn[1:2])
pt._setCell(1, sn[2], CellStates.UP_TO_DATE)
self.assertEqual(sorted(pt.dropNodeList(sn[1:2])), [
(1, 2, CellStates.DISCARDED),
(2, 2, CellStates.DISCARDED)])
pt._setCell(0, sn[0], CellStates.UP_TO_DATE)
self.assertEqual(self.tweak(pt), [(2, 3, CellStates.FEEDING)])
def test_16_make(self):
node_list = [self.createStorage(
("127.0.0.1", 19000 + i), self.getStorageUUID(),
NodeStates.RUNNING)
for i in xrange(4)]
for np, nr, expected in (
(3, 0, 'U..|.U.|..U'),
(5, 1, 'UU..|..UU|UU..|..UU|UU..'),
(9, 2, 'UUU.|UU.U|U.UU|.UUU|UUU.|UU.U|U.UU|.UUU|UUU.'),
):
pt = PartitionTable(np, nr)
pt.make(node_list)
self.assertPartitionTable(pt, expected)
self.assertTrue(pt.filled())
self.assertTrue(pt.operational())
# create a pt with less nodes
pt.clear()
self.assertFalse(pt.filled())
self.assertFalse(pt.operational())
pt.make(node_list[:1])
self.assertPartitionTable(pt, '|'.join('U' * np))
self.assertTrue(pt.filled())
self.assertTrue(pt.operational())
def update(self, pt, change_list=None):
offset_list = xrange(pt.np)
for node in pt.count_dict:
pt.updatable(node.getUUID(), offset_list)
if change_list is None:
for offset, row in enumerate(pt.partition_list):
for cell in list(row):
if cell.isOutOfDate():
pt.setUpToDate(cell.getNode(), offset)
else:
node_dict = {x.getUUID(): x for x in pt.count_dict}
for offset, uuid, state in change_list:
if state is CellStates.OUT_OF_DATE:
pt.setUpToDate(node_dict[uuid], offset)
pt.log()
def tweak(self, pt, drop_list=()):
change_list = pt.tweak(drop_list)
pt.log()
self.assertFalse(pt.tweak(drop_list))
return change_list
def test_17_tweak(self):
sn = [self.createStorage(None, i + 1, NodeStates.RUNNING)
for i in xrange(5)]
pt = PartitionTable(5, 2)
pt.setID(1)
# part 0
pt._setCell(0, sn[0], CellStates.DISCARDED)
pt._setCell(0, sn[1], CellStates.UP_TO_DATE)
# part 1
pt._setCell(1, sn[0], CellStates.FEEDING)
pt._setCell(1, sn[1], CellStates.FEEDING)
pt._setCell(1, sn[2], CellStates.OUT_OF_DATE)
# part 2
pt._setCell(2, sn[0], CellStates.FEEDING)
pt._setCell(2, sn[1], CellStates.UP_TO_DATE)
pt._setCell(2, sn[2], CellStates.UP_TO_DATE)
# part 3
pt._setCell(3, sn[0], CellStates.UP_TO_DATE)
pt._setCell(3, sn[1], CellStates.UP_TO_DATE)
pt._setCell(3, sn[2], CellStates.UP_TO_DATE)
pt._setCell(3, sn[3], CellStates.UP_TO_DATE)
# part 4
pt._setCell(4, sn[0], CellStates.UP_TO_DATE)
pt._setCell(4, sn[4], CellStates.UP_TO_DATE)
count_dict = defaultdict(int)
self.assertPartitionTable(pt, (
'.U...',
'FFO..',
'FUU..',
'UUUU.',
'U...U'))
change_list = self.tweak(pt)
self.assertPartitionTable(pt, (
'.UO.O',
'UU.O.',
'UFU.O',
'.UUU.',
'U..OU'))
for offset, uuid, state in change_list:
count_dict[state] += 1
self.assertEqual(count_dict, {CellStates.DISCARDED: 2,
CellStates.FEEDING: 1,
CellStates.OUT_OF_DATE: 5,
CellStates.UP_TO_DATE: 3})
self.update(pt)
self.assertPartitionTable(pt, (
'.UU.U',
'UU.U.',
'U.U.U',
'.UUU.',
'U..UU'))
self.assertRaises(PartitionTableException, pt.dropNodeList, sn[1:4])
self.assertEqual(6, len(pt.dropNodeList(sn[1:3], True)))
self.assertEqual(3, len(pt.dropNodeList([sn[1]])))
pt.addNodeList([sn[1]])
self.assertPartitionTable(pt, (
'..U.U',
'U..U.',
'U.U.U',
'..UU.',
'U..UU'))
change_list = self.tweak(pt)
self.assertPartitionTable(pt, (
'.OU.U',
'UO.U.',
'U.U.U',
'.OUU.',
'U..UU'))
self.assertEqual(3, len(change_list))
self.update(pt, change_list)
for np, i, expected in (
(12, 0, ('U...|.U..|..U.|...U|'
'U...|.U..|..U.|...U|'
'U...|.U..|..U.|...U',)),
(12, 1, ('UU...|..UU.|U...U|.UU..|...UU|'
'UU...|..UU.|U...U|.UU..|...UU|'
'UU...|..UU.',)),
(13, 2, ('U.UU.|.U.UU|UUU..|..UUU|UU..U|'
'U.UU.|.U.UU|UUU..|..UUU|UU..U|'
'U.UU.|.U.UU|UUU..',
'UUU..|U..UU|.UUU.|UU..U|..UUU|'
'UUU..|U..UU|.UUU.|UU..U|..UUU|'
'UUU..|U..UU|.UUU.')),
):
pt = PartitionTable(np, i)
i += 1
pt.make(sn[:i])
pt.log()
for n in sn[i:i+3]:
self.assertEqual([n], pt.addNodeList([n]))
self.update(pt, self.tweak(pt))
self.assertPartitionTable(pt, expected[0])
pt.clear()
pt.make(sn[:i])
for n in sn[i:i+3]:
self.assertEqual([n], pt.addNodeList([n]))
self.tweak(pt)
self.update(pt)
self.assertPartitionTable(pt, expected[-1])
pt = PartitionTable(7, 0)
pt.make(sn[:1])
pt.addNodeList(sn[1:3])
self.assertPartitionTable(pt, 'U..|U..|U..|U..|U..|U..|U..')
self.update(pt, self.tweak(pt, sn[:1]))
# See note in PartitionTable.tweak() about drop_list.
#self.assertPartitionTable(pt,'.U.|..U|.U.|..U|.U.|..U|.U.')
self.assertPartitionTable(pt, 'UU.|U.U|UU.|U.U|UU.|U.U|UU.')
def test_18_tweakBigPT(self):
seed = repr(time.time())
logging.info("using seed %r", seed)
sn_count = 11
sn = [self.createStorage(None, i + 1, NodeStates.RUNNING)
for i in xrange(sn_count)]
for topo in 0, 1:
r = random.Random(seed)
if topo:
for i, s in enumerate(sn, sn_count):
s.devpath = str(i % 5),
pt = PartitionTable(1000, 2)
pt.setID(1)
for offset in xrange(pt.np):
state = CellStates.UP_TO_DATE
k = r.randrange(1, sn_count)
for s in r.sample(sn, k):
pt._setCell(offset, s, state)
if k * r.random() < 1:
state = CellStates.OUT_OF_DATE
pt.log()
self.tweak(pt)
self.update(pt)
def test_19_topology(self):
sn_count = 16
sn = [self.createStorage(None, i + 1, NodeStates.RUNNING)
for i in xrange(sn_count)]
pt = PartitionTable(48, 2)
pt.make(sn)
pt.log()
for i, s in enumerate(sn, sn_count):
s.devpath = tuple(bin(i)[3:-1])
self.assertEqual(Counter(x[2] for x in self.tweak(pt)), {
CellStates.OUT_OF_DATE: 96,
CellStates.FEEDING: 96,
})
self.update(pt)
x = lambda n, *x: ('|'.join(x[:1]*n), '|'.join(x[1:]*n))
for even, np, i, topo, expected in (
## Optimal topology.
# All nodes have same number of cells.
(1, 2, 2, ("00", "01", "02", "10", "11", "12"), ('UU...U|..UUU.',
'UU.U..|..U.UU')),
(1, 7, 1, "0001122", (
'U.....U|.U.U...|..U.U..|U....U.|.U....U|..UU...|....UU.',
'U..U...|.U...U.|..U.U..|U.....U|.U.U...|..U..U.|....U.U')),
(1, 4, 1, "00011122", ('U......U|.U.U....|..U.U...|.....UU.',
'U..U....|.U..U...|..U...U.|.....U.U')),
(1, 9, 1, "000111222", ('U.......U|.U.U.....|..U.U....|'
'.....UU..|U......U.|.U......U|'
'..UU.....|....U.U..|.....U.U.',
'U..U.....|.U....U..|..U.U....|'
'.....U.U.|U.......U|.U.U.....|'
'..U...U..|....U..U.|.....U..U')),
# Some nodes have a extra cell.
(0, 8, 1, "0001122", ('U.....U|.U.U...|..U.U..|U....U.|'
'.U....U|..UU...|....UU.|U.....U',
'U..U...|.U...U.|..U.U..|U.....U|'
'.U.U...|..U..U.|....U.U|U..U...')),
## Topology ignored.
(1, 6, 1, ("00", "01", "1"), 'UU.|U.U|.UU|UU.|U.U|.UU'),
(1, 5, 2, "01233", 'UUU..|U..UU|.UUU.|UU..U|..UUU'),
):
assert len(topo) <= sn_count
sn2 = sn[:len(topo)]
for s in sn2:
s.devpath = ()
k = (1,7)[even]
pt = PartitionTable(np*k, i)
pt.make(sn2)
for devpath, s in zip(topo, sn2):
s.devpath = tuple(devpath)
if type(expected) is tuple:
self.assertTrue(self.tweak(pt))
self.update(pt)
self.assertPartitionTable(pt, '|'.join(expected[:1]*k))
pt.clear()
pt.make(sn2)
self.assertPartitionTable(pt, '|'.join(expected[1:]*k))
self.assertFalse(pt.tweak())
else:
expected = '|'.join((expected,)*k)
self.assertFalse(pt.tweak())
self.assertPartitionTable(pt, expected)
pt.clear()
pt.make(sn2)
self.assertPartitionTable(pt, expected)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/testStorageHandler.py 0000664 0000000 0000000 00000005646 13465024610 0030650 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes, Packets
from neo.master.app import Application
from neo.master.handlers.storage import StorageServiceHandler
class MasterStorageHandlerTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
# create an application object
config = self.getMasterConfiguration(master_number=1, replicas=1)
self.app = Application(config)
self.app.em.close()
self.app.em = Mock()
self.service = StorageServiceHandler(self.app)
def _allocatePort(self):
self.port = getattr(self, 'port', 1000) + 1
return self.port
def _getStorage(self):
return self.identifyToMasterNode(node_type=NodeTypes.STORAGE,
ip='127.0.0.1', port=self._allocatePort())
def identifyToMasterNode(self, node_type=NodeTypes.STORAGE, ip="127.0.0.1",
port=10021):
"""Do first step of identification to MN
"""
nm = self.app.nm
uuid = self.getNewUUID(node_type)
node = nm.createFromNodeType(node_type, address=(ip, port),
uuid=uuid)
conn = self.getFakeConnection(node.getUUID(), node.getAddress(), True)
node.setConnection(conn)
return (node, conn)
def test_answerPack(self):
# Note: incoming status has no meaning here, so it's left to False.
node1, conn1 = self._getStorage()
node2, conn2 = self._getStorage()
self.app.packing = None
# Does nothing
self.service.answerPack(None, False)
client_conn = Mock({
'getPeerId': 512,
})
client_peer_id = 42
self.app.packing = (client_conn, client_peer_id,
{conn1.getUUID(), conn2.getUUID()})
self.service.answerPack(conn1, False)
self.checkNoPacketSent(client_conn)
self.assertEqual(self.app.packing[2], {conn2.getUUID()})
self.service.answerPack(conn2, False)
packet = self.checkNotifyPacket(client_conn, Packets.AnswerPack)
# TODO: verify packet peer id
self.assertTrue(packet._args[0])
self.assertEqual(self.app.packing, None)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/master/testTransactions.py 0000664 0000000 0000000 00000005152 13465024610 0030406 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2006-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from struct import pack
from .. import NeoUnitTestBase
from neo.lib.protocol import NodeTypes
from neo.lib.util import packTID, unpackTID, addTID
from neo.master.transactions import TransactionManager
class testTransactionManager(NeoUnitTestBase):
def makeNode(self, node_type):
uuid = self.getNewUUID(node_type)
node = Mock({'getUUID': uuid, '__hash__': uuid, '__repr__': 'FakeNode'})
return uuid, node
def testTIDUtils(self):
"""
Tests packTID/unpackTID/addTID.
"""
min_tid = pack('!LL', 0, 0)
min_unpacked_tid = ((1900, 1, 1, 0, 0), 0)
max_tid = pack('!LL', 2**32 - 1, 2 ** 32 - 1)
# ((((9917 - 1900) * 12 + (10 - 1)) * 31 + (14 - 1)) * 24 + 4) * 60 +
# 15 == 2**32 - 1
max_unpacked_tid = ((9917, 10, 14, 4, 15), 2**32 - 1)
self.assertEqual(unpackTID(min_tid), min_unpacked_tid)
self.assertEqual(unpackTID(max_tid), max_unpacked_tid)
self.assertEqual(packTID(*min_unpacked_tid), min_tid)
self.assertEqual(packTID(*max_unpacked_tid), max_tid)
self.assertEqual(addTID(min_tid, 1), pack('!LL', 0, 1))
self.assertEqual(addTID(pack('!LL', 0, 2**32 - 1), 1),
pack('!LL', 1, 0))
self.assertEqual(addTID(pack('!LL', 0, 2**32 - 1), 2**32 + 1),
pack('!LL', 2, 0))
# Check impossible dates are avoided (2010/11/31 doesn't exist)
self.assertEqual(
unpackTID(addTID(packTID((2010, 11, 30, 23, 59), 2**32 - 1), 1)),
((2010, 12, 1, 0, 0), 0))
def testClientDisconectsAfterBegin(self):
client_uuid1, node1 = self.makeNode(NodeTypes.CLIENT)
tm = TransactionManager(None)
tid1 = self.getNextTID()
tid2 = self.getNextTID()
tm.begin(node1, 0, tid1)
self.assertEqual(1, len(list(tm.clientLost(node1))))
self.assertNotIn(tid1, tm)
if __name__ == '__main__':
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/node.crt 0000664 0000000 0000000 00000001664 13465024610 0024634 0 ustar 00root root 0000000 0000000 -----BEGIN CERTIFICATE-----
MIICkDCCAXgCAQEwDQYJKoZIhvcNAQELBQAwDTELMAkGA1UEAwwCQ0EwHhcNMTUw
OTMwMTM1NDMwWhcNMjEwMzIyMTM1NDMwWjAPMQ0wCwYDVQQDDARub2RlMIIBIjAN
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuJ+ClJyjhJOJdGUyqHn79opMLP3m
1g27uBWKT+OGd4FcreVoRDxPVuxZxMtDCZcBfUHVvOoSlS06khwSxViEe1hxwHRa
n2qMWlwvaWeNY0CFH5V+DI4XSNojgny85Lb5jB69FuPcrHnwxLk2OFntrXEeNbEa
d7QSoNbPajbJIp5BS/WR9iu5Z5JYdumLWjTvOU+eZc4iA6Wa2kdDtbqkGi4wOJ1L
/ggATL+p+QFcubVptztPT8vq7gvDdGJgXLJ2lPHV0V/sdJB1FB4mSJEDDjSm2Hpp
qPVJSO1GrAy5Ld+0SnXIZZejhIUJIumocY08r+vzDSQ/8NnqXR4Odz1TWwIDAQAB
MA0GCSqGSIb3DQEBCwUAA4IBAQBlYkkInDDDcgnNRdUmzxwejs1PmEehZ3H5FkMp
TsmpoVC+oqM+QywMu8UJRtCjXnnJdAUbVYuZ1Tjm7qvFIhN+5OlIVxJ+8WcmZPSe
lj0N7Dv2nE1diTDS+qPZVPZ0demo1LafRmPomPWiM/CQRlMPxXnimuiYOROhWGn6
jsyoOwquMkAc6Ub++l4OCxLAP0eTgJFkivmqpaYZXG4o7zFvcQ3rQ66rQrMl69sR
8/MVqbT5Sq1CEJbepP4GaFfa5l3CVy7WH2MhCV1/9mNwcXafkTgx3q2HsPon4Dze
kNwiguNAM4L/j4dbIwz+CIVWcgpBCrfv2JYu+jGlRpxIDeWR
-----END CERTIFICATE-----
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/node.key 0000664 0000000 0000000 00000003250 13465024610 0024625 0 ustar 00root root 0000000 0000000 -----BEGIN PRIVATE KEY-----
MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQC4n4KUnKOEk4l0
ZTKoefv2ikws/ebWDbu4FYpP44Z3gVyt5WhEPE9W7FnEy0MJlwF9QdW86hKVLTqS
HBLFWIR7WHHAdFqfaoxaXC9pZ41jQIUflX4MjhdI2iOCfLzktvmMHr0W49ysefDE
uTY4We2tcR41sRp3tBKg1s9qNskinkFL9ZH2K7lnklh26YtaNO85T55lziIDpZra
R0O1uqQaLjA4nUv+CABMv6n5AVy5tWm3O09Py+ruC8N0YmBcsnaU8dXRX+x0kHUU
HiZIkQMONKbYemmo9UlI7UasDLkt37RKdchll6OEhQki6ahxjTyv6/MNJD/w2epd
Hg53PVNbAgMBAAECggEACCWh0YUIAjWwfx5oGd8oKzz3F5Usto1slzb8H4Je0K84
s8PH0hsHNULAw1pk3ut2+VwKXofFwid4yjHK8kJxti+09VUtGpPNFb+zp+cV6idS
uI4UPeGXTXOy1XNDsqQQZwqENZpghovrAANcTShKwLiZKK1kCZw8pjRUes9oGMrn
zLXvNPgpJbRUPPYFa34jJyaJtiBEgIBWBmAeEk4ccTYFk4Kon93Ljmobm/6H8WZE
bxomzJjFVMSjW/NmMWnalX5H7GegJVOtkOGvHf25dHBdbdU772ReY4PRZJEGwn8E
QcZjzaKkLB34IlbjWjH0nnanNa6DyjhulaqkHjAtIQKBgQDf3zPefVHss4FrajU8
Nvch6mpayOeJ3RzOAiMzmfI/O3zcJqqrlOOKQjvP5feAoLOJTcvu4hAMfUix/7LR
Qag5nALIllox6QnoMWmvKUbcekXunf/PUVFQ7SC3RJKtqh5oq5wRGJWcDPP6/Okk
t+64NeQOT3lPs2LNyy7KpB5YlQKBgQDTHln2fYKCLnJ66+3QPjDSJrwdUD6HFiwp
X5BfNUKpnxged+buFbgp5TB2vu6Z5A3AxwmWXBFiuvHLpl3uSAEIRbFQ+elagUXa
BoiPOnq1b00X/Vdkd3sX4czVemIG9DIA1uo31Gd9fFWMLHZGBOwqgNKiJOgQZXTq
dX/tAGrQLwKBgGt59aXf1j/j4cMWxx30aWq/5nVVNEtsetKwFgRE6RbQUV5DtfYP
0bljmOFzTwJSpD7LuZcisn+8efTyg/+QHNojevafsAd8EISHjGxKTbm1ffNTqSb3
rClE3kr9wclb/aNUl+VhPxoe4dbiKm+1WgbX4He6Ucwgm9OeswUYC3WNAoGAK1i/
/+wlL7V5q+NlIKykOYHafepL7FCRIK2OZv34gfs4aIkV0SyEc5WrLbZmJxK8ACjd
vxGIQE1B+B5gitwd2iT1Ezs8vmhsfyd4QnAvYbFIkvRhTS97BpxGAk7ucZ8R5To7
PNtPpGQy7GT0o8u+8bshhEkvnK44Iyuc6Hx9ceECgYAjM1jEreHzi96v8pmneJja
VIoNNu2PoTS1nTWN+/6//B62GxpKM5UCYZDRDC8urPXb6tXSC29I104yrNJQEGXz
/acVGnA6GJV639pXz++hf2NMHKandlKY+Cz2euZT/qUU80W+B5korsubIS6ATeee
+QU2yqNaC1ZvTO2L1OLYRg==
-----END PRIVATE KEY-----
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/protocol 0000664 0000000 0000000 00000006144 13465024610 0024757 0 ustar 00root root 0000000 0000000 # generated by running the whole test suite with -p
AbortTransaction(p64,[int])
AcceptIdentification(NodeTypes,?int,?int)
AddObject(p64,p64,int,bin,bin,?p64)
AddPendingNodes([int])
AddTransaction(p64,bin,bin,bin,bool,p64,[p64])
AnswerBeginTransaction(p64)
AnswerCheckCurrentSerial(?p64)
AnswerCheckSerialRange(int,bin,p64,bin,p64)
AnswerCheckTIDRange(int,bin,p64)
AnswerClusterState(?ClusterStates)
AnswerFetchObjects(?,?p64,?p64,{:})
AnswerFetchTransactions(?,?p64,[])
AnswerFinalTID(p64)
AnswerInformationLocked(p64)
AnswerLastIDs(?p64,?p64)
AnswerLastTransaction(p64)
AnswerLockedTransactions({p64:?p64})
AnswerNewOIDs([p64])
AnswerNodeList([(NodeTypes,?(bin,int),?int,NodeStates,?float)])
AnswerObject(p64,p64,?p64,?int,bin,bin,?p64)
AnswerObjectHistory(p64,[(p64,int)])
AnswerObjectUndoSerial({p64:(p64,?p64,bool)})
AnswerPack(bool)
AnswerPartitionList(int,int,[[(int,CellStates)]])
AnswerPartitionTable(int,int,[[(int,CellStates)]])
AnswerPrimary(int)
AnswerRebaseObject(?(p64,p64,?(int,bin,bin)))
AnswerRebaseTransaction([p64])
AnswerRecovery(?int,?p64,?p64)
AnswerStoreObject(?p64)
AnswerStoreTransaction()
AnswerTIDs([p64])
AnswerTIDsFrom([p64])
AnswerTransactionFinished(p64,p64)
AnswerTransactionInformation(p64,bin,bin,bin,bool,[p64])
AnswerTweakPartitionTable(bool,[[(int,CellStates)]])
AnswerUnfinishedTransactions(p64,[p64])
AnswerVoteTransaction()
AskBeginTransaction(?p64)
AskCheckCurrentSerial(p64,p64,p64)
AskCheckSerialRange(int,int,p64,p64,p64)
AskCheckTIDRange(int,int,p64,p64)
AskClusterState()
AskFetchObjects(int,int,p64,p64,p64,{p64:[p64]})
AskFetchTransactions(int,int,p64,p64,[p64])
AskFinalTID(p64)
AskFinishTransaction(p64,[p64],[p64])
AskLastIDs()
AskLastTransaction()
AskLockInformation(p64,p64)
AskLockedTransactions()
AskNewOIDs(int)
AskNodeList(NodeTypes)
AskObject(p64,?p64,?p64)
AskObjectHistory(p64,int,int)
AskObjectUndoSerial(p64,p64,p64,[p64])
AskPack(p64)
AskPartitionList(int,int,?)
AskPartitionTable()
AskPrimary()
AskRebaseObject(p64,p64)
AskRebaseTransaction(p64,p64)
AskRecovery()
AskStoreObject(p64,p64,int,bin,bin,?p64,?p64)
AskStoreTransaction(p64,bin,bin,bin,[p64])
AskTIDs(int,int,int)
AskTIDsFrom(p64,p64,int,int)
AskTransactionInformation(p64)
AskUnfinishedTransactions([int])
AskVoteTransaction(p64)
CheckPartition(int,(bin,?(bin,int)),p64,p64)
CheckReplicas({int:?int},p64,?)
Error(int,bin)
FailedVote(p64,[int])
InvalidateObjects(p64,[p64])
NotPrimaryMaster(?int,[(bin,int)])
NotifyClusterInformation(ClusterStates)
NotifyDeadlock(p64,p64)
NotifyNodeInformation(float,[(NodeTypes,?(bin,int),?int,NodeStates,?float)])
NotifyPartitionChanges(int,int,[(int,int,CellStates)])
NotifyPartitionCorrupted(int,[int])
NotifyReady()
NotifyRepair(int)
NotifyReplicationDone(int,p64)
NotifyTransactionFinished(p64,p64)
NotifyUnlockInformation(p64)
Ping()
Pong()
Repair([int],int)
Replicate(p64,bin,{int:?(bin,int)})
RequestIdentification(NodeTypes,?int,?(bin,int),bin,?float,any,[int])
SendPartitionTable(?int,int,[[(int,CellStates)]])
SetClusterState(ClusterStates)
SetNodeState(int,NodeStates)
SetNumReplicas(int)
StartOperation(bool)
StopOperation()
Truncate(p64)
TweakPartitionTable(bool,[int])
ValidateTransaction(p64,p64)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/protocol_checker.py 0000664 0000000 0000000 00000015442 13465024610 0027073 0 ustar 00root root 0000000 0000000 # The use of ast is convoluted, and the result quite verbose,
# but that remains simpler than writing a parser from scratch.
import ast, os
from contextlib import contextmanager
from neo.lib.protocol import Packet, Enum
array = list, set, tuple
item = Enum.Item
class _ast(object):
def __getattr__(self, k):
v = lambda *args: getattr(ast, k)(lineno=0, col_offset=0, *args)
setattr(self, k, v)
return v
_ast = _ast()
class parseArgument(ast.NodeTransformer):
def visit_UnaryOp(self, node):
assert isinstance(node.op, ast.USub)
return _ast.Call(_ast.Name('option', ast.Load()),
[self.visit(node.operand)], [], None, None)
def visit_Name(self, node):
return _ast.Str(node.id.replace('_', '?'))
parseArgument = parseArgument().visit
class Argument(object):
merge = True
type = ''
option = False
@classmethod
def load(cls, arg):
arg = ast.parse(arg.rstrip()
.replace('?(', '-(').replace('?[', '-[').replace('?{', '-{')
.replace('?', '_').replace('[]', '[""]')
.replace('{:', '{"":').replace(':}', ':""}'),
mode="eval")
x = arg.body
name = x.func.id
arg.body = parseArgument(_ast.Tuple(x.args, ast.Load()))
return name, cls._load(eval(compile(arg, '', mode="eval"),
{'option': cls._option}))
@classmethod
def _load(cls, arg):
t = type(arg)
if t is cls:
return arg
x = object.__new__(cls)
if t is tuple:
x.type = map(cls._load, arg)
elif t is list:
x.type = cls._load(*arg),
elif t is dict:
(k, v), = arg.iteritems()
x.type = cls._load(k), cls._load(v)
else:
if arg.startswith('?'):
arg = arg[1:]
x.option = True
x.type = arg
return x
@classmethod
def _option(cls, arg):
arg = cls._load(arg)
arg.option = True
return arg
@classmethod
def _merge(cls, args):
if args:
x, = {cls(x) for x in args}
return x
return object.__new__(cls)
def __init__(self, arg, root=False):
if arg is None:
self.option = True
elif isinstance(arg, tuple) and (root or len(arg) > 1):
self.type = map(self.__class__, arg)
elif isinstance(arg, array):
self.type = self._merge(arg),
elif isinstance(arg, dict):
self.type = self._merge(arg), self._merge(arg.values())
else:
self.type = (('p64' if len(arg) == 8 else
'bin') if isinstance(arg, bytes) else
arg._enum._name if isinstance(arg, item) else
'str' if isinstance(arg, unicode) else
'int' if isinstance(arg, long) else
type(arg).__name__)
def __repr__(self):
x = self.type
if type(x) is tuple:
x = ('[%s]' if len(x) == 1 else '{%s:%s}') % x
elif type(x) is list:
x = '(%s)' % ','.join(map(repr, x))
return '?' + x if self.option else x
def __hash__(self):
return 0
def __eq__(self, other):
x = self.type
y = other.type
if x and y and x != 'any':
# Since we don't know whether an array is fixed-size record of
# heterogeneous values or a collection of homogeneous values,
# we end up with the following complicated heuristic.
t = type(x)
if t is tuple:
if len(x) == 1 and type(y) is list:
z = set(x)
z.update(y)
if len(z) == 1:
x = y = tuple(z)
if self.merge:
self.type = x
elif t is list:
if type(y) is tuple and len(y) == 1:
z = set(y)
z.update(x)
if len(z) == 1:
x = y = tuple(z)
if self.merge:
self.type = x
t = tuple
elif t is str is type(y) and {x, y}.issuperset(('bin', 'p64')):
x = y = 'bin'
if self.merge:
self.type = x
if not (t is type(y) and (t is not tuple or
len(x) == len(y)) and x == y):
if not self.merge:
return False
self.type = 'any'
if self.merge:
if not x:
self.type = y
if not self.option:
self.option = other.option
elif y and not x or other.option and not self.option:
return False
return True
class FrozenArgument(Argument):
merge = False
@contextmanager
def protocolChecker(dump):
x = 'Packet(p64,?[(bin,{int:})],{:?(?,[])},?{?:float})'
assert x == '%s%r' % Argument.load(x)
assert not (FrozenArgument([]) == Argument([0]))
path = os.path.join(os.path.dirname(__file__), 'protocol')
if dump:
import threading
from multiprocessing import Lock
lock = Lock()
schema = {}
pid = os.getpid()
r, w = os.pipe()
def _check(name, arg):
try:
schema[name] == arg
except KeyError:
schema[name] = arg
def check(name, args):
arg = Argument(args, True)
if pid == os.getpid():
_check(name, arg)
else:
with lock:
os.write(w, '%s%r\n' % (name, arg))
def check_thread(r):
for x in os.fdopen(r):
_check(*Argument.load(x))
check_thread = threading.Thread(target=check_thread, args=(r,))
check_thread.daemon = True
check_thread.start()
else:
with open(path) as p:
x = p.readline()
assert x[0] == '#', x
schema = dict(map(FrozenArgument.load, p))
def check(name, args):
arg = Argument(args, True)
if not (None is not schema.get(name) == arg):
raise Exception('invalid packet: %s%r' % (name, arg))
w = None
Packet_encode = Packet.__dict__['encode']
def encode(packet):
check(type(packet).__name__, packet._args)
return Packet_encode(packet)
Packet.encode = encode
try:
yield
finally:
Packet.encode = Packet_encode
if w:
os.close(w)
check_thread.join()
if dump:
with open(path, 'w') as p:
p.write('# generated by running the whole test suite with -p\n')
for x in sorted(schema.iteritems()):
p.write('%s%r\n' % x)
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/random_tree.py 0000664 0000000 0000000 00000006333 13465024610 0026044 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2018-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import hashlib, random
from collections import deque
from itertools import islice
from persistent import Persistent
from BTrees.IOBTree import IOBTree
from .stat_zodb import _DummyData
def generateTree(random=random):
tree = []
N = 5
fifo = deque()
path = ()
size = lambda: max(int(random.gauss(40,30)), 0)
while 1:
tree.extend(path + (i, size())
for i in xrange(-random.randrange(N), 0))
n = N * (1 - len(path)) + random.randrange(N)
for i in xrange(n):
fifo.append(path + (i,))
try:
path = fifo.popleft()
except IndexError:
break
change = tree
while change:
change = [x[:-1] + (size(),) for x in change if random.randrange(2)]
tree += change
random.shuffle(tree)
return tree
class Leaf(Persistent):
pass
Node = IOBTree
def importTree(root, tree, yield_interval=None, filter=None):
n = 0
for path in tree:
node = root
for i, x in enumerate(path[:-1], 1):
if filter and not filter(path[:i]):
break
if x < 0:
try:
node = node[x]
except KeyError:
node[x] = node = Leaf()
node.data = bytes(_DummyData(random.Random(path), path[-1]))
else:
try:
node = node[x]
continue
except KeyError:
node[x] = node = Node()
n += 1
if n == yield_interval:
n = 0
yield root
if n:
yield root
class hashTree(object):
_hash = None
_new = hashlib.md5
def __init__(self, node):
s = [((), node)]
def walk():
h = self._new()
update = h.update
while s:
top, node = s.pop()
try:
update('%s %s %s\n' % (top, len(node.data),
self._new(node.data).hexdigest()))
yield
except AttributeError:
update('%s %s\n' % (top, tuple(node.keys())))
yield
for k, v in reversed(node.items()):
s.append((top + (k,), v))
del self._walk
self._hash = h
self._walk = walk()
def __getattr__(self, attr):
return getattr(self._hash, attr)
def __call__(self, n=None):
if n is None:
return sum(1 for _ in self._walk)
next(islice(self._walk, n - 1, None))
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/stat_zodb.py 0000775 0000000 0000000 00000013444 13465024610 0025542 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
# -*- coding: utf-8 -*-
import math, random, sys
from cStringIO import StringIO
from ZODB.utils import p64, u64
from ZODB.BaseStorage import TransactionRecord
from ZODB.FileStorage import FileStorage
# Stats of a 43.5 GB production Data.fs
# µ σ
# size of object 6.04237779991 1.55811487853
# # objects / transaction 1.04108991045 0.906703192546
# size of transaction 7.98615420517 1.6624220402
#
# % of new object / transaction: 0.810080409164
# # of transactions: 1541194
# compression ratio: 28.5 % (gzip -6)
PROD1 = lambda random=random: DummyZODB(6.04237779991, 1.55811487853,
1.04108991045, 0.906703192546,
0.810080409164, random)
def _DummyData(random, size):
# returns data that gzip at about 28.5 %
return bytearray(int(random.gauss(0, .8)) % 256 for x in xrange(size))
def DummyData(random=random):
# make sure sample is bigger than dictionary of compressor
return StringIO(_DummyData(random, 100000))
class DummyZODB(object):
"""
Object size and count of generated transaction follows a log normal
distribution, where *_mu and *_sigma are their parameters.
"""
def __init__(self, obj_size_mu, obj_size_sigma,
obj_count_mu, obj_count_sigma,
new_ratio, random=random):
self.obj_size_mu = obj_size_mu
self.obj_size_sigma = obj_size_sigma
self.obj_count_mu = obj_count_mu
self.obj_count_sigma = obj_count_sigma
self.random = random
self.new_ratio = new_ratio
self.next_oid = 0
self.err_count = 0
self.tid = u64('TID\0\0\0\0\0')
def __call__(self):
variate = self.random.lognormvariate
oid_set = set()
for i in xrange(int(round(variate(self.obj_count_mu,
self.obj_count_sigma))) or 1):
if len(oid_set) >= self.next_oid or \
self.random.random() < self.new_ratio:
oid = self.next_oid
self.next_oid = oid + 1
else:
while True:
oid = self.random.randrange(self.next_oid)
if oid not in oid_set:
break
oid_set.add(oid)
yield p64(oid), int(round(variate(self.obj_size_mu,
self.obj_size_sigma))) or 1
def as_storage(self, stop, dummy_data_file=None):
if dummy_data_file is None:
dummy_data_file = DummyData(self.random)
if isinstance(stop, int):
stop = (lambda x: lambda y: x <= y)(stop)
class dummy_change(object):
data_txn = None
version = ''
def __init__(self, tid, oid, size):
self.tid = tid
self.oid = oid
data = ''
while size:
d = dummy_data_file.read(size)
size -= len(d)
data += d
if size:
dummy_data_file.seek(0)
self.data = data
class dummy_transaction(TransactionRecord):
def __init__(transaction, *args):
TransactionRecord.__init__(transaction, *args)
transaction_size = 0
transaction.record_list = []
add_record = transaction.record_list.append
for x in self():
oid, size = x
transaction_size += size
add_record(dummy_change(transaction.tid, oid, size))
transaction.size = transaction_size
def __iter__(transaction):
return iter(transaction.record_list)
class dummy_storage(object):
size = 0
def iterator(storage, *args):
args = ' ', '', '', {}
i = 0
variate = self.random.lognormvariate
while not stop(i):
self.tid += max(1, int(variate(10, 3)))
t = dummy_transaction(p64(self.tid), *args)
storage.size += t.size
yield t
i += 1
def getSize(self):
return self.size
return dummy_storage()
def lognorm_stat(X):
Y = map(math.log, X)
n = len(Y)
mu = sum(Y) / n
s2 = sum(d*d for d in (y - mu for y in Y)) / n
return mu, math.sqrt(s2)
def stat(*storages):
obj_size_list = []
obj_count_list = []
tr_size_list = []
oid_set = set()
for storage in storages:
for transaction in storage.iterator():
obj_count = tr_size = 0
for r in transaction:
if r.data:
obj_count += 1
oid = r.oid
if oid not in oid_set:
oid_set.add(oid)
size = len(r.data)
tr_size += size
obj_size_list.append(size)
obj_count_list.append(obj_count)
tr_size_list.append(tr_size)
new_ratio = float(len(oid_set)) / len(obj_size_list)
return (lognorm_stat(obj_size_list),
lognorm_stat(obj_count_list),
lognorm_stat(tr_size_list),
new_ratio, len(tr_size_list))
def main():
s = stat(*(FileStorage(x, read_only=True) for x in sys.argv[1:]))
print(u" %-15s σ\n"
"size of object %-15s %s\n"
"# objects / transaction %-15s %s\n"
"size of transaction %-15s %s\n"
"\n%% of new object / transaction: %s"
"\n# of transactions: %s"
% ((u"µ",) + s[0] + s[1] + s[2] + s[3:]))
if __name__ == "__main__":
sys.exit(main())
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/ 0000775 0000000 0000000 00000000000 13465024610 0024632 5 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/__init__.py 0000664 0000000 0000000 00000000000 13465024610 0026731 0 ustar 00root root 0000000 0000000 neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/testClientHandler.py 0000664 0000000 0000000 00000007602 13465024610 0030625 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock, ReturnValues
from .. import NeoUnitTestBase
from neo.storage.app import Application
from neo.storage.handlers.client import ClientOperationHandler
from neo.lib.util import p64
from neo.lib.protocol import INVALID_TID, Packets
class StorageClientHandlerTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
self.prepareDatabase(number=1)
# create an application object
config = self.getStorageConfiguration(master_number=1)
self.app = Application(config)
self.app.tm = Mock({'__contains__': True})
# handler
self.operation = ClientOperationHandler(self.app)
# set pmn
self.master_uuid = self.getMasterUUID()
pmn = self.app.nm.getMasterList()[0]
pmn.setUUID(self.master_uuid)
self.app.primary_master_node = pmn
def _tearDown(self, success):
self.app.close()
del self.app
super(StorageClientHandlerTests, self)._tearDown(success)
def _getConnection(self, uuid=None):
return self.getFakeConnection(uuid=uuid, address=('127.0.0.1', 1000))
def test_18_askTransactionInformation1(self):
# transaction does not exists
conn = self._getConnection()
self.app.dm = Mock({'getNumPartitions': 1})
self.operation.askTransactionInformation(conn, INVALID_TID)
self.checkErrorPacket(conn)
def test_25_askTIDs1(self):
# invalid offsets => error
app = self.app
app.pt = Mock()
app.dm = Mock()
conn = self._getConnection()
self.checkProtocolErrorRaised(self.operation.askTIDs, conn, 1, 1, None)
self.assertEqual(len(app.pt.mockGetNamedCalls('getCellList')), 0)
self.assertEqual(len(app.dm.mockGetNamedCalls('getTIDList')), 0)
def test_25_askTIDs2(self):
# well case => answer
conn = self._getConnection()
self.app.pt = Mock({'getPartitions': 1})
self.app.dm = Mock({'getTIDList': (INVALID_TID, )})
self.operation.askTIDs(conn, 1, 2, 1)
calls = self.app.dm.mockGetNamedCalls('getTIDList')
self.assertEqual(len(calls), 1)
calls[0].checkArgs(1, 1, [1, ])
self.checkAnswerPacket(conn, Packets.AnswerTIDs)
def test_26_askObjectHistory1(self):
# invalid offsets => error
app = self.app
app.dm = Mock()
conn = self._getConnection()
self.checkProtocolErrorRaised(self.operation.askObjectHistory, conn,
1, 1, None)
self.assertEqual(len(app.dm.mockGetNamedCalls('getObjectHistory')), 0)
def test_askObjectUndoSerial(self):
conn = self._getConnection(uuid=self.getClientUUID())
tid = self.getNextTID()
ltid = self.getNextTID()
undone_tid = self.getNextTID()
# Keep 2 entries here, so we check findUndoTID is called only once.
oid_list = map(p64, (1, 2))
self.app.tm = Mock({
'getObjectFromTransaction': None,
})
self.app.dm = Mock({
'findUndoTID': ReturnValues((None, None, False), )
})
self.operation.askObjectUndoSerial(conn, tid, ltid, undone_tid, oid_list)
self.checkErrorPacket(conn)
if __name__ == "__main__":
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/testMasterHandler.py 0000664 0000000 0000000 00000007024 13465024610 0030640 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from ..mock import Mock
from .. import NeoUnitTestBase
from neo.storage.app import Application
from neo.storage.handlers.master import MasterOperationHandler
from neo.lib.pt import PartitionTable
from neo.lib.protocol import CellStates, ProtocolError
class StorageMasterHandlerTests(NeoUnitTestBase):
def setUp(self):
NeoUnitTestBase.setUp(self)
self.prepareDatabase(number=1)
# create an application object
config = self.getStorageConfiguration(master_number=1)
self.app = Application(config)
# handler
self.operation = MasterOperationHandler(self.app)
# set pmn
self.master_uuid = self.getMasterUUID()
pmn = self.app.nm.getMasterList()[0]
pmn.setUUID(self.master_uuid)
self.app.primary_master_node = pmn
self.master_port = 10010
def _tearDown(self, success):
self.app.close()
del self.app
super(StorageMasterHandlerTests, self)._tearDown(success)
def getMasterConnection(self):
address = ("127.0.0.1", self.master_port)
return self.getFakeConnection(uuid=self.master_uuid, address=address)
def test_14_notifyPartitionChanges1(self):
# old partition change -> do nothing
app = self.app
conn = self.getMasterConnection()
app.replicator = Mock({})
self.app.pt = Mock({'getID': 1})
count = len(self.app.nm.getList())
self.assertRaises(ProtocolError, self.operation.notifyPartitionChanges,
conn, 0, 0, ())
self.assertEqual(self.app.pt.getID(), 1)
self.assertEqual(len(self.app.nm.getList()), count)
calls = self.app.replicator.mockGetNamedCalls('removePartition')
self.assertEqual(len(calls), 0)
calls = self.app.replicator.mockGetNamedCalls('addPartition')
self.assertEqual(len(calls), 0)
def test_14_notifyPartitionChanges2(self):
# cases :
uuid1, uuid2, uuid3 = [self.getStorageUUID() for i in range(3)]
cells = (
(0, uuid1, CellStates.UP_TO_DATE),
(1, uuid2, CellStates.DISCARDED),
(2, uuid3, CellStates.OUT_OF_DATE),
)
# context
conn = self.getMasterConnection()
app = self.app
# register nodes
app.nm.createStorage(uuid=uuid1)
app.nm.createStorage(uuid=uuid2)
app.nm.createStorage(uuid=uuid3)
app.pt = PartitionTable(3, 1)
app.pt._id = 1
ptid = 2
app.dm = Mock({ })
app.replicator = Mock({})
self.operation.notifyPartitionChanges(conn, ptid, 1, cells)
# ptid set
self.assertEqual(app.pt.getID(), ptid)
# dm call
calls = self.app.dm.mockGetNamedCalls('changePartitionTable')
self.assertEqual(len(calls), 1)
calls[0].checkArgs(ptid, 1, cells)
if __name__ == "__main__":
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/testStorageDBTests.py 0000664 0000000 0000000 00000044571 13465024610 0030754 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
from binascii import a2b_hex
from contextlib import contextmanager
import unittest
from neo.lib.util import add64, p64, u64
from neo.lib.protocol import CellStates, ZERO_HASH, ZERO_OID, ZERO_TID, MAX_TID
from .. import NeoUnitTestBase
class StorageDBTests(NeoUnitTestBase):
_last_ttid = ZERO_TID
def setUp(self):
NeoUnitTestBase.setUp(self)
@property
def db(self):
try:
return self._db
except AttributeError:
self.setNumPartitions(1)
return self._db
def _tearDown(self, success):
try:
self.__dict__.pop('_db', None).close()
except AttributeError:
pass
NeoUnitTestBase._tearDown(self, success)
def getDB(self, reset=0):
raise NotImplementedError
def setNumPartitions(self, num_partitions, reset=0):
assert not hasattr(self, '_db')
self._db = db = self.getDB(reset)
uuid = self.getStorageUUID()
db.setUUID(uuid)
self.assertEqual(uuid, db.getUUID())
db.changePartitionTable(1, 0,
[(i, uuid, CellStates.UP_TO_DATE) for i in xrange(num_partitions)],
reset=True)
self.assertEqual(num_partitions, 1 + db._getMaxPartition())
db.commit()
def checkConfigEntry(self, get_call, set_call, value):
# generic test for all configuration entries accessors
self.assertEqual(get_call(), None)
set_call(value)
self.assertEqual(get_call(), value)
set_call(value * 2)
self.assertEqual(get_call(), value * 2)
@contextmanager
def commitTransaction(self, tid, objs, txn, commit=True):
ttid = txn[-1]
self.db.storeTransaction(ttid, objs, txn)
self.db.lockTransaction(tid, ttid)
yield
if commit:
self.db.unlockTransaction(tid, ttid, True, objs)
self.db.commit()
elif commit is not None:
self.db.abortTransaction(ttid)
def test_UUID(self):
db = self.getDB()
self.checkConfigEntry(db.getUUID, db.setUUID, 123)
def test_Name(self):
db = self.getDB()
self.checkConfigEntry(db.getName, db.setName, 'TEST_NAME')
def getOIDs(self, count):
return map(p64, xrange(count))
def getTIDs(self, count):
tid_list = [self.getNextTID()]
while len(tid_list) != count:
tid_list.append(self.getNextTID(tid_list[-1]))
return tid_list
def getTransaction(self, oid_list):
self._last_ttid = ttid = add64(self._last_ttid, 1)
transaction = oid_list, 'user', 'desc', 'ext', False, ttid
H = "0" * 20
object_list = [(oid, self.db.holdData(H, oid, '', 1), None)
for oid in oid_list]
return (transaction, object_list)
def checkSet(self, list1, list2):
self.assertEqual(set(list1), set(list2))
def _test_lockDatabase_open(self):
raise NotImplementedError
def test_lockDatabase(self):
db = self._test_lockDatabase_open()
self.assertRaises(SystemExit, self._test_lockDatabase_open)
db.close()
self._test_lockDatabase_open().close()
def test_getUnfinishedTIDDict(self):
tid1, tid2, tid3, tid4 = self.getTIDs(4)
oid1, oid2 = self.getOIDs(2)
txn, objs = self.getTransaction([oid1, oid2])
# one unfinished txn
with self.commitTransaction(tid2, objs, txn):
expected = {txn[-1]: tid2}
self.assertEqual(self.db.getUnfinishedTIDDict(), expected)
# no changes
self.db.storeTransaction(tid3, objs, None, False)
self.assertEqual(self.db.getUnfinishedTIDDict(), expected)
# a second txn known by objs only
expected[tid4] = None
self.db.storeTransaction(tid4, objs, None)
self.assertEqual(self.db.getUnfinishedTIDDict(), expected)
self.db.abortTransaction(tid4)
# nothing pending
self.assertEqual(self.db.getUnfinishedTIDDict(), {})
def test_getObject(self):
oid1, = self.getOIDs(1)
tid1, tid2 = self.getTIDs(2)
FOUND_BUT_NOT_VISIBLE = False
OBJECT_T1_NO_NEXT = (tid1, None, 1, "0"*20, '', None)
OBJECT_T1_NEXT = (tid1, tid2, 1, "0"*20, '', None)
OBJECT_T2 = (tid2, None, 1, "0"*20, '', None)
txn1, objs1 = self.getTransaction([oid1])
txn2, objs2 = self.getTransaction([oid1])
# non-present
self.assertEqual(self.db.getObject(oid1), None)
self.assertEqual(self.db.getObject(oid1, tid1), None)
self.assertEqual(self.db.getObject(oid1, before_tid=tid1), None)
# one non-committed version
with self.commitTransaction(tid1, objs1, txn1):
self.assertEqual(self.db.getObject(oid1), None)
self.assertEqual(self.db.getObject(oid1, tid1), None)
self.assertEqual(self.db.getObject(oid1, before_tid=tid1), None)
# one committed version
self.assertEqual(self.db.getObject(oid1), OBJECT_T1_NO_NEXT)
self.assertEqual(self.db.getObject(oid1, tid1), OBJECT_T1_NO_NEXT)
self.assertEqual(self.db.getObject(oid1, before_tid=tid1),
FOUND_BUT_NOT_VISIBLE)
# two version available, one non-committed
with self.commitTransaction(tid2, objs2, txn2):
self.assertEqual(self.db.getObject(oid1), OBJECT_T1_NO_NEXT)
self.assertEqual(self.db.getObject(oid1, tid1), OBJECT_T1_NO_NEXT)
self.assertEqual(self.db.getObject(oid1, before_tid=tid1),
FOUND_BUT_NOT_VISIBLE)
self.assertEqual(self.db.getObject(oid1, tid2),
FOUND_BUT_NOT_VISIBLE)
self.assertEqual(self.db.getObject(oid1, before_tid=tid2),
OBJECT_T1_NO_NEXT)
# two committed versions
self.assertEqual(self.db.getObject(oid1), OBJECT_T2)
self.assertEqual(self.db.getObject(oid1, tid1), OBJECT_T1_NEXT)
self.assertEqual(self.db.getObject(oid1, before_tid=tid1),
FOUND_BUT_NOT_VISIBLE)
self.assertEqual(self.db.getObject(oid1, tid2), OBJECT_T2)
self.assertEqual(self.db.getObject(oid1, before_tid=tid2),
OBJECT_T1_NEXT)
def test_commitTransaction(self):
oid1, oid2 = self.getOIDs(2)
tid1, tid2 = self.getTIDs(2)
txn1, objs1 = self.getTransaction([oid1])
txn2, objs2 = self.getTransaction([oid2])
# nothing in database
self.assertEqual(self.db.getLastIDs(), (None, None))
self.assertEqual(self.db.getUnfinishedTIDDict(), {})
self.assertEqual(self.db.getObject(oid1), None)
self.assertEqual(self.db.getObject(oid2), None)
self.assertEqual(self.db.getTransaction(tid1, True), None)
self.assertEqual(self.db.getTransaction(tid2, True), None)
self.assertEqual(self.db.getTransaction(tid1, False), None)
self.assertEqual(self.db.getTransaction(tid2, False), None)
with self.commitTransaction(tid1, objs1, txn1), \
self.commitTransaction(tid2, objs2, txn2):
self.assertEqual(self.db.getTransaction(tid1, True),
([oid1], 'user', 'desc', 'ext', False, p64(1)))
self.assertEqual(self.db.getTransaction(tid2, True),
([oid2], 'user', 'desc', 'ext', False, p64(2)))
self.assertEqual(self.db.getTransaction(tid1, False), None)
self.assertEqual(self.db.getTransaction(tid2, False), None)
result = self.db.getTransaction(tid1, True)
self.assertEqual(result, ([oid1], 'user', 'desc', 'ext', False, p64(1)))
result = self.db.getTransaction(tid2, True)
self.assertEqual(result, ([oid2], 'user', 'desc', 'ext', False, p64(2)))
result = self.db.getTransaction(tid1, False)
self.assertEqual(result, ([oid1], 'user', 'desc', 'ext', False, p64(1)))
result = self.db.getTransaction(tid2, False)
self.assertEqual(result, ([oid2], 'user', 'desc', 'ext', False, p64(2)))
def test_deleteTransaction(self):
txn, objs = self.getTransaction([])
tid = txn[-1]
self.db.storeTransaction(tid, objs, txn, False)
self.assertEqual(self.db.getTransaction(tid), txn)
self.db.deleteTransaction(tid)
self.assertEqual(self.db.getTransaction(tid), None)
def test_deleteObject(self):
oid1, oid2 = self.getOIDs(2)
tid1, tid2 = self.getTIDs(2)
txn1, objs1 = self.getTransaction([oid1, oid2])
txn2, objs2 = self.getTransaction([oid1, oid2])
tid1 = txn1[-1]
tid2 = txn2[-1]
self.db.storeTransaction(tid1, objs1, txn1, False)
self.db.storeTransaction(tid2, objs2, txn2, False)
self.assertEqual(self.db.getObject(oid1, tid=tid1),
(tid1, tid2, 1, "0" * 20, '', None))
self.db.deleteObject(oid1)
self.assertIs(self.db.getObject(oid1, tid=tid1), None)
self.assertIs(self.db.getObject(oid1, tid=tid2), None)
self.db.deleteObject(oid2, serial=tid1)
self.assertIs(self.db.getObject(oid2, tid=tid1), False)
self.assertEqual(self.db.getObject(oid2, tid=tid2),
(tid2, None, 1, "0" * 20, '', None))
def test_deleteRange(self):
np = 4
self.setNumPartitions(np)
t1, t2, t3 = map(p64, (1, 2, 3))
oid_list = self.getOIDs(np * 2)
for tid in t1, t2, t3:
txn, objs = self.getTransaction(oid_list)
self.db.storeTransaction(tid, objs, txn, False)
def check(offset, tid_list, *tids):
self.assertEqual(self.db.getReplicationTIDList(ZERO_TID,
MAX_TID, len(tid_list) + 1, offset), tid_list)
expected = [(t, oid_list[offset+i]) for t in tids for i in (0, np)]
self.assertEqual(self.db.getReplicationObjectList(ZERO_TID,
MAX_TID, len(expected) + 1, offset, ZERO_OID), expected)
def deleteRange(partition, min_tid=None, max_tid=None):
self.db._deleteRange(partition,
None if min_tid is None else u64(min_tid),
None if max_tid is None else u64(max_tid))
deleteRange(0, MAX_TID)
deleteRange(0, max_tid=ZERO_TID)
check(0, [], t1, t2, t3)
deleteRange(0); check(0, [])
deleteRange(1, t2); check(1, [t1], t1, t2)
deleteRange(2, max_tid=t2); check(2, [], t3)
deleteRange(3, t1, t2); check(3, [t3], t1, t3)
def test_getTransaction(self):
oid1, oid2 = self.getOIDs(2)
tid1, tid2 = self.getTIDs(2)
txn1, objs1 = self.getTransaction([oid1])
txn2, objs2 = self.getTransaction([oid2])
# get from temporary table or not
with self.commitTransaction(tid1, objs1, txn1), \
self.commitTransaction(tid2, objs2, txn2, None):
pass
result = self.db.getTransaction(tid1, True)
self.assertEqual(result, ([oid1], 'user', 'desc', 'ext', False, p64(1)))
result = self.db.getTransaction(tid2, True)
self.assertEqual(result, ([oid2], 'user', 'desc', 'ext', False, p64(2)))
# get from non-temporary only
result = self.db.getTransaction(tid1, False)
self.assertEqual(result, ([oid1], 'user', 'desc', 'ext', False, p64(1)))
self.assertEqual(self.db.getTransaction(tid2, False), None)
def test_getObjectHistory(self):
oid = p64(1)
tid1, tid2, tid3 = self.getTIDs(3)
txn1, objs1 = self.getTransaction([oid])
txn2, objs2 = self.getTransaction([oid])
txn3, objs3 = self.getTransaction([oid])
# one revision
self.db.storeTransaction(tid1, objs1, txn1, False)
result = self.db.getObjectHistory(oid, 0, 3)
self.assertEqual(result, [(tid1, 0)])
result = self.db.getObjectHistory(oid, 1, 1)
self.assertEqual(result, None)
# two revisions
self.db.storeTransaction(tid2, objs2, txn2, False)
result = self.db.getObjectHistory(oid, 0, 3)
self.assertEqual(result, [(tid2, 0), (tid1, 0)])
result = self.db.getObjectHistory(oid, 1, 3)
self.assertEqual(result, [(tid1, 0)])
result = self.db.getObjectHistory(oid, 2, 3)
self.assertEqual(result, None)
def _storeTransactions(self, count):
# use OID generator to know result of tid % N
tid_list = self.getOIDs(count)
oid = p64(1)
for tid in tid_list:
txn, objs = self.getTransaction([oid])
self.db.storeTransaction(tid, objs, txn, False)
return tid_list
def test_getTIDList(self):
self.setNumPartitions(2, True)
tid1, tid2, tid3, tid4 = self._storeTransactions(4)
# get tids
# - all partitions
result = self.db.getTIDList(0, 4, [0, 1])
self.checkSet(result, [tid1, tid2, tid3, tid4])
# - one partition
result = self.db.getTIDList(0, 4, [0])
self.checkSet(result, [tid1, tid3])
result = self.db.getTIDList(0, 4, [1])
self.checkSet(result, [tid2, tid4])
# get a subset of tids
result = self.db.getTIDList(0, 1, [0])
self.checkSet(result, [tid3]) # desc order
result = self.db.getTIDList(1, 1, [1])
self.checkSet(result, [tid2])
result = self.db.getTIDList(2, 2, [0])
self.checkSet(result, [])
def test_getReplicationTIDList(self):
self.setNumPartitions(2, True)
tid1, tid2, tid3, tid4 = self._storeTransactions(4)
# - one partition
result = self.db.getReplicationTIDList(ZERO_TID, MAX_TID, 10, 0)
self.checkSet(result, [tid1, tid3])
# - another partition
result = self.db.getReplicationTIDList(ZERO_TID, MAX_TID, 10, 1)
self.checkSet(result, [tid2, tid4])
# - min_tid is inclusive
result = self.db.getReplicationTIDList(tid3, MAX_TID, 10, 0)
self.checkSet(result, [tid3])
# - max tid is inclusive
result = self.db.getReplicationTIDList(ZERO_TID, tid2, 10, 0)
self.checkSet(result, [tid1])
# - limit
result = self.db.getReplicationTIDList(ZERO_TID, MAX_TID, 1, 0)
self.checkSet(result, [tid1])
def test_checkRange(self):
def check(trans, obj, *args):
self.assertEqual(trans, self.db.checkTIDRange(*args))
self.assertEqual(obj, self.db.checkSerialRange(*(args+(ZERO_OID,))))
self.setNumPartitions(2, True)
tid1, tid2, tid3, tid4 = self._storeTransactions(4)
z = 0, ZERO_HASH, ZERO_TID, ZERO_HASH, ZERO_OID
# - one partition
check((2, a2b_hex('84320eb8dbbe583f67055c15155ab6794f11654d'), tid3),
z,
0, 10, ZERO_TID, MAX_TID)
# - another partition
check((2, a2b_hex('1f02f98cf775a9e0ce9252ff5972dce728c4ddb0'), tid4),
(4, a2b_hex('e5b47bddeae2096220298df686737d939a27d736'), tid4,
a2b_hex('1e9093698424b5370e19acd2d5fc20dcd56a32cd'), p64(1)),
1, 10, ZERO_TID, MAX_TID)
self.assertEqual(
(3, a2b_hex('b85e2d4914e22b5ad3b82b312b3dc405dc17dcb8'), tid4,
a2b_hex('1b6d73ecdc064595fe915a5c26da06b195caccaa'), p64(1)),
self.db.checkSerialRange(1, 10, ZERO_TID, MAX_TID, p64(2)))
# - min_tid is inclusive
check((1, a2b_hex('da4b9237bacccdf19c0760cab7aec4a8359010b0'), tid3),
z,
0, 10, tid3, MAX_TID)
# - max tid is inclusive
x = 1, a2b_hex('b6589fc6ab0dc82cf12099d1c2d40ab994e8410c'), tid1
check(x, z, 0, 10, ZERO_TID, tid2)
# - limit
y = 1, a2b_hex('356a192b7913b04c54574d18c28d46e6395428ab'), tid2
check(y, x + y[1:], 1, 1, ZERO_TID, MAX_TID)
def test_findUndoTID(self):
self.setNumPartitions(4, True)
db = self.db
tid1 = self.getNextTID()
tid2 = self.getNextTID()
tid3 = self.getNextTID()
tid4 = self.getNextTID()
tid5 = self.getNextTID()
oid1 = p64(1)
foo = db.holdData("3" * 20, oid1, 'foo', 0)
bar = db.holdData("4" * 20, oid1, 'bar', 0)
db.releaseData((foo, bar))
db.storeTransaction(
tid1, (
(oid1, foo, None),
), None, temporary=False)
# Undoing oid1 tid1, OK: tid1 is latest
# Result: current tid is tid1, data_tid is None (undoing object
# creation)
self.assertEqual(
db.findUndoTID(oid1, tid5, tid4, tid1, None),
(tid1, None, True))
# Store a new transaction
db.storeTransaction(
tid2, (
(oid1, bar, None),
), None, temporary=False)
# Undoing oid1 tid2, OK: tid2 is latest
# Result: current tid is tid2, data_tid is tid1
self.assertEqual(
db.findUndoTID(oid1, tid5, tid4, tid2, None),
(tid2, tid1, True))
# Undoing oid1 tid1, Error: tid2 is latest
# Result: current tid is tid2, data_tid is -1
self.assertEqual(
db.findUndoTID(oid1, tid5, tid4, tid1, None),
(tid2, None, False))
# Undoing oid1 tid1 with tid2 being undone in same transaction,
# OK: tid1 is latest
# Result: current tid is tid1, data_tid is None (undoing object
# creation)
# Explanation of transaction_object: oid1, no data but a data serial
# to tid1
self.assertEqual(
db.findUndoTID(oid1, tid5, tid4, tid1,
(u64(oid1), None, tid1)),
(tid1, None, True))
# Store a new transaction
db.storeTransaction(
tid3, (
(oid1, None, tid1),
), None, temporary=False)
# Undoing oid1 tid1, OK: tid3 is latest with tid1 data
# Result: current tid is tid2, data_tid is None (undoing object
# creation)
self.assertEqual(
db.findUndoTID(oid1, tid5, tid4, tid1, None),
(tid3, None, True))
if __name__ == "__main__":
unittest.main()
neoppod-bd5ba87ae6e0b8169b0da15fed0e32e56e7964af-neo/neo/tests/storage/testStorageMySQL.py 0000664 0000000 0000000 00000012110 13465024610 0030371 0 ustar 00root root 0000000 0000000 #
# Copyright (C) 2009-2019 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
import unittest
from contextlib import contextmanager
from MySQLdb import NotSupportedError, OperationalError, ProgrammingError
from MySQLdb.constants.CR import SERVER_GONE_ERROR
from MySQLdb.constants.ER import UNKNOWN_STORAGE_ENGINE
from ..mock import Mock
from neo.lib.protocol import ZERO_OID
from neo.lib.util import p64
from .. import DB_PREFIX, DB_USER, Patch, setupMySQLdb
from .testStorageDBTests import StorageDBTests
from neo.storage.database import DatabaseFailure
from neo.storage.database.mysqldb import MySQLDatabaseManager
class ServerGone(object):
@contextmanager
def __new__(cls, db):
self = object.__new__(cls)
with Patch(db, conn=self) as self._p:
yield self._p
def query(self, *args):
self._p.revert()
raise OperationalError(SERVER_GONE_ERROR, 'this is a test')
class StorageMySQLdbTests(StorageDBTests):
engine = None
def _test_lockDatabase_open(self):
self.prepareDatabase(1)
database = self.db_template(0)
return MySQLDatabaseManager(database, self.engine)
def getDB(self, reset=0):
db = self._test_lockDatabase_open()
self.assertEqual(db.db, DB_PREFIX + '0')
self.assertEqual(db.user, DB_USER)
try:
db.setup(reset, True)
except NotSupportedError as m:
code, m = m.args
if code != UNKNOWN_STORAGE_ENGINE:
raise
raise unittest.SkipTest(m)
return db
def test_query1(self):
# fake result object
from array import array
result_object = Mock({
"num_rows": 1,
"fetch_row": ((1, 2, array('b', (1, 2, ))), ),
})
# expected formatted result
expected_result = (
(1, 2, '\x01\x02', ),
)
self.db.conn = Mock({ 'store_result': result_object })
result = self.db.query('SELECT ')
self.assertEqual(result, expected_result)
calls = self.db.conn.mockGetNamedCalls('query')
self.assertEqual(len(calls), 1)
calls[0].checkArgs('SELECT ')
def test_query2(self):
with ServerGone(self.db) as p:
self.assertRaises(ProgrammingError, self.db.query, 'QUERY')
self.assertFalse(p.applied)
def test_query3(self):
# OperationalError > raise DatabaseFailure exception
class FakeConn(object):
def close(self):
pass
def query(*args):
raise OperationalError(-1, 'this is a test')
self.db.conn = FakeConn()
self.assertRaises(DatabaseFailure, self.db.query, 'QUERY')
def test_escape(self):
self.assertEqual(self.db.escape('a"b'), 'a\\"b')
self.assertEqual(self.db.escape("a'b"), "a\\'b")
def test_max_allowed_packet(self):
EXTRA = 2
# Check EXTRA
x = "SELECT '%s'" % ('x' * (self.db._max_allowed_packet - 11))
assert len(x) + EXTRA == self.db._max_allowed_packet
self.assertRaises(DatabaseFailure, self.db.query, x + ' ')
self.db.query(x)
# Reconnection cleared the cache of the config table,
# so fill it again with required values before we patch query().
self.db._getPartition
# Check MySQLDatabaseManager._max_allowed_packet
query_list = []
self.db.query = lambda query: query_list.append(EXTRA + len(query))
self.assertEqual(2, max(len(self.db.escape(chr(x)))
for x in xrange(256)))
self.assertEqual(2, len(self.db.escape('\0')))
self.db.storeData('\0' * 20, ZERO_OID, '\0' * (2**24-1), 0)
size, = query_list
max_allowed = self.db.__class__._max_allowed_packet
self.assertTrue(max_allowed - 1024 < size <= max_allowed, size)
# Check storeTransaction
for count, max_allowed_packet in (7, 64), (6, 65), (1, 215):
self.db._max_allowed_packet = max_allowed_packet
del query_list[:]
self.db.storeTransaction(p64(0),
((p64(1< |