Commit edde1fe3 authored by Julien Muchembled's avatar Julien Muchembled

Rewrite storage cache of client

- Stop using a list of (in)validated tid (hence removal of RevisionIndex),
  because it can't work in all cases and would even cause memory leaks.
  For example, this bug could lead to ConflictError with a single client.
  Fixing it also requires that database backends always return the next serial.
- Several performance improvements. The most important one is when the latest
  version of an object is cached: it inherits the access counter of the
  previous one (for the same oid), which gets in turn its counter reset.
- Do not waste CPU evaluating the real size taken by an entry in memory.
  Just use 'len' on the value (which is always a pickle data, i.e. a string).

git-svn-id: https://svn.erp5.org/repos/neo/trunk@2711 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 240c94b3
...@@ -42,12 +42,11 @@ from neo.client.handlers import storage, master ...@@ -42,12 +42,11 @@ from neo.client.handlers import storage, master
from neo.lib.dispatcher import Dispatcher, ForgottenPacket from neo.lib.dispatcher import Dispatcher, ForgottenPacket
from neo.client.poll import ThreadedPoll, psThreadedPoll from neo.client.poll import ThreadedPoll, psThreadedPoll
from neo.client.iterator import Iterator from neo.client.iterator import Iterator
from neo.client.mq import MQ from neo.client.cache import ClientCache
from neo.client.pool import ConnectionPool from neo.client.pool import ConnectionPool
from neo.lib.util import u64, parseMasterList from neo.lib.util import u64, parseMasterList
from neo.lib.profiling import profiler_decorator, PROFILING_ENABLED from neo.lib.profiling import profiler_decorator, PROFILING_ENABLED
from neo.lib.debug import register as registerLiveDebugger from neo.lib.debug import register as registerLiveDebugger
from neo.client.mq_index import RevisionIndex
from neo.client.container import ThreadContainer, TransactionContainer from neo.client.container import ThreadContainer, TransactionContainer
if PROFILING_ENABLED: if PROFILING_ENABLED:
...@@ -93,9 +92,7 @@ class Application(object): ...@@ -93,9 +92,7 @@ class Application(object):
# no self-assigned UUID, primary master will supply us one # no self-assigned UUID, primary master will supply us one
self.uuid = None self.uuid = None
self.mq_cache = MQ() self._cache = ClientCache()
self.cache_revision_index = RevisionIndex()
self.mq_cache.addIndex(self.cache_revision_index)
self.new_oid_list = [] self.new_oid_list = []
self.last_oid = '\0' * 8 self.last_oid = '\0' * 8
self.storage_event_handler = storage.StorageEventHandler(self) self.storage_event_handler = storage.StorageEventHandler(self)
...@@ -432,20 +429,17 @@ class Application(object): ...@@ -432,20 +429,17 @@ class Application(object):
self._load_lock_acquire() self._load_lock_acquire()
try: try:
try: result = self._loadFromCache(oid, serial, tid)
return self._loadFromCache(oid, serial, tid) if not result:
except KeyError: result = self._loadFromStorage(oid, serial, tid)
pass self._cache_lock_acquire()
data, start_serial, end_serial = self._loadFromStorage(oid, serial, try:
tid) self._cache.store(oid, *result)
self._cache_lock_acquire() finally:
try: self._cache_lock_release()
self.mq_cache[(oid, start_serial)] = data, end_serial if result[0] == '':
finally: raise NEOStorageCreationUndoneError(dump(oid))
self._cache_lock_release() return result
if data == '':
raise NEOStorageCreationUndoneError(dump(oid))
return data, start_serial, end_serial
finally: finally:
self._load_lock_release() self._load_lock_release()
...@@ -479,24 +473,17 @@ class Application(object): ...@@ -479,24 +473,17 @@ class Application(object):
return data, tid, next_tid return data, tid, next_tid
@profiler_decorator @profiler_decorator
def _loadFromCache(self, oid, at_tid, before_tid): def _loadFromCache(self, oid, at_tid=None, before_tid=None):
""" """
Load from local cache, raising KeyError if not found. Load from local cache, return None if not found.
""" """
self._cache_lock_acquire() self._cache_lock_acquire()
try: try:
if at_tid is not None: if at_tid:
tid = at_tid result = self._cache.load(oid, at_tid + '*')
elif before_tid is not None: assert not result or result[1] == at_tid
tid = self.cache_revision_index.getSerialBefore(oid, return result
before_tid) return self._cache.load(oid, before_tid)
else:
tid = self.cache_revision_index.getLatestSerial(oid)
if tid is None:
raise KeyError
# Raises KeyError on miss
data, next_tid = self.mq_cache[(oid, tid)]
return (data, tid, next_tid)
finally: finally:
self._cache_lock_release() self._cache_lock_release()
...@@ -808,14 +795,7 @@ class Application(object): ...@@ -808,14 +795,7 @@ class Application(object):
# Update cache # Update cache
self._cache_lock_acquire() self._cache_lock_acquire()
try: try:
mq_cache = self.mq_cache cache = self._cache
update = mq_cache.update
def updateNextSerial(value):
data, next_tid = value
assert next_tid is None, (dump(oid), dump(base_tid),
dump(next_tid))
return (data, tid)
get_baseTID = txn_context['object_base_serial_dict'].get
for oid, data in txn_context['data_dict'].iteritems(): for oid, data in txn_context['data_dict'].iteritems():
if data is None: if data is None:
# this is just a remain of # this is just a remain of
...@@ -823,16 +803,10 @@ class Application(object): ...@@ -823,16 +803,10 @@ class Application(object):
# was modified). # was modified).
continue continue
# Update ex-latest value in cache # Update ex-latest value in cache
base_tid = get_baseTID(oid) cache.invalidate(oid, tid)
try: if data:
update((oid, base_tid), updateNextSerial)
except KeyError:
pass
if data == '':
self.cache_revision_index.invalidate([oid], tid)
else:
# Store in cache with no next_tid # Store in cache with no next_tid
mq_cache[(oid, tid)] = (data, None) cache.store(oid, data, tid, None)
finally: finally:
self._cache_lock_release() self._cache_lock_release()
txn_container.delete(transaction) txn_container.delete(transaction)
...@@ -1105,7 +1079,7 @@ class Application(object): ...@@ -1105,7 +1079,7 @@ class Application(object):
# by a pack), so don't bother invalidating on other clients. # by a pack), so don't bother invalidating on other clients.
self._cache_lock_acquire() self._cache_lock_acquire()
try: try:
self.mq_cache.clear() self._cache.clear()
finally: finally:
self._cache_lock_release() self._cache_lock_release()
......
This diff is collapsed.
...@@ -121,11 +121,12 @@ class PrimaryNotificationsHandler(BaseHandler): ...@@ -121,11 +121,12 @@ class PrimaryNotificationsHandler(BaseHandler):
app = self.app app = self.app
app._cache_lock_acquire() app._cache_lock_acquire()
try: try:
# ZODB required a dict with oid as key, so create it invalidate = app._cache.invalidate
app.cache_revision_index.invalidate(oid_list, tid) for oid in oid_list:
invalidate(oid, tid)
db = app.getDB() db = app.getDB()
if db is not None: if db is not None:
db.invalidate(tid, dict.fromkeys(oid_list, tid)) db.invalidate(tid, oid_list)
finally: finally:
app._cache_lock_release() app._cache_lock_release()
......
#
# Copyright (C) 2010-2011 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from neo.client.mq import MQIndex
class RevisionIndex(MQIndex):
    """
    This cache index allows accessing a specific revision of a cached object.
    It requires cache key to be a 2-tuple, composed of oid and revision.

    Note: it is expected that rather few revisions are held in cache, with few
    lookups for old revisions, so they are held in a simple sorted list.
    Note2: all methods here must be called with cache lock acquired.
    """
    def __init__(self):
        # key: oid
        # value: tid list, from highest to lowest
        self._oid_dict = {}
        # key: oid
        # value: tid list, from lowest to highest
        self._invalidated = {}

    def clear(self):
        """Drop all known revisions and pending invalidations."""
        self._oid_dict.clear()
        self._invalidated.clear()

    def remove(self, key):
        """
        Forget a cached (oid, tid) revision.
        Raises KeyError/ValueError if the revision is unknown.
        """
        oid_dict = self._oid_dict
        oid, tid = key
        tid_list = oid_dict[oid]
        tid_list.remove(tid)
        if not tid_list:
            # No more serial known for this object, drop entirely
            del oid_dict[oid]
            self._invalidated.pop(oid, None)

    def add(self, key):
        """
        Register a cached (oid, tid) revision.
        Keeps the per-oid tid list sorted from highest to lowest, and
        discards a matching pending invalidation, if any.
        """
        oid_dict = self._oid_dict
        oid, tid = key
        try:
            serial_list = oid_dict[oid]
        except KeyError:
            serial_list = oid_dict[oid] = []
        else:
            assert tid not in serial_list
        # Fast path: a new maximum (the common case) keeps the list sorted
        # with a simple insertion at the head. Otherwise, re-sort.
        # (Original code had two branches both doing insert(0, tid).)
        serial_list.insert(0, tid)
        if len(serial_list) > 1 and tid < serial_list[1]:
            serial_list.sort(reverse=True)
        invalidated = self._invalidated
        try:
            tid_list = invalidated[oid]
        except KeyError:
            pass
        else:
            try:
                tid_list.remove(tid)
            except ValueError:
                pass
            else:
                if not tid_list:
                    del invalidated[oid]

    def invalidate(self, oid_list, tid):
        """
        Mark object invalidated by given transaction.
        Must be called with increasing TID values (which is standard for
        ZODB).
        """
        invalidated = self._invalidated
        oid_dict = self._oid_dict
        for oid in (x for x in oid_list if x in oid_dict):
            try:
                tid_list = invalidated[oid]
            except KeyError:
                tid_list = invalidated[oid] = []
            # BUG FIX: the original assert message called 'dump', which is
            # not defined in this module, so a failing assert raised
            # NameError instead of a meaningful AssertionError.
            assert not tid_list or tid > tid_list[-1], (repr(oid), repr(tid),
                repr(tid_list[-1]))
            tid_list.append(tid)

    def getSerialBefore(self, oid, tid):
        """
        Get the first tid in cache which value is lower than given tid.
        Returns None when unknown, or when a missing intermediate revision
        might exist between the cached candidate and the requested tid.
        """
        # WARNING: return-intensive to save on indentation
        oid_list = self._oid_dict.get(oid)
        if oid_list is None:
            # Unknown oid
            return None
        for result in oid_list:
            if result < tid:
                # Candidate found
                break
        else:
            # No candidate in cache.
            return None
        # Check if there is a chance that an intermediate revision would
        # exist, while missing from cache.
        try:
            inv_tid_list = self._invalidated[oid]
        except KeyError:
            return result
        # Remember: inv_tid_list is sorted in ascending order.
        for inv_tid in inv_tid_list:
            if tid < inv_tid:
                # We don't care about invalidations past requested TID.
                break
            elif result < inv_tid < tid:
                # An invalidation was received between candidate revision,
                # and before requested TID: there is a matching revision we
                # don't know of, so we cannot answer.
                return None
        return result

    def getLatestSerial(self, oid):
        """
        Get the latest tid for given object.
        Returns None when oid is unknown, or when an invalidation more
        recent than the cached revision makes the answer unreliable.
        """
        result = self._oid_dict.get(oid)
        if result is not None:
            result = result[0]
            try:
                tid_list = self._invalidated[oid]
            except KeyError:
                pass
            else:
                if result < tid_list[-1]:
                    # An invalidation happened from a transaction later than
                    # our most recent view of this object, so we cannot answer.
                    result = None
        return result

    def getSerialList(self, oid):
        """
        Get the list of all serials cache knows about for given object.
        Returns a copy, so callers may mutate it freely.
        """
        return self._oid_dict.get(oid, [])[:]
...@@ -275,37 +275,22 @@ class BTreeDatabaseManager(DatabaseManager): ...@@ -275,37 +275,22 @@ class BTreeDatabaseManager(DatabaseManager):
def _getObject(self, oid, tid=None, before_tid=None): def _getObject(self, oid, tid=None, before_tid=None):
tserial = self._obj.get(oid) tserial = self._obj.get(oid)
if tserial is None: if tserial is not None:
result = None
else:
if tid is None: if tid is None:
if before_tid is None: try:
try: if before_tid is None:
tid = tserial.maxKey() tid = tserial.maxKey()
except ValueError: else:
tid = None tid = tserial.maxKey(before_tid - 1)
else: except ValueError:
before_tid -= 1 return
try: result = tserial.get(tid)
tid = tserial.maxKey(before_tid)
except ValueError:
tid = None
if tid is None:
result = None
else:
result = tserial.get(tid, None)
if result: if result:
compression, checksum, data, value_serial = result try:
if before_tid is None: next_serial = tserial.minKey(tid + 1)
except ValueError:
next_serial = None next_serial = None
else: return (tid, next_serial) + result
try:
next_serial = tserial.minKey(tid + 1)
except ValueError:
next_serial = None
result = (tid, next_serial, compression, checksum, data,
value_serial)
return result
def doSetPartitionTable(self, ptid, cell_list, reset): def doSetPartitionTable(self, ptid, cell_list, reset):
pt = self._pt pt = self._pt
......
...@@ -255,6 +255,8 @@ class DatabaseManager(object): ...@@ -255,6 +255,8 @@ class DatabaseManager(object):
else: else:
serial, next_serial, compression, checksum, data, data_serial = \ serial, next_serial, compression, checksum, data, data_serial = \
result result
assert before_tid is None or next_serial is None or \
before_tid <= next_serial
if data is None and resolve_data: if data is None and resolve_data:
try: try:
_, compression, checksum, data = self._getObjectData(oid, _, compression, checksum, data = self._getObjectData(oid,
......
...@@ -335,50 +335,30 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -335,50 +335,30 @@ class MySQLDatabaseManager(DatabaseManager):
def _getObject(self, oid, tid=None, before_tid=None): def _getObject(self, oid, tid=None, before_tid=None):
q = self.query q = self.query
partition = self._getPartition(oid) partition = self._getPartition(oid)
sql = """SELECT serial, compression, checksum, value, value_serial
FROM obj
WHERE partition = %d
AND oid = %d""" % (partition, oid)
if tid is not None: if tid is not None:
r = q("""SELECT serial, compression, checksum, value, value_serial sql += ' AND serial = %d' % tid
FROM obj
WHERE partition = %d AND oid = %d AND serial = %d""" \
% (partition, oid, tid))
try:
serial, compression, checksum, data, value_serial = r[0]
next_serial = None
except IndexError:
return None
elif before_tid is not None: elif before_tid is not None:
r = q("""SELECT serial, compression, checksum, value, value_serial sql += ' AND serial < %d ORDER BY serial DESC LIMIT 1' % before_tid
FROM obj
WHERE partition = %d
AND oid = %d AND serial < %d
ORDER BY serial DESC LIMIT 1""" \
% (partition, oid, before_tid))
try:
serial, compression, checksum, data, value_serial = r[0]
except IndexError:
return None
r = q("""SELECT serial FROM obj_short
WHERE partition = %d
AND oid = %d AND serial >= %d
ORDER BY serial LIMIT 1""" \
% (partition, oid, before_tid))
try:
next_serial = r[0][0]
except IndexError:
next_serial = None
else: else:
# XXX I want to express "HAVING serial = MAX(serial)", but # XXX I want to express "HAVING serial = MAX(serial)", but
# MySQL does not use an index for a HAVING clause! # MySQL does not use an index for a HAVING clause!
r = q("""SELECT serial, compression, checksum, value, value_serial sql += ' ORDER BY serial DESC LIMIT 1'
FROM obj r = q(sql)
WHERE partition = %d AND oid = %d try:
ORDER BY serial DESC LIMIT 1""" \ serial, compression, checksum, data, value_serial = r[0]
% (partition, oid)) except IndexError:
try: return None
serial, compression, checksum, data, value_serial = r[0] r = q("""SELECT serial FROM obj_short
next_serial = None WHERE partition = %d AND oid = %d AND serial > %d
except IndexError: ORDER BY serial LIMIT 1""" % (partition, oid, serial))
return None try:
next_serial = r[0][0]
except IndexError:
next_serial = None
return serial, next_serial, compression, checksum, data, value_serial return serial, next_serial, compression, checksum, data, value_serial
def doSetPartitionTable(self, ptid, cell_list, reset): def doSetPartitionTable(self, ptid, cell_list, reset):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment