Commit edde1fe3 authored by Julien Muchembled's avatar Julien Muchembled

Rewrite storage cache of client

- Stop using a list of (in)validated tid (hence removal of RevisionIndex),
  because it can't work in all cases and would even cause memory leaks.
  For example, this bug could lead to ConflictError with a single client.
  Fixing it also requires that database backends always return the next serial.
- Several performance improvements. The most important one is when the latest
  version of an object is cached: it inherits the access counter of the
  previous one (for the same oid), which gets in turn its counter reset.
- Do not waste CPU evaluating the real size taken by an entry in memory.
  Just use 'len' on the value (which is always pickle data, i.e. a string).

git-svn-id: https://svn.erp5.org/repos/neo/trunk@2711 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 240c94b3
......@@ -42,12 +42,11 @@ from neo.client.handlers import storage, master
from neo.lib.dispatcher import Dispatcher, ForgottenPacket
from neo.client.poll import ThreadedPoll, psThreadedPoll
from neo.client.iterator import Iterator
from neo.client.mq import MQ
from neo.client.cache import ClientCache
from neo.client.pool import ConnectionPool
from neo.lib.util import u64, parseMasterList
from neo.lib.profiling import profiler_decorator, PROFILING_ENABLED
from neo.lib.debug import register as registerLiveDebugger
from neo.client.mq_index import RevisionIndex
from neo.client.container import ThreadContainer, TransactionContainer
if PROFILING_ENABLED:
......@@ -93,9 +92,7 @@ class Application(object):
# no self-assigned UUID, primary master will supply us one
self.uuid = None
self.mq_cache = MQ()
self.cache_revision_index = RevisionIndex()
self.mq_cache.addIndex(self.cache_revision_index)
self._cache = ClientCache()
self.new_oid_list = []
self.last_oid = '\0' * 8
self.storage_event_handler = storage.StorageEventHandler(self)
......@@ -432,20 +429,17 @@ class Application(object):
self._load_lock_acquire()
try:
try:
return self._loadFromCache(oid, serial, tid)
except KeyError:
pass
data, start_serial, end_serial = self._loadFromStorage(oid, serial,
tid)
result = self._loadFromCache(oid, serial, tid)
if not result:
result = self._loadFromStorage(oid, serial, tid)
self._cache_lock_acquire()
try:
self.mq_cache[(oid, start_serial)] = data, end_serial
self._cache.store(oid, *result)
finally:
self._cache_lock_release()
if data == '':
if result[0] == '':
raise NEOStorageCreationUndoneError(dump(oid))
return data, start_serial, end_serial
return result
finally:
self._load_lock_release()
......@@ -479,24 +473,17 @@ class Application(object):
return data, tid, next_tid
@profiler_decorator
def _loadFromCache(self, oid, at_tid, before_tid):
def _loadFromCache(self, oid, at_tid=None, before_tid=None):
"""
Load from local cache, raising KeyError if not found.
Load from local cache, return None if not found.
"""
self._cache_lock_acquire()
try:
if at_tid is not None:
tid = at_tid
elif before_tid is not None:
tid = self.cache_revision_index.getSerialBefore(oid,
before_tid)
else:
tid = self.cache_revision_index.getLatestSerial(oid)
if tid is None:
raise KeyError
# Raises KeyError on miss
data, next_tid = self.mq_cache[(oid, tid)]
return (data, tid, next_tid)
if at_tid:
result = self._cache.load(oid, at_tid + '*')
assert not result or result[1] == at_tid
return result
return self._cache.load(oid, before_tid)
finally:
self._cache_lock_release()
......@@ -808,14 +795,7 @@ class Application(object):
# Update cache
self._cache_lock_acquire()
try:
mq_cache = self.mq_cache
update = mq_cache.update
def updateNextSerial(value):
data, next_tid = value
assert next_tid is None, (dump(oid), dump(base_tid),
dump(next_tid))
return (data, tid)
get_baseTID = txn_context['object_base_serial_dict'].get
cache = self._cache
for oid, data in txn_context['data_dict'].iteritems():
if data is None:
# this is just a remain of
......@@ -823,16 +803,10 @@ class Application(object):
# was modified).
continue
# Update ex-latest value in cache
base_tid = get_baseTID(oid)
try:
update((oid, base_tid), updateNextSerial)
except KeyError:
pass
if data == '':
self.cache_revision_index.invalidate([oid], tid)
else:
cache.invalidate(oid, tid)
if data:
# Store in cache with no next_tid
mq_cache[(oid, tid)] = (data, None)
cache.store(oid, data, tid, None)
finally:
self._cache_lock_release()
txn_container.delete(transaction)
......@@ -1105,7 +1079,7 @@ class Application(object):
# by a pack), so don't bother invalidating on other clients.
self._cache_lock_acquire()
try:
self.mq_cache.clear()
self._cache.clear()
finally:
self._cache_lock_release()
......
This diff is collapsed.
......@@ -121,11 +121,12 @@ class PrimaryNotificationsHandler(BaseHandler):
app = self.app
app._cache_lock_acquire()
try:
# ZODB required a dict with oid as key, so create it
app.cache_revision_index.invalidate(oid_list, tid)
invalidate = app._cache.invalidate
for oid in oid_list:
invalidate(oid, tid)
db = app.getDB()
if db is not None:
db.invalidate(tid, dict.fromkeys(oid_list, tid))
db.invalidate(tid, oid_list)
finally:
app._cache_lock_release()
......
#
# Copyright (C) 2010-2011 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from neo.client.mq import MQIndex
class RevisionIndex(MQIndex):
    """
    This cache index allows accessing a specific revision of a cached object.
    It requires cache key to be a 2-tuple, composed of oid and revision.

    Note: it is expected that rather few revisions are held in cache, with few
    lookups for old revisions, so they are held in a simple sorted list
    Note2: all methods here must be called with cache lock acquired.
    """

    def __init__(self):
        # key: oid
        # value: tid list, from highest to lowest
        self._oid_dict = {}
        # key: oid
        # value: tid list, from lowest to highest
        self._invalidated = {}

    def clear(self):
        """
        Forget every registered revision and every recorded invalidation.
        """
        self._oid_dict.clear()
        self._invalidated.clear()

    def remove(self, key):
        """
        Unregister the revision designated by key, a (oid, tid) 2-tuple.

        Raises KeyError if no revision is registered for oid, and ValueError
        if tid is not one of the revisions registered for oid.
        """
        oid_dict = self._oid_dict
        oid, tid = key
        tid_list = oid_dict[oid]
        tid_list.remove(tid)
        if not tid_list:
            # No more serial known for this object, drop entirely
            del oid_dict[oid]
            self._invalidated.pop(oid, None)

    def add(self, key):
        """
        Register the revision designated by key, a (oid, tid) 2-tuple.

        The revision list of oid is kept sorted from highest to lowest tid.
        If tid was recorded as an invalidation of oid, that invalidation is
        dropped, since the matching revision is now known.
        """
        oid, tid = key
        serial_list = self._oid_dict.setdefault(oid, [])
        assert tid not in serial_list
        serial_list.insert(0, tid)
        if len(serial_list) > 1 and tid < serial_list[1]:
            # Not the newest known revision: restore descending order.
            serial_list.sort(reverse=True)
        invalidated = self._invalidated
        tid_list = invalidated.get(oid)
        if tid_list is None:
            return
        try:
            tid_list.remove(tid)
        except ValueError:
            # tid was never recorded as an invalidation of oid.
            pass
        else:
            if not tid_list:
                del invalidated[oid]

    def invalidate(self, oid_list, tid):
        """
        Mark object invalidated by given transaction.
        Must be called with increasing TID values (which is standard for
        ZODB).

        Objects having no revision registered in this index are ignored.
        """
        invalidated = self._invalidated
        oid_dict = self._oid_dict
        for oid in oid_list:
            if oid not in oid_dict:
                continue
            tid_list = invalidated.setdefault(oid, [])
            # The message is built from raw values only: this module does not
            # import any formatting helper, and an undefined name here would
            # turn a failed assertion into a NameError.
            assert not tid_list or tid > tid_list[-1], (oid, tid,
                tid_list[-1])
            tid_list.append(tid)

    def getSerialBefore(self, oid, tid):
        """
        Get the first tid in cache which value is lower than given tid.

        Returns None if oid is unknown, if no registered revision is lower
        than tid, or if an invalidation was recorded between the candidate
        revision and tid (a revision missing from cache would then match).
        """
        # WARNING: return-intensive to save on indentation
        oid_list = self._oid_dict.get(oid)
        if oid_list is None:
            # Unknown oid
            return None
        # oid_list is sorted in descending order, so the first revision
        # lower than tid is the highest such revision.
        for result in oid_list:
            if result < tid:
                # Candidate found
                break
        else:
            # No candidate in cache.
            return None
        # Check if there is a chance that an intermediate revision would
        # exist, while missing from cache.
        try:
            inv_tid_list = self._invalidated[oid]
        except KeyError:
            return result
        # Remember: inv_tid_list is sorted in ascending order.
        for inv_tid in inv_tid_list:
            if tid < inv_tid:
                # We don't care about invalidations past requested TID.
                break
            elif result < inv_tid < tid:
                # An invalidation was received between candidate revision,
                # and before requested TID: there is a matching revision we
                # don't know of, so we cannot answer.
                return None
        return result

    def getLatestSerial(self, oid):
        """
        Get the latest tid for given object.

        Returns None if oid is unknown, or if an invalidation more recent
        than the newest registered revision was recorded for it.
        """
        result = self._oid_dict.get(oid)
        if result is not None:
            result = result[0]
            try:
                tid_list = self._invalidated[oid]
            except KeyError:
                pass
            else:
                if result < tid_list[-1]:
                    # An invalidation happened from a transaction later than
                    # our most recent view of this object, so we cannot answer.
                    result = None
        return result

    def getSerialList(self, oid):
        """
        Get the list of all serials cache knows about for given object.

        The returned list is a copy, sorted from highest to lowest tid; it is
        empty when oid is unknown.
        """
        return self._oid_dict.get(oid, [])[:]
......@@ -275,37 +275,22 @@ class BTreeDatabaseManager(DatabaseManager):
def _getObject(self, oid, tid=None, before_tid=None):
tserial = self._obj.get(oid)
if tserial is None:
result = None
else:
if tserial is not None:
if tid is None:
if before_tid is None:
try:
if before_tid is None:
tid = tserial.maxKey()
except ValueError:
tid = None
else:
before_tid -= 1
try:
tid = tserial.maxKey(before_tid)
tid = tserial.maxKey(before_tid - 1)
except ValueError:
tid = None
if tid is None:
result = None
else:
result = tserial.get(tid, None)
return
result = tserial.get(tid)
if result:
compression, checksum, data, value_serial = result
if before_tid is None:
next_serial = None
else:
try:
next_serial = tserial.minKey(tid + 1)
except ValueError:
next_serial = None
result = (tid, next_serial, compression, checksum, data,
value_serial)
return result
return (tid, next_serial) + result
def doSetPartitionTable(self, ptid, cell_list, reset):
pt = self._pt
......
......@@ -255,6 +255,8 @@ class DatabaseManager(object):
else:
serial, next_serial, compression, checksum, data, data_serial = \
result
assert before_tid is None or next_serial is None or \
before_tid <= next_serial
if data is None and resolve_data:
try:
_, compression, checksum, data = self._getObjectData(oid,
......
......@@ -335,50 +335,30 @@ class MySQLDatabaseManager(DatabaseManager):
def _getObject(self, oid, tid=None, before_tid=None):
q = self.query
partition = self._getPartition(oid)
if tid is not None:
r = q("""SELECT serial, compression, checksum, value, value_serial
FROM obj
WHERE partition = %d AND oid = %d AND serial = %d""" \
% (partition, oid, tid))
try:
serial, compression, checksum, data, value_serial = r[0]
next_serial = None
except IndexError:
return None
elif before_tid is not None:
r = q("""SELECT serial, compression, checksum, value, value_serial
sql = """SELECT serial, compression, checksum, value, value_serial
FROM obj
WHERE partition = %d
AND oid = %d AND serial < %d
ORDER BY serial DESC LIMIT 1""" \
% (partition, oid, before_tid))
AND oid = %d""" % (partition, oid)
if tid is not None:
sql += ' AND serial = %d' % tid
elif before_tid is not None:
sql += ' AND serial < %d ORDER BY serial DESC LIMIT 1' % before_tid
else:
# XXX I want to express "HAVING serial = MAX(serial)", but
# MySQL does not use an index for a HAVING clause!
sql += ' ORDER BY serial DESC LIMIT 1'
r = q(sql)
try:
serial, compression, checksum, data, value_serial = r[0]
except IndexError:
return None
r = q("""SELECT serial FROM obj_short
WHERE partition = %d
AND oid = %d AND serial >= %d
ORDER BY serial LIMIT 1""" \
% (partition, oid, before_tid))
WHERE partition = %d AND oid = %d AND serial > %d
ORDER BY serial LIMIT 1""" % (partition, oid, serial))
try:
next_serial = r[0][0]
except IndexError:
next_serial = None
else:
# XXX I want to express "HAVING serial = MAX(serial)", but
# MySQL does not use an index for a HAVING clause!
r = q("""SELECT serial, compression, checksum, value, value_serial
FROM obj
WHERE partition = %d AND oid = %d
ORDER BY serial DESC LIMIT 1""" \
% (partition, oid))
try:
serial, compression, checksum, data, value_serial = r[0]
next_serial = None
except IndexError:
return None
return serial, next_serial, compression, checksum, data, value_serial
def doSetPartitionTable(self, ptid, cell_list, reset):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment