Commit 5d042452 authored by Julien Muchembled

MySQL: change schema of 'data' table to use autoincrement integer as primary key

This optimizes the storage layout on disk, because more recent entries are
accessed more often.

This will also simplify implementation of incremental backups.

The storage API is changed so that backends are no longer forced to use the
checksum to index data.
parent d4fac5c0
NEO 1.0
=======
The format of MySQL tables has changed in NEO 1.0 and there is no backward
compatibility or transparent migration, so you will have to use the following
SQL commands to migrate each storage from NEO 0.10.x::
-- make sure 'tobj' is empty first
CREATE TABLE new_data (id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, hash BINARY(20) NOT NULL UNIQUE, compression TINYINT UNSIGNED NULL, value LONGBLOB NULL) ENGINE = InnoDB SELECT DISTINCT obj.hash as hash, compression, value FROM obj, data WHERE obj.hash=data.hash ORDER BY serial;
DROP TABLE data;
RENAME TABLE new_data TO data;
CREATE TABLE new_obj (partition SMALLINT UNSIGNED NOT NULL, oid BIGINT UNSIGNED NOT NULL, serial BIGINT UNSIGNED NOT NULL, data_id BIGINT UNSIGNED NULL, value_serial BIGINT UNSIGNED NULL, PRIMARY KEY (partition, oid, serial), KEY (data_id)) ENGINE = InnoDB SELECT partition, oid, serial, data.id as data_id, value_serial FROM obj LEFT JOIN data ON (obj.hash=data.hash);
DROP TABLE obj;
RENAME TABLE new_obj TO obj;
ALTER TABLE tobj CHANGE hash data_id BIGINT UNSIGNED NULL;
NEO 0.10 NEO 0.10
======== ========
......
...@@ -347,6 +347,7 @@ class BTreeDatabaseManager(DatabaseManager): ...@@ -347,6 +347,7 @@ class BTreeDatabaseManager(DatabaseManager):
raise AssertionError("hash collision") raise AssertionError("hash collision")
except KeyError: except KeyError:
self._data[checksum] = compression, data, set() self._data[checksum] = compression, data, set()
return checksum
def finishTransaction(self, tid): def finishTransaction(self, tid):
tid = util.u64(tid) tid = util.u64(tid)
......
...@@ -68,7 +68,7 @@ class DatabaseManager(object): ...@@ -68,7 +68,7 @@ class DatabaseManager(object):
_uncommitted_data is a dict containing refcounts to data of _uncommitted_data is a dict containing refcounts to data of
write-locked objects, except in case of undo, where the refcount is write-locked objects, except in case of undo, where the refcount is
increased later, when the object is read-locked. increased later, when the object is read-locked.
Keys are checksums and values are number of references. Keys are data ids and values are number of references.
If reset is true, existing data must be discarded and If reset is true, existing data must be discarded and
self._uncommitted_data must be an empty dict. self._uncommitted_data must be an empty dict.
...@@ -294,19 +294,19 @@ class DatabaseManager(object): ...@@ -294,19 +294,19 @@ class DatabaseManager(object):
"""Store a transaction temporarily, if temporary is true. Note """Store a transaction temporarily, if temporary is true. Note
that this transaction is not finished yet. The list of objects that this transaction is not finished yet. The list of objects
contains tuples, each of which consists of an object ID, contains tuples, each of which consists of an object ID,
a checksum and object serial. a data_id and object serial.
The transaction is either None or a tuple of the list of OIDs, The transaction is either None or a tuple of the list of OIDs,
user information, a description, extension information and transaction user information, a description, extension information and transaction
pack state (True for packed).""" pack state (True for packed)."""
raise NotImplementedError raise NotImplementedError
def _pruneData(self, checksum_list): def _pruneData(self, data_id_list):
"""To be overriden by the backend to delete any unreferenced data """To be overriden by the backend to delete any unreferenced data
'unreferenced' means: 'unreferenced' means:
- not in self._uncommitted_data - not in self._uncommitted_data
- and not referenced by a fully-committed object (storage should have - and not referenced by a fully-committed object (storage should have
an index or a refcound of all data checksums of all objects) an index or a refcound of all data ids of all objects)
""" """
raise NotImplementedError raise NotImplementedError
...@@ -318,38 +318,39 @@ class DatabaseManager(object): ...@@ -318,38 +318,39 @@ class DatabaseManager(object):
""" """
raise NotImplementedError raise NotImplementedError
def storeData(self, checksum, data=None, compression=None): def storeData(self, checksum_or_id, data=None, compression=None):
"""Store object raw data """Store object raw data
'checksum' must be the result of neo.lib.util.makeChecksum(data) checksum must be the result of neo.lib.util.makeChecksum(data)
'compression' indicates if 'data' is compressed. 'compression' indicates if 'data' is compressed.
A volatile reference is set to this data until 'unlockData' is called A volatile reference is set to this data until 'unlockData' is called
with this checksum. with this checksum.
If called with only a checksum, it only increment the volatile If called with only an id, it only increment the volatile
reference to the data matching the checksum. reference to the data matching the id.
""" """
refcount = self._uncommitted_data refcount = self._uncommitted_data
refcount[checksum] = 1 + refcount.get(checksum, 0)
if data is not None: if data is not None:
self._storeData(checksum, data, compression) checksum_or_id = self._storeData(checksum_or_id, data, compression)
refcount[checksum_or_id] = 1 + refcount.get(checksum_or_id, 0)
return checksum_or_id
def unlockData(self, checksum_list, prune=False): def unlockData(self, data_id_list, prune=False):
"""Release 1 volatile reference to given list of checksums """Release 1 volatile reference to given list of checksums
If 'prune' is true, any data that is not referenced anymore (either by If 'prune' is true, any data that is not referenced anymore (either by
a volatile reference or by a fully-committed object) is deleted. a volatile reference or by a fully-committed object) is deleted.
""" """
refcount = self._uncommitted_data refcount = self._uncommitted_data
for checksum in checksum_list: for data_id in data_id_list:
count = refcount[checksum] - 1 count = refcount[data_id] - 1
if count: if count:
refcount[checksum] = count refcount[data_id] = count
else: else:
del refcount[checksum] del refcount[data_id]
if prune: if prune:
self.begin() self.begin()
try: try:
self._pruneData(checksum_list) self._pruneData(data_id_list)
except: except:
self.rollback() self.rollback()
raise raise
...@@ -379,8 +380,8 @@ class DatabaseManager(object): ...@@ -379,8 +380,8 @@ class DatabaseManager(object):
" of _getDataTID. It should be overriden by backend storage.") " of _getDataTID. It should be overriden by backend storage.")
r = self._getObject(oid, tid, before_tid) r = self._getObject(oid, tid, before_tid)
if r: if r:
serial, _, _, checksum, _, value_serial = r serial, _, _, data_id, _, value_serial = r
if value_serial is None and checksum: if value_serial is None and data_id:
return serial, serial return serial, serial
return serial, value_serial return serial, value_serial
return None, None return None, None
...@@ -524,7 +525,7 @@ class DatabaseManager(object): ...@@ -524,7 +525,7 @@ class DatabaseManager(object):
to it. to it.
- getObjectData function - getObjectData function
To call if value_serial is None and an object needs to be updated. To call if value_serial is None and an object needs to be updated.
Takes no parameter, returns a 3-tuple: compression, checksum, Takes no parameter, returns a 3-tuple: compression, data_id,
value value
""" """
raise NotImplementedError raise NotImplementedError
......
This diff is collapsed.
...@@ -173,11 +173,11 @@ class ReplicationHandler(EventHandler): ...@@ -173,11 +173,11 @@ class ReplicationHandler(EventHandler):
serial_end, compression, checksum, data, data_serial): serial_end, compression, checksum, data, data_serial):
dm = self.app.dm dm = self.app.dm
if data or checksum != ZERO_HASH: if data or checksum != ZERO_HASH:
dm.storeData(checksum, data, compression) data_id = dm.storeData(checksum, data, compression)
else: else:
checksum = None data_id = None
# Directly store the transaction. # Directly store the transaction.
obj = oid, checksum, data_serial obj = oid, data_id, data_serial
dm.storeTransaction(serial_start, [obj], None, False) dm.storeTransaction(serial_start, [obj], None, False)
def _doAskCheckSerialRange(self, min_oid, min_tid, max_tid, def _doAskCheckSerialRange(self, min_oid, min_tid, max_tid,
......
...@@ -98,12 +98,12 @@ class Transaction(object): ...@@ -98,12 +98,12 @@ class Transaction(object):
# assert self._transaction is not None # assert self._transaction is not None
self._transaction = (oid_list, user, desc, ext, packed) self._transaction = (oid_list, user, desc, ext, packed)
def addObject(self, oid, checksum, value_serial): def addObject(self, oid, data_id, value_serial):
""" """
Add an object to the transaction Add an object to the transaction
""" """
assert oid not in self._checked_set, dump(oid) assert oid not in self._checked_set, dump(oid)
self._object_dict[oid] = oid, checksum, value_serial self._object_dict[oid] = oid, data_id, value_serial
def delObject(self, oid): def delObject(self, oid):
try: try:
...@@ -241,9 +241,9 @@ class TransactionManager(object): ...@@ -241,9 +241,9 @@ class TransactionManager(object):
# drop the lock it held on this object, and drop object data for # drop the lock it held on this object, and drop object data for
# consistency. # consistency.
del self._store_lock_dict[oid] del self._store_lock_dict[oid]
checksum = self._transaction_dict[ttid].delObject(oid) data_id = self._transaction_dict[ttid].delObject(oid)
if checksum: if data_id:
self._app.dm.pruneData((checksum,)) self._app.dm.pruneData((data_id,))
# Give a chance to pending events to take that lock now. # Give a chance to pending events to take that lock now.
self._app.executeQueuedEvents() self._app.executeQueuedEvents()
# Attemp to acquire lock again. # Attemp to acquire lock again.
...@@ -303,10 +303,10 @@ class TransactionManager(object): ...@@ -303,10 +303,10 @@ class TransactionManager(object):
# store object # store object
assert ttid in self, "Transaction not registered" assert ttid in self, "Transaction not registered"
if data is None: if data is None:
checksum = None data_id = None
else: else:
self._app.dm.storeData(checksum, data, compression) data_id = self._app.dm.storeData(checksum, data, compression)
self._transaction_dict[ttid].addObject(oid, checksum, value_serial) self._transaction_dict[ttid].addObject(oid, data_id, value_serial)
def abort(self, ttid, even_if_locked=False): def abort(self, ttid, even_if_locked=False):
""" """
...@@ -328,9 +328,9 @@ class TransactionManager(object): ...@@ -328,9 +328,9 @@ class TransactionManager(object):
if not even_if_locked: if not even_if_locked:
return return
else: else:
self._app.dm.unlockData([checksum self._app.dm.unlockData([data_id
for oid, checksum, value_serial in transaction.getObjectList() for oid, data_id, value_serial in transaction.getObjectList()
if checksum], True) if data_id], True)
# unlock any object # unlock any object
for oid in transaction.getLockedOIDList(): for oid in transaction.getLockedOIDList():
if has_load_lock: if has_load_lock:
...@@ -379,13 +379,13 @@ class TransactionManager(object): ...@@ -379,13 +379,13 @@ class TransactionManager(object):
for oid, ttid in self._store_lock_dict.items(): for oid, ttid in self._store_lock_dict.items():
neo.lib.logging.info(' %r by %r', dump(oid), dump(ttid)) neo.lib.logging.info(' %r by %r', dump(oid), dump(ttid))
def updateObjectDataForPack(self, oid, orig_serial, new_serial, checksum): def updateObjectDataForPack(self, oid, orig_serial, new_serial, data_id):
lock_tid = self.getLockingTID(oid) lock_tid = self.getLockingTID(oid)
if lock_tid is not None: if lock_tid is not None:
transaction = self._transaction_dict[lock_tid] transaction = self._transaction_dict[lock_tid]
if transaction.getObject(oid)[2] == orig_serial: if transaction.getObject(oid)[2] == orig_serial:
if new_serial: if new_serial:
checksum = None data_id = None
else: else:
self._app.dm.storeData(checksum) self._app.dm.storeData(data_id)
transaction.addObject(oid, checksum, new_serial) transaction.addObject(oid, data_id, new_serial)
...@@ -271,6 +271,7 @@ class StorageReplicationHandlerTests(NeoUnitTestBase): ...@@ -271,6 +271,7 @@ class StorageReplicationHandlerTests(NeoUnitTestBase):
checksum = "0" * 20 checksum = "0" * 20
data = 'foo' data = 'foo'
data_serial = None data_serial = None
app.dm.mockAddReturnValues(storeData=checksum)
ReplicationHandler(app).answerObject(conn, oid, serial_start, ReplicationHandler(app).answerObject(conn, oid, serial_start,
serial_end, compression, checksum, data, data_serial) serial_end, compression, checksum, data, data_serial)
calls = app.dm.mockGetNamedCalls('storeTransaction') calls = app.dm.mockGetNamedCalls('storeTransaction')
......
...@@ -122,9 +122,8 @@ class StorageDBTests(NeoUnitTestBase): ...@@ -122,9 +122,8 @@ class StorageDBTests(NeoUnitTestBase):
def getTransaction(self, oid_list): def getTransaction(self, oid_list):
transaction = (oid_list, 'user', 'desc', 'ext', False) transaction = (oid_list, 'user', 'desc', 'ext', False)
H = "0" * 20 H = "0" * 20
for _ in oid_list: object_list = [(oid, self.db.storeData(H, '', 1), None)
self.db.storeData(H, '', 1) for oid in oid_list]
object_list = [(oid, H, None) for oid in oid_list]
return (transaction, object_list) return (transaction, object_list)
def checkSet(self, list1, list2): def checkSet(self, list1, list2):
...@@ -584,10 +583,8 @@ class StorageDBTests(NeoUnitTestBase): ...@@ -584,10 +583,8 @@ class StorageDBTests(NeoUnitTestBase):
tid4 = self.getNextTID() tid4 = self.getNextTID()
tid5 = self.getNextTID() tid5 = self.getNextTID()
oid1 = self.getOID(1) oid1 = self.getOID(1)
foo = "3" * 20 foo = db.storeData("3" * 20, 'foo', 0)
bar = "4" * 20 bar = db.storeData("4" * 20, 'bar', 0)
db.storeData(foo, 'foo', 0)
db.storeData(bar, 'bar', 0)
db.unlockData((foo, bar)) db.unlockData((foo, bar))
db.storeTransaction( db.storeTransaction(
tid1, ( tid1, (
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import random
import unittest import unittest
from mock import Mock from mock import Mock, ReturnValues
from .. import NeoUnitTestBase from .. import NeoUnitTestBase
from neo.storage.transactions import Transaction, TransactionManager from neo.storage.transactions import Transaction, TransactionManager
from neo.storage.transactions import ConflictError, DelayedError from neo.storage.transactions import ConflictError, DelayedError
...@@ -125,6 +125,8 @@ class TransactionManagerTests(NeoUnitTestBase): ...@@ -125,6 +125,8 @@ class TransactionManagerTests(NeoUnitTestBase):
def testSimpleCase(self): def testSimpleCase(self):
""" One node, one transaction, not abort """ """ One node, one transaction, not abort """
data_id_list = random.random(), random.random()
self.app.dm.mockAddReturnValues(storeData=ReturnValues(*data_id_list))
uuid = self.getNewUUID() uuid = self.getNewUUID()
ttid = self.getNextTID() ttid = self.getNextTID()
tid, txn = self._getTransaction() tid, txn = self._getTransaction()
...@@ -137,8 +139,8 @@ class TransactionManagerTests(NeoUnitTestBase): ...@@ -137,8 +139,8 @@ class TransactionManagerTests(NeoUnitTestBase):
self.assertTrue(ttid in self.manager) self.assertTrue(ttid in self.manager)
self.manager.lock(ttid, tid, txn[0]) self.manager.lock(ttid, tid, txn[0])
self._checkTransactionStored(tid, [ self._checkTransactionStored(tid, [
(object1[0], object1[2], object1[4]), (object1[0], data_id_list[0], object1[4]),
(object2[0], object2[2], object2[4]), (object2[0], data_id_list[1], object2[4]),
], txn) ], txn)
self.manager.unlock(ttid) self.manager.unlock(ttid)
self.assertFalse(ttid in self.manager) self.assertFalse(ttid in self.manager)
...@@ -331,6 +333,8 @@ class TransactionManagerTests(NeoUnitTestBase): ...@@ -331,6 +333,8 @@ class TransactionManagerTests(NeoUnitTestBase):
self.assertFalse(self.manager.loadLocked(oid)) self.assertFalse(self.manager.loadLocked(oid))
def test_getObjectFromTransaction(self): def test_getObjectFromTransaction(self):
data_id = random.random()
self.app.dm.mockAddReturnValues(storeData=ReturnValues(data_id))
uuid = self.getNewUUID() uuid = self.getNewUUID()
tid1, txn1 = self._getTransaction() tid1, txn1 = self._getTransaction()
tid2, txn2 = self._getTransaction() tid2, txn2 = self._getTransaction()
...@@ -343,7 +347,7 @@ class TransactionManagerTests(NeoUnitTestBase): ...@@ -343,7 +347,7 @@ class TransactionManagerTests(NeoUnitTestBase):
self.assertEqual(self.manager.getObjectFromTransaction(tid1, obj2[0]), self.assertEqual(self.manager.getObjectFromTransaction(tid1, obj2[0]),
None) None)
self.assertEqual(self.manager.getObjectFromTransaction(tid1, obj1[0]), self.assertEqual(self.manager.getObjectFromTransaction(tid1, obj1[0]),
(obj1[0], obj1[2], obj1[4])) (obj1[0], data_id, obj1[4]))
def test_getLockingTID(self): def test_getLockingTID(self):
uuid = self.getNewUUID() uuid = self.getNewUUID()
......
...@@ -308,13 +308,14 @@ class StorageApplication(ServerNode, neo.storage.app.Application): ...@@ -308,13 +308,14 @@ class StorageApplication(ServerNode, neo.storage.app.Application):
adapter = self._init_args[1]['getAdapter'] adapter = self._init_args[1]['getAdapter']
dm = self.dm dm = self.dm
if adapter == 'BTree': if adapter == 'BTree':
checksum_list = dm._data checksum_dict = dict((x, x) for x in dm._data)
elif adapter == 'MySQL': elif adapter == 'MySQL':
checksum_list = [x for x, in dm.query("SELECT hash FROM data")] checksum_dict = dict(dm.query("SELECT id, hash FROM data"))
else: else:
assert False assert False
assert set(dm._uncommitted_data).issubset(checksum_list) assert set(dm._uncommitted_data).issubset(checksum_dict)
return dict((x, dm._uncommitted_data.get(x, 0)) for x in checksum_list) get = dm._uncommitted_data.get
return dict((v, get(k, 0)) for k, v in checksum_dict.iteritems())
class ClientApplication(Node, neo.client.app.Application): class ClientApplication(Node, neo.client.app.Application):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment