Commit 3c7a3160 authored by Julien Muchembled

storage: speed up reads by indexing 'obj' primarily by 'oid' (instead of 'tid')

getObject becomes faster because it no longer uses a secondary index, only the
primary one. This frees RAM during normal operation. For MySQL,
DatabaseManager._getObject is sped up by ~3% for in-memory loads.
An improvement of ~1% from ERP5 was also measured for IO-bound loads.

On insertion, the fast index is (`partition`, tid, oid) because we almost
always insert lines with increasing tid, whereas oid values are more random.
Although the value (data_id+value_tid) is moved from the fast to the slow index,
this should have little impact on performance because the value size is quite
small compared to the key.

The impact on replication should also be negligible:
- a little faster when there's no oid to replicate: only the secondary index,
  smaller, is scanned
- otherwise: the (slightly) bigger index is scanned randomly

On disk usage, an increase of ~4% was observed for TokuDB.
Less compressibility? Is there any link with https://jira.percona.com/browse/TDB-86 ?
parent 875fc1b9
...@@ -229,8 +229,8 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -229,8 +229,8 @@ class MySQLDatabaseManager(DatabaseManager):
tid BIGINT UNSIGNED NOT NULL, tid BIGINT UNSIGNED NOT NULL,
data_id BIGINT UNSIGNED NULL, data_id BIGINT UNSIGNED NULL,
value_tid BIGINT UNSIGNED NULL, value_tid BIGINT UNSIGNED NULL,
PRIMARY KEY (`partition`, tid, oid), PRIMARY KEY (`partition`, oid, tid),
KEY (`partition`, oid, tid), KEY tid (`partition`, tid, oid),
KEY (data_id) KEY (data_id)
) ENGINE=""" + p) ) ENGINE=""" + p)
...@@ -335,15 +335,14 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -335,15 +335,14 @@ class MySQLDatabaseManager(DatabaseManager):
offset_list = self._getAssignedPartitionList() offset_list = self._getAssignedPartitionList()
p64 = util.p64 p64 = util.p64
q = self.query q = self.query
sql = ("SELECT MAX(tid) FROM %s FORCE INDEX (PRIMARY)" sql = "SELECT MAX(tid) FROM %s WHERE `partition`=%s"
" WHERE `partition`=%s")
trans, obj = ({partition: p64(tid) trans, obj = ({partition: p64(tid)
for partition in offset_list for partition in offset_list
for tid, in q(sql % (t, partition)) for tid, in q(sql % (t, partition))
if tid is not None} if tid is not None}
for t in ('trans', 'obj')) for t in ('trans FORCE INDEX (PRIMARY)', 'obj FORCE INDEX (tid)'))
oid = self._sqlmax( oid = self._sqlmax(
"SELECT MAX(oid) FROM obj FORCE INDEX (`partition`)" "SELECT MAX(oid) FROM obj FORCE INDEX (PRIMARY)"
" WHERE `partition`=%s", offset_list) " WHERE `partition`=%s", offset_list)
return trans, obj, None if oid is None else p64(oid) return trans, obj, None if oid is None else p64(oid)
...@@ -363,7 +362,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -363,7 +362,7 @@ class MySQLDatabaseManager(DatabaseManager):
def getLastObjectTID(self, oid): def getLastObjectTID(self, oid):
oid = util.u64(oid) oid = util.u64(oid)
r = self.query("SELECT tid FROM obj FORCE INDEX(`partition`)" r = self.query("SELECT tid FROM obj FORCE INDEX(PRIMARY)"
" WHERE `partition`=%d AND oid=%d" " WHERE `partition`=%d AND oid=%d"
" ORDER BY tid DESC LIMIT 1" " ORDER BY tid DESC LIMIT 1"
% (self._getReadablePartition(oid), oid)) % (self._getReadablePartition(oid), oid))
...@@ -371,7 +370,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -371,7 +370,7 @@ class MySQLDatabaseManager(DatabaseManager):
def _getNextTID(self, *args): # partition, oid, tid def _getNextTID(self, *args): # partition, oid, tid
r = self.query("SELECT tid FROM obj" r = self.query("SELECT tid FROM obj"
" FORCE INDEX(`partition`)" " FORCE INDEX(PRIMARY)"
" WHERE `partition`=%d AND oid=%d AND tid>%d" " WHERE `partition`=%d AND oid=%d AND tid>%d"
" ORDER BY tid LIMIT 1" % args) " ORDER BY tid LIMIT 1" % args)
return r[0][0] if r else None return r[0][0] if r else None
...@@ -380,7 +379,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -380,7 +379,7 @@ class MySQLDatabaseManager(DatabaseManager):
q = self.query q = self.query
partition = self._getReadablePartition(oid) partition = self._getReadablePartition(oid)
sql = ('SELECT tid, compression, data.hash, value, value_tid' sql = ('SELECT tid, compression, data.hash, value, value_tid'
' FROM obj FORCE INDEX(`partition`)' ' FROM obj FORCE INDEX(PRIMARY)'
' LEFT JOIN data ON (obj.data_id = data.id)' ' LEFT JOIN data ON (obj.data_id = data.id)'
' WHERE `partition` = %d AND oid = %d') % (partition, oid) ' WHERE `partition` = %d AND oid = %d') % (partition, oid)
if before_tid is not None: if before_tid is not None:
...@@ -437,7 +436,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -437,7 +436,7 @@ class MySQLDatabaseManager(DatabaseManager):
for partition in offset_list: for partition in offset_list:
where = " WHERE `partition`=%d" % partition where = " WHERE `partition`=%d" % partition
data_id_list = [x for x, in data_id_list = [x for x, in
q("SELECT DISTINCT data_id FROM obj FORCE INDEX(PRIMARY)" q("SELECT DISTINCT data_id FROM obj FORCE INDEX(tid)"
+ where) + where)
if x] if x]
if not self._use_partition: if not self._use_partition:
...@@ -602,7 +601,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -602,7 +601,7 @@ class MySQLDatabaseManager(DatabaseManager):
del _structLL del _structLL
def _getDataTID(self, oid, tid=None, before_tid=None): def _getDataTID(self, oid, tid=None, before_tid=None):
sql = ('SELECT tid, value_tid FROM obj FORCE INDEX(`partition`)' sql = ('SELECT tid, value_tid FROM obj FORCE INDEX(PRIMARY)'
' WHERE `partition` = %d AND oid = %d' ' WHERE `partition` = %d AND oid = %d'
) % (self._getReadablePartition(oid), oid) ) % (self._getReadablePartition(oid), oid)
if tid is not None: if tid is not None:
...@@ -693,7 +692,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -693,7 +692,7 @@ class MySQLDatabaseManager(DatabaseManager):
p64 = util.p64 p64 = util.p64
r = self.query("SELECT tid, IF(compression < 128, LENGTH(value)," r = self.query("SELECT tid, IF(compression < 128, LENGTH(value),"
" CAST(CONV(HEX(SUBSTR(value, 5, 4)), 16, 10) AS INT))" " CAST(CONV(HEX(SUBSTR(value, 5, 4)), 16, 10) AS INT))"
" FROM obj FORCE INDEX(`partition`)" " FROM obj FORCE INDEX(PRIMARY)"
" LEFT JOIN data ON (obj.data_id = data.id)" " LEFT JOIN data ON (obj.data_id = data.id)"
" WHERE `partition` = %d AND oid = %d AND tid >= %d" " WHERE `partition` = %d AND oid = %d AND tid >= %d"
" ORDER BY tid DESC LIMIT %d, %d" % " ORDER BY tid DESC LIMIT %d, %d" %
...@@ -722,7 +721,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -722,7 +721,7 @@ class MySQLDatabaseManager(DatabaseManager):
u64 = util.u64 u64 = util.u64
p64 = util.p64 p64 = util.p64
min_tid = u64(min_tid) min_tid = u64(min_tid)
r = self.query('SELECT tid, oid FROM obj FORCE INDEX(PRIMARY)' r = self.query('SELECT tid, oid FROM obj FORCE INDEX(tid)'
' WHERE `partition` = %d AND tid <= %d' ' WHERE `partition` = %d AND tid <= %d'
' AND (tid = %d AND %d <= oid OR %d < tid)' ' AND (tid = %d AND %d <= oid OR %d < tid)'
' ORDER BY tid ASC, oid ASC LIMIT %d' % ( ' ORDER BY tid ASC, oid ASC LIMIT %d' % (
...@@ -787,7 +786,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -787,7 +786,7 @@ class MySQLDatabaseManager(DatabaseManager):
q = self.query q = self.query
self._setPackTID(tid) self._setPackTID(tid)
for count, oid, max_serial in q("SELECT COUNT(*) - 1, oid, MAX(tid)" for count, oid, max_serial in q("SELECT COUNT(*) - 1, oid, MAX(tid)"
" FROM obj FORCE INDEX(`partition`)" " FROM obj FORCE INDEX(PRIMARY)"
" WHERE tid <= %d GROUP BY oid" " WHERE tid <= %d GROUP BY oid"
% tid): % tid):
partition = getPartition(oid) partition = getPartition(oid)
...@@ -838,7 +837,7 @@ class MySQLDatabaseManager(DatabaseManager): ...@@ -838,7 +837,7 @@ class MySQLDatabaseManager(DatabaseManager):
# last grouped value, instead of the greatest one. # last grouped value, instead of the greatest one.
r = self.query( r = self.query(
"""SELECT tid, oid """SELECT tid, oid
FROM obj FORCE INDEX(PRIMARY) FROM obj FORCE INDEX(tid)
WHERE `partition` = %(partition)s WHERE `partition` = %(partition)s
AND tid <= %(max_tid)d AND tid <= %(max_tid)d
AND (tid > %(min_tid)d OR AND (tid > %(min_tid)d OR
......
...@@ -163,10 +163,10 @@ class SQLiteDatabaseManager(DatabaseManager): ...@@ -163,10 +163,10 @@ class SQLiteDatabaseManager(DatabaseManager):
tid INTEGER NOT NULL, tid INTEGER NOT NULL,
data_id INTEGER, data_id INTEGER,
value_tid INTEGER, value_tid INTEGER,
PRIMARY KEY (partition, tid, oid)) PRIMARY KEY (partition, oid, tid))
""") """)
q("""CREATE INDEX IF NOT EXISTS _obj_i1 ON q("""CREATE INDEX IF NOT EXISTS _obj_i1 ON
obj(partition, oid, tid) obj(partition, tid, oid)
""") """)
q("""CREATE INDEX IF NOT EXISTS _obj_i2 ON q("""CREATE INDEX IF NOT EXISTS _obj_i2 ON
obj(data_id) obj(data_id)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment