storage: speed up reads by indexing 'obj' primarily by 'oid' (instead of 'tid')

getObject becomes faster because it does not use secondary index anymore. Only the primary one. This frees RAM during normal operation. For MySQL, DatabaseManager._getObject is sped up by ~3% for in-memory loads. An improvement of ~1% from ERP5 was also mesured for IO-bound loads. On insertion, the fast index is (`partition`, tid, oid) because we almost always insert lines with increasing tid, whereas oid values are more random. Although the value (data_id+value_tid) is moved from the fast to the slow index, this should have little impact on performance because the value size is quite small compared to the key. The impact on replication should also be negligible: - a little faster when there's no oid to replicate: only the secondary index, smaller, is scanned - otherwise: the (slightly) biggest index is scanned randomly On disk usage, an increase of ~4% was observed for TokuDB. Less compressibility ? Any link with https://jira.percona.com/browse/TDB-86 ?

storage: speed up reads by indexing 'obj' primarily by 'oid' (instead of 'tid')
getObject becomes faster because it does not use secondary index anymore. Only the primary one. This frees RAM during normal operation. For MySQL, DatabaseManager._getObject is sped up by ~3% for in-memory loads. An improvement of ~1% from ERP5 was also mesured for IO-bound loads. On insertion, the fast index is (`partition`, tid, oid) because we almost always insert lines with increasing tid, whereas oid values are more random. Although the value (data_id+value_tid) is moved from the fast to the slow index, this should have little impact on performance because the value size is quite small compared to the key. The impact on replication should also be negligible: - a little faster when there's no oid to replicate: only the secondary index, smaller, is scanned - otherwise: the (slightly) biggest index is scanned randomly On disk usage, an increase of ~4% was observed for TokuDB. Less compressibility ? Any link with https://jira.percona.com/browse/TDB-86 ?
3c7a3160 · Julien Muchembled · 875fc1b9 · 3c7a3160 · 3c7a3160
Commit 3c7a3160 authored Nov 20, 2017 by Julien Muchembled
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 17 deletions

neo/storage/database/mysqldb.py neo/storage/database/mysqldb.py +14 -15

neo/storage/database/sqlite.py neo/storage/database/sqlite.py +2 -2

No files found.
--- a/neo/storage/database/mysqldb.py
+++ b/neo/storage/database/mysqldb.py
@@ -229,8 +229,8 @@ class MySQLDatabaseManager(DatabaseManager):
                 tid BIGINT UNSIGNED NOT NULL,
                 data_id BIGINT UNSIGNED NULL,
                 value_tid BIGINT UNSIGNED NULL,
-                 PRIMARY KEY (`partition`, tid, oid),
+                 PRIMARY KEY (`partition`, oid, tid),
-                 KEY (`partition`, oid, tid),
+                 KEY tid (`partition`, tid, oid),
                 KEY (data_id)
             ) ENGINE=""" + p)
@@ -335,15 +335,14 @@ class MySQLDatabaseManager(DatabaseManager):
        offset_list = self._getAssignedPartitionList()
        p64 = util.p64
        q = self.query
-        sql = ("SELECT MAX(tid) FROM %s FORCE INDEX (PRIMARY)"
+        sql = "SELECT MAX(tid) FROM %s WHERE `partition`=%s"
-               " WHERE `partition`=%s")
        trans, obj = ({partition: p64(tid)
            for partition in offset_list
            for tid, in q(sql % (t, partition))
            if tid is not None}
-            for t in ('trans', 'obj'))
+            for t in ('trans FORCE INDEX (PRIMARY)', 'obj FORCE INDEX (tid)'))
        oid = self._sqlmax(
-            "SELECT MAX(oid) FROM obj FORCE INDEX (`partition`)"
+            "SELECT MAX(oid) FROM obj FORCE INDEX (PRIMARY)"
            " WHERE `partition`=%s", offset_list)
        return trans, obj, None if oid is None else p64(oid)
@@ -363,7 +362,7 @@ class MySQLDatabaseManager(DatabaseManager):
    def getLastObjectTID(self, oid):
        oid = util.u64(oid)
-        r = self.query("SELECT tid FROM obj FORCE INDEX(`partition`)"
+        r = self.query("SELECT tid FROM obj FORCE INDEX(PRIMARY)"
                       " WHERE `partition`=%d AND oid=%d"
                       " ORDER BY tid DESC LIMIT 1"
                       % (self._getReadablePartition(oid), oid))
@@ -371,7 +370,7 @@ class MySQLDatabaseManager(DatabaseManager):
    def _getNextTID(self, *args): # partition, oid, tid
        r = self.query("SELECT tid FROM obj"
-                       " FORCE INDEX(`partition`)"
+                       " FORCE INDEX(PRIMARY)"
                       " WHERE `partition`=%d AND oid=%d AND tid>%d"
                       " ORDER BY tid LIMIT 1" % args)
        return r[0][0] if r else None
@@ -380,7 +379,7 @@ class MySQLDatabaseManager(DatabaseManager):
        q = self.query
        partition = self._getReadablePartition(oid)
        sql = ('SELECT tid, compression, data.hash, value, value_tid'
-               ' FROM obj FORCE INDEX(`partition`)'
+               ' FROM obj FORCE INDEX(PRIMARY)'
               ' LEFT JOIN data ON (obj.data_id = data.id)'
               ' WHERE `partition` = %d AND oid = %d') % (partition, oid)
        if before_tid is not None:
@@ -437,7 +436,7 @@ class MySQLDatabaseManager(DatabaseManager):
        for partition in offset_list:
            where = " WHERE `partition`=%d" % partition
            data_id_list = [x for x, in
-                q("SELECT DISTINCT data_id FROM obj FORCE INDEX(PRIMARY)"
+                q("SELECT DISTINCT data_id FROM obj FORCE INDEX(tid)"
                  + where)
                if x]
            if not self._use_partition:
@@ -602,7 +601,7 @@ class MySQLDatabaseManager(DatabaseManager):
    del _structLL
    def _getDataTID(self, oid, tid=None, before_tid=None):
-        sql = ('SELECT tid, value_tid FROM obj FORCE INDEX(`partition`)'
+        sql = ('SELECT tid, value_tid FROM obj FORCE INDEX(PRIMARY)'
               ' WHERE `partition` = %d AND oid = %d'
              ) % (self._getReadablePartition(oid), oid)
        if tid is not None:
@@ -693,7 +692,7 @@ class MySQLDatabaseManager(DatabaseManager):
        p64 = util.p64
        r = self.query("SELECT tid, IF(compression < 128, LENGTH(value),"
            "  CAST(CONV(HEX(SUBSTR(value, 5, 4)), 16, 10) AS INT))"
-            " FROM obj FORCE INDEX(`partition`)"
+            " FROM obj FORCE INDEX(PRIMARY)"
            " LEFT JOIN data ON (obj.data_id = data.id)"
            " WHERE `partition` = %d AND oid = %d AND tid >= %d"
            " ORDER BY tid DESC LIMIT %d, %d" %
@@ -722,7 +721,7 @@ class MySQLDatabaseManager(DatabaseManager):
        u64 = util.u64
        p64 = util.p64
        min_tid = u64(min_tid)
-        r = self.query('SELECT tid, oid FROM obj FORCE INDEX(PRIMARY)'
+        r = self.query('SELECT tid, oid FROM obj FORCE INDEX(tid)'
                       ' WHERE `partition` = %d AND tid <= %d'
                       ' AND (tid = %d AND %d <= oid OR %d < tid)'
                       ' ORDER BY tid ASC, oid ASC LIMIT %d' % (
@@ -787,7 +786,7 @@ class MySQLDatabaseManager(DatabaseManager):
        q = self.query
        self._setPackTID(tid)
        for count, oid, max_serial in q("SELECT COUNT(*) - 1, oid, MAX(tid)"
-                                        " FROM obj FORCE INDEX(`partition`)"
+                                        " FROM obj FORCE INDEX(PRIMARY)"
                                        " WHERE tid <= %d GROUP BY oid"
                                        % tid):
            partition = getPartition(oid)
@@ -838,7 +837,7 @@ class MySQLDatabaseManager(DatabaseManager):
        # last grouped value, instead of the greatest one.
        r = self.query(
            """SELECT tid, oid
-               FROM obj FORCE INDEX(PRIMARY)
+               FROM obj FORCE INDEX(tid)
               WHERE `partition` = %(partition)s
                 AND tid <= %(max_tid)d
                 AND (tid > %(min_tid)d OR

--- a/neo/storage/database/sqlite.py
+++ b/neo/storage/database/sqlite.py
@@ -163,10 +163,10 @@ class SQLiteDatabaseManager(DatabaseManager):
                 tid INTEGER NOT NULL,
                 data_id INTEGER,
                 value_tid INTEGER,
-                 PRIMARY KEY (partition, tid, oid))
+                 PRIMARY KEY (partition, oid, tid))
          """)
        q("""CREATE INDEX IF NOT EXISTS _obj_i1 ON
-                 obj(partition, oid, tid)
+                 obj(partition, tid, oid)
          """)
        q("""CREATE INDEX IF NOT EXISTS _obj_i2 ON
                 obj(data_id)