Commit 7469e55b by Julien Muchembled

storage: faster resumption when many transactions have already been imported to MySQL

The previous SQL query caused a full table scan of the 'trans' table at startup.
1 parent 0f0700a8
......@@ -271,8 +271,8 @@ class MySQLDatabaseManager(DatabaseManager):
return self.query("SELECT * FROM pt")
def getLastTID(self, max_tid):
# Return the greatest `tid` stored in the `trans` table that is <= max_tid,
# taken from the first column of the first row of the query result.
#
# NOTE(review): this span comes from a diff whose +/- markers were lost, so
# both versions of the query appear one after the other. The statement
# right below is presumably the previous implementation: according to the
# commit message, the previous query caused a full scan of `trans` at
# startup.
return self.query("SELECT MAX(tid) FROM trans WHERE tid<=%s"
% max_tid)[0][0]
# Presumably the replacement: the inner SELECT computes one MAX(tid) per
# `partition` (restricted to tid <= max_tid) and the outer SELECT takes the
# maximum of those per-partition values.
return self.query("SELECT MAX(t) FROM (SELECT MAX(tid) as t FROM trans"
" WHERE tid<=%s GROUP BY `partition`) as t" % max_tid)[0][0]
def _getLastIDs(self, all=True):
p64 = util.p64
......
......@@ -206,6 +206,16 @@ class SQLiteDatabaseManager(DatabaseManager):
def getPartitionTable(self):
    """Return whatever ``self.query`` yields for all rows of the ``pt`` table."""
    rows = self.query("SELECT * FROM pt")
    return rows
# A test with a table of 20 million lines and SQLite 3.8.7.1 shows that
# it's not worth changing getLastTID:
# - It already returns the result in less than 2 seconds, without reading
# the whole table (this is 4-7 times faster than MySQL).
# - Strangely, a "GROUP BY partition" clause makes SQLite almost twice
# as slow.
# - Getting MAX(tid) is immediate with an "AND partition=?" condition so one
# way to speed up the following 2 methods is to repeat the queries for
# each partition (and finish in Python with max() for getLastTID).
def getLastTID(self, max_tid):
    """Return the greatest ``tid`` in ``trans`` that is <= ``max_tid``.

    The value is the first column of the first row produced by the query;
    ``max_tid`` is passed as a bound parameter rather than interpolated.
    """
    cursor = self.query("SELECT MAX(tid) FROM trans WHERE tid<=?",
                        (max_tid,))
    first_row = cursor.next()
    return first_row[0]
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!