CMFActivity: new activate() parameter to prefer executing on the same node
The goal is to make better use of the ZODB Storage cache. It is common to do processing on a data set in several sequential transactions: in such a case, by continuing execution of these messages on the same node, data is loaded from ZODB only once. Without this, and if there are many other messages to process, processing always continues on a random node, causing much more load from ZODB. To prevent nodes from having too much work to do, or too little compared to other nodes, this new parameter is only a hint for CMFActivity. It remains possible for a node to execute a message that was intended for another node. Before this commit, a processing node selects the first message(s) according to the following ordering: priority, date; and now: priority, node_preference, date; where node_preference is: -1 -> same node, 0 -> no preferred node, 1 -> another node. The implementation is tricky for 2 reasons: - MariaDB can't order this way in a single simple query, so we have 1 subquery for each case, potentially getting 3 times the wanted maximum of messages, then order/filter on the resulting union. - MariaDB also can't efficiently filter messages for other nodes, so the 3rd subquery returns messages for any node, potentially duplicating results from the first 2 subqueries. This works because they'll be ordered last. Unfortunately, this requires extra indices. In any case, message reservation must be very efficient, or MariaDB deadlocks quickly happen, and locking an activity table during reservation reduces parallelism too much. In addition to better cache efficiency, this new feature can be used as a workaround for a bug affecting serialization_tag, causing IntegrityError when reindexing many new objects. 
If you have 2 recursive reindexations for both a document and one of its lines, and if you have so many messages that grouping is split between these 2 messages, then you end up with 2 nodes indexing the same line in parallel: for some tables, the pattern DELETE+INSERT conflicts since InnoDB does not take any lock when deleting a non-existent row. If you have many activities creating such documents, you can combine with grouping and appropriate priority to make sure that such a pair of messages won't be executed on different nodes, except maybe at the end (when there's no document to create anymore; then activity reexecution may be enough). For example: from Products.CMFActivity.ActivityTool import getCurrentNode portal.setPlacelessDefaultReindexParameters( activate_kw={'node': 'same', 'priority': priority}, group_id=getCurrentNode()) where `priority` is the same as the activity containing the above code, which can also use grouping without increasing the probability of IntegrityError.
... | @@ -134,6 +134,7 @@ CREATE TABLE %s ( | ... | @@ -134,6 +134,7 @@ CREATE TABLE %s ( |
`method_id` VARCHAR(255) NOT NULL, | `method_id` VARCHAR(255) NOT NULL, | ||
`processing_node` SMALLINT NOT NULL DEFAULT -1, | `processing_node` SMALLINT NOT NULL DEFAULT -1, | ||
`priority` TINYINT NOT NULL DEFAULT 0, | `priority` TINYINT NOT NULL DEFAULT 0, | ||
`node` SMALLINT NOT NULL DEFAULT 0, | |||
`group_method_id` VARCHAR(255) NOT NULL DEFAULT '', | `group_method_id` VARCHAR(255) NOT NULL DEFAULT '', | ||
`tag` VARCHAR(255) NOT NULL, | `tag` VARCHAR(255) NOT NULL, | ||
`serialization_tag` VARCHAR(255) NOT NULL, | `serialization_tag` VARCHAR(255) NOT NULL, | ||
... | @@ -141,7 +142,9 @@ CREATE TABLE %s ( | ... | @@ -141,7 +142,9 @@ CREATE TABLE %s ( |
`message` LONGBLOB NOT NULL, | `message` LONGBLOB NOT NULL, | ||
PRIMARY KEY (`uid`), | PRIMARY KEY (`uid`), | ||
KEY `processing_node_priority_date` (`processing_node`, `priority`, `date`), | KEY `processing_node_priority_date` (`processing_node`, `priority`, `date`), | ||
KEY `node2_priority_date` (`processing_node`, `node`, `priority`, `date`), | |||
KEY `node_group_priority_date` (`processing_node`, `group_method_id`, `priority`, `date`), | KEY `node_group_priority_date` (`processing_node`, `group_method_id`, `priority`, `date`), | ||
KEY `node2_group_priority_date` (`processing_node`, `node`, `group_method_id`, `priority`, `date`), | |||
KEY `serialization_tag_processing_node` (`serialization_tag`, `processing_node`), | KEY `serialization_tag_processing_node` (`serialization_tag`, `processing_node`), | ||
KEY (`path`), | KEY (`path`), | ||
KEY (`active_process_uid`), | KEY (`active_process_uid`), | ||
... | @@ -172,7 +175,7 @@ CREATE TABLE %s ( | ... | @@ -172,7 +175,7 @@ CREATE TABLE %s ( |
_insert_template = ("INSERT INTO %s (uid," | _insert_template = ("INSERT INTO %s (uid," | ||
" path, active_process_uid, date, method_id, processing_node," | " path, active_process_uid, date, method_id, processing_node," | ||
" priority, group_method_id, tag, serialization_tag," | " priority, node, group_method_id, tag, serialization_tag," | ||
" message) VALUES\n(%s)") | " message) VALUES\n(%s)") | ||
_insert_separator = "),\n(" | _insert_separator = "),\n(" | ||
... | @@ -216,6 +219,7 @@ CREATE TABLE %s ( | ... | @@ -216,6 +219,7 @@ CREATE TABLE %s ( |
quote(m.method_id), | quote(m.method_id), | ||
'0' if order_validation_text == 'none' else '-1', | '0' if order_validation_text == 'none' else '-1', | ||
str(m.activity_kw.get('priority', 1)), | str(m.activity_kw.get('priority', 1)), | ||
str(m.activity_kw.get('node', 0)), | |||
quote(m.getGroupId()), | quote(m.getGroupId()), | ||
quote(m.activity_kw.get('tag', '')), | quote(m.activity_kw.get('tag', '')), | ||
quote(m.activity_kw.get('serialization_tag', '')), | quote(m.activity_kw.get('serialization_tag', '')), | ||
... | @@ -274,12 +278,26 @@ CREATE TABLE %s ( | ... | @@ -274,12 +278,26 @@ CREATE TABLE %s ( |
return "SELECT 1 FROM %s WHERE %s LIMIT 1" % ( | return "SELECT 1 FROM %s WHERE %s LIMIT 1" % ( | ||
self.sql_table, " AND ".join(where) or "1") | self.sql_table, " AND ".join(where) or "1") | ||
def getPriority(self, activity_tool): | def getPriority(self, activity_tool, node=None): | ||
result = activity_tool.getSQLConnection().query( | if node is None: | ||
"SELECT priority, date FROM %s" | q = ("SELECT 3*priority, date FROM %s" | ||
" WHERE processing_node=0 AND date <= UTC_TIMESTAMP(6)" | " WHERE processing_node=0 AND date <= UTC_TIMESTAMP(6)" | ||
" ORDER BY priority, date LIMIT 1" % self.sql_table, 0)[1] | " ORDER BY priority, date LIMIT 1" % self.sql_table) | ||
return result[0] if result else Queue.getPriority(self, activity_tool) | else: | ||
subquery = ("(SELECT 3*priority{} as effective_priority, date FROM %s" | |||
" WHERE {} AND processing_node=0 AND date <= UTC_TIMESTAMP(6)" | |||
" ORDER BY priority, date LIMIT 1)" % self.sql_table).format | |||
node = 'node=%s' % node | |||
q = ("SELECT * FROM (%s UNION ALL %s UNION %s) as t" | |||
" ORDER BY effective_priority, date LIMIT 1" % ( | |||
subquery(-1, node), | |||
subquery('', 'node=0'), | |||
subquery('+IF(node, IF(%s, -1, 1), 0)' % node, 1), | |||
|
|||
)) | |||
result = activity_tool.getSQLConnection().query(q, 0)[1] | |||
if result: | |||
return result[0] | |||
return Queue.getPriority(self, activity_tool, node) | |||
def _retryOnLockError(self, method, args=(), kw={}): | def _retryOnLockError(self, method, args=(), kw={}): | ||
while True: | while True: | ||
... | @@ -398,7 +416,7 @@ CREATE TABLE %s ( | ... | @@ -398,7 +416,7 @@ CREATE TABLE %s ( |
where_kw['above_uid'] = line.uid | where_kw['above_uid'] = line.uid | ||
def getReservedMessageList(self, db, date, processing_node, limit, | def getReservedMessageList(self, db, date, processing_node, limit, | ||
group_method_id=None): | group_method_id=None, node=None): | ||
""" | """ | ||
Get and reserve a list of messages. | Get and reserve a list of messages. | ||
limit | limit | ||
... | @@ -418,10 +436,25 @@ CREATE TABLE %s ( | ... | @@ -418,10 +436,25 @@ CREATE TABLE %s ( |
# for users and reduce the probability to do the same work several times | # for users and reduce the probability to do the same work several times | ||
# (think of an object that is modified several times in a short period of | # (think of an object that is modified several times in a short period of | ||
# time). | # time). | ||
if 1: | if node is None: | ||
result = Results(query( | result = Results(query( | ||
"SELECT * FROM %s WHERE processing_node=0 AND %s%s" | "SELECT * FROM %s WHERE processing_node=0 AND %s%s" | ||
" ORDER BY priority, date LIMIT %s FOR UPDATE" % args, 0)) | " ORDER BY priority, date LIMIT %s FOR UPDATE" % args, 0)) | ||
else: | |||
# We'd like to write | |||
# ORDER BY priority, IF(node, IF(node={node}, -1, 1), 0), date | |||
# but this makes indices inefficient. | |||
subquery = ("(SELECT *, 3*priority{} as effective_priority FROM %s" | |||
" WHERE {} AND processing_node=0 AND %s%s" | |||
" ORDER BY priority, date LIMIT %s FOR UPDATE)" % args).format | |||
node = 'node=%s' % node | |||
result = Results(query( | |||
"SELECT * FROM (%s UNION ALL %s UNION %s) as t" | |||
" ORDER BY effective_priority, date LIMIT %s"% ( | |||
subquery(-1, node), | |||
subquery('', 'node=0'), | |||
subquery('+IF(node, IF(%s, -1, 1), 0)' % node, 1), | |||
limit), 0)) | |||
if result: | if result: | ||
# Reserve messages. | # Reserve messages. | ||
uid_list = [x.uid for x in result] | uid_list = [x.uid for x in result] | ||
... | @@ -490,7 +523,7 @@ CREATE TABLE %s ( | ... | @@ -490,7 +523,7 @@ CREATE TABLE %s ( |
result = Results(result) | result = Results(result) | ||
else: | else: | ||
result = self.getReservedMessageList(db, now_date, processing_node, | result = self.getReservedMessageList(db, now_date, processing_node, | ||
1) | 1, node=processing_node) | ||
if not result: | if not result: | ||
break | break | ||
load = self.getProcessableMessageLoader(db, processing_node) | load = self.getProcessableMessageLoader(db, processing_node) | ||
... | @@ -519,7 +552,7 @@ CREATE TABLE %s ( | ... | @@ -519,7 +552,7 @@ CREATE TABLE %s ( |
# adding more results from getReservedMessageList if the | # adding more results from getReservedMessageList if the | ||
# limit is not reached. | # limit is not reached. | ||
or self.getReservedMessageList(db, now_date, processing_node, | or self.getReservedMessageList(db, now_date, processing_node, | ||
limit, group_method_id)) | limit, group_method_id, processing_node)) | ||
for line in result: | for line in result: | ||
if line.uid in uid_to_duplicate_uid_list_dict: | if line.uid in uid_to_duplicate_uid_list_dict: | ||
continue | continue | ||
... | ... |