Commit 5c8ce940 authored by Stan Hu's avatar Stan Hu

Fix statement timeouts in RemoveRestrictedTodos migration

On GitLab.com, the RemoveRestrictedTodos background migration
encountered about 700+ failures a day due to statement timeouts.

PostgreSQL might perform badly with a LIMIT 1 because the planner is
guessing that scanning the index in ID order will come across the
desired row in less time it will take the planner than using another
index. The order_hint does not affect the search results. For example,
`ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER BY id
ASC`.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52649
parent 4d3ff28a
...@@ -39,7 +39,15 @@ module EachBatch ...@@ -39,7 +39,15 @@ module EachBatch
# #
# of - The number of rows to retrieve per batch. # of - The number of rows to retrieve per batch.
# column - The column to use for ordering the batches. # column - The column to use for ordering the batches.
def each_batch(of: 1000, column: primary_key) # order_hint - An optional column to append to the `ORDER BY id`
# clause to help the query planner. PostgreSQL might perform badly
# with a LIMIT 1 because the planner is guessing that scanning the
# index in ID order will come across the desired row in less time
# it will take the planner than using another index. The
# order_hint does not affect the search results. For example,
# `ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER
# BY id ASC`.
def each_batch(of: 1000, column: primary_key, order_hint: nil)
unless column unless column
raise ArgumentError, raise ArgumentError,
'the column: argument must be set to a column name to use for ordering rows' 'the column: argument must be set to a column name to use for ordering rows'
...@@ -48,7 +56,9 @@ module EachBatch ...@@ -48,7 +56,9 @@ module EachBatch
start = except(:select) start = except(:select)
.select(column) .select(column)
.reorder(column => :asc) .reorder(column => :asc)
.take
start = start.order(order_hint) if order_hint
start = start.take
return unless start return unless start
...@@ -60,6 +70,9 @@ module EachBatch ...@@ -60,6 +70,9 @@ module EachBatch
.select(column) .select(column)
.where(arel_table[column].gteq(start_id)) .where(arel_table[column].gteq(start_id))
.reorder(column => :asc) .reorder(column => :asc)
stop = stop.order(order_hint) if order_hint
stop = stop
.offset(of) .offset(of)
.limit(1) .limit(1)
.take .take
......
---
title: Fix statement timeouts in RemoveRestrictedTodos migration
merge_request: 22795
author:
type: other
...@@ -67,7 +67,7 @@ module Gitlab ...@@ -67,7 +67,7 @@ module Gitlab
.where('access_level >= ?', 20) .where('access_level >= ?', 20)
confidential_issues = Issue.select(:id, :author_id).where(confidential: true, project_id: project_id) confidential_issues = Issue.select(:id, :author_id).where(confidential: true, project_id: project_id)
confidential_issues.each_batch(of: 100) do |batch| confidential_issues.each_batch(of: 100, order_hint: :confidential) do |batch|
batch.each do |issue| batch.each do |issue|
assigned_users = IssueAssignee.select(:user_id).where(issue_id: issue.id) assigned_users = IssueAssignee.select(:user_id).where(issue_id: issue.id)
......
...@@ -14,40 +14,45 @@ describe EachBatch do ...@@ -14,40 +14,45 @@ describe EachBatch do
5.times { create(:user, updated_at: 1.day.ago) } 5.times { create(:user, updated_at: 1.day.ago) }
end end
it 'yields an ActiveRecord::Relation when a block is given' do shared_examples 'each_batch handling' do |kwargs|
model.each_batch do |relation| it 'yields an ActiveRecord::Relation when a block is given' do
expect(relation).to be_a_kind_of(ActiveRecord::Relation) model.each_batch(kwargs) do |relation|
expect(relation).to be_a_kind_of(ActiveRecord::Relation)
end
end end
end
it 'yields a batch index as the second argument' do it 'yields a batch index as the second argument' do
model.each_batch do |_, index| model.each_batch(kwargs) do |_, index|
expect(index).to eq(1) expect(index).to eq(1)
end
end end
end
it 'accepts a custom batch size' do it 'accepts a custom batch size' do
amount = 0 amount = 0
model.each_batch(of: 1) { amount += 1 } model.each_batch(kwargs.merge({ of: 1 })) { amount += 1 }
expect(amount).to eq(5) expect(amount).to eq(5)
end end
it 'does not include ORDER BYs in the yielded relations' do it 'does not include ORDER BYs in the yielded relations' do
model.each_batch do |relation| model.each_batch do |relation|
expect(relation.to_sql).not_to include('ORDER BY') expect(relation.to_sql).not_to include('ORDER BY')
end
end end
end
it 'allows updating of the yielded relations' do it 'allows updating of the yielded relations' do
time = Time.now time = Time.now
model.each_batch do |relation| model.each_batch do |relation|
relation.update_all(updated_at: time) relation.update_all(updated_at: time)
end end
expect(model.where(updated_at: time).count).to eq(5) expect(model.where(updated_at: time).count).to eq(5)
end
end end
it_behaves_like 'each_batch handling', {}
it_behaves_like 'each_batch handling', { order_hint: :updated_at }
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment