Commit 8757820e authored by Alper Akgun's avatar Alper Akgun

Merge branch 'ali/update-batch-counter-to-not-query-past-last-record' into 'master'

Update BatchCounter to stop counting at finish value [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!44641
parents d7cdc772 6a1101c7
...@@ -49,6 +49,8 @@ module Gitlab ...@@ -49,6 +49,8 @@ module Gitlab
MAX_ALLOWED_LOOPS = 10_000 MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze ALLOWED_MODES = [:itself, :distinct].freeze
FALLBACK_FINISH = 0
OFFSET_BY_ONE = 1
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000 DEFAULT_DISTINCT_BATCH_SIZE = 10_000
...@@ -65,7 +67,7 @@ module Gitlab ...@@ -65,7 +67,7 @@ module Gitlab
(@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) || (@operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE) ||
(@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) || (@operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE) ||
(finish - start) / batch_size >= MAX_ALLOWED_LOOPS || (finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
start > finish start >= finish
end end
def count(batch_size: nil, mode: :itself, start: nil, finish: nil) def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
...@@ -85,11 +87,13 @@ module Gitlab ...@@ -85,11 +87,13 @@ module Gitlab
results = nil results = nil
batch_start = start batch_start = start
while batch_start <= finish while batch_start < finish
batch_relation = build_relation_batch(batch_start, batch_start + batch_size, mode) batch_end = [batch_start + batch_size, finish].min
batch_relation = build_relation_batch(batch_start, batch_end, mode)
begin begin
results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend
batch_start += batch_size batch_start = batch_end
rescue ActiveRecord::QueryCanceled => error rescue ActiveRecord::QueryCanceled => error
# retry with a safe batch size & warmer cache # retry with a safe batch size & warmer cache
if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
...@@ -99,6 +103,7 @@ module Gitlab ...@@ -99,6 +103,7 @@ module Gitlab
return FALLBACK return FALLBACK
end end
end end
sleep(SLEEP_TIME_IN_SECONDS) sleep(SLEEP_TIME_IN_SECONDS)
end end
...@@ -138,7 +143,7 @@ module Gitlab ...@@ -138,7 +143,7 @@ module Gitlab
end end
def actual_finish(finish) def actual_finish(finish)
finish || @relation.unscope(:group, :having).maximum(@column) || 0 (finish || @relation.unscope(:group, :having).maximum(@column) || FALLBACK_FINISH) + OFFSET_BY_ONE
end end
def check_mode!(mode) def check_mode!(mode)
......
...@@ -130,6 +130,16 @@ RSpec.describe Gitlab::Database::BatchCount do ...@@ -130,6 +130,16 @@ RSpec.describe Gitlab::Database::BatchCount do
expect(described_class.batch_count(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq(5) expect(described_class.batch_count(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq(5)
end end
it 'stops counting when finish value is reached' do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0)
expect(described_class.batch_count(model,
start: model.minimum(:id),
finish: model.maximum(:id) - 1, # Do not count the last record
batch_size: model.count - 2 # Ensure there are multiple batches
)).to eq(model.count - 1)
end
it "defaults the batch size to #{Gitlab::Database::BatchCounter::DEFAULT_BATCH_SIZE}" do it "defaults the batch size to #{Gitlab::Database::BatchCounter::DEFAULT_BATCH_SIZE}" do
min_id = model.minimum(:id) min_id = model.minimum(:id)
relation = instance_double(ActiveRecord::Relation) relation = instance_double(ActiveRecord::Relation)
...@@ -242,6 +252,19 @@ RSpec.describe Gitlab::Database::BatchCount do ...@@ -242,6 +252,19 @@ RSpec.describe Gitlab::Database::BatchCount do
expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2) expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2)
end end
it 'stops counting when finish value is reached' do
# Create a new unique author that should not be counted
create(:issue)
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0)
expect(described_class.batch_distinct_count(model, column,
start: User.minimum(:id),
finish: User.maximum(:id) - 1, # Do not count the newly created issue
batch_size: model.count - 2 # Ensure there are multiple batches
)).to eq(2)
end
it 'counts with User min and max as start and finish' do it 'counts with User min and max as start and finish' do
expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2) expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment