Commit 2e6280a6 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '213335-disallow-id-field-for-batch-counting' into 'master'

Disallow id field for distinct batch counting

See merge request gitlab-org/gitlab!29310
parents 99fef514 fc8a26c8
......@@ -37,6 +37,7 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000
......@@ -55,8 +56,8 @@ module Gitlab
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
check_mode!(mode)
# non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
......@@ -102,6 +103,12 @@ module Gitlab
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
end
def check_mode!(mode)
raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
end
end
end
......@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do
end
describe '#batch_distinct_count' do
it 'counts with :id field' do
expect(described_class.batch_distinct_count(model, :id)).to eq(5)
end
it 'counts with column field' do
expect(described_class.batch_distinct_count(model, column)).to eq(2)
end
......@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do
it 'returns fallback if batch size is less than min required' do
expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback)
end
it 'will raise an error if distinct count with the :id column is requested' do
expect do
described_class.batch_count(described_class.batch_distinct_count(model, :id))
end.to raise_error 'Use distinct count only with non id fields'
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment