Commit fc8a26c8 authored by Alper Akgun's avatar Alper Akgun

Disallow id field for batch counting

Using id field of the same table wouldn't make sense except in an
outer join
parent 602431a6
......@@ -37,6 +37,7 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000
......@@ -55,8 +56,8 @@ module Gitlab
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
check_mode!(mode)
# non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
......@@ -102,6 +103,12 @@ module Gitlab
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
end
def check_mode!(mode)
raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
end
end
end
......@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do
end
describe '#batch_distinct_count' do
it 'counts with :id field' do
expect(described_class.batch_distinct_count(model, :id)).to eq(5)
end
it 'counts with column field' do
expect(described_class.batch_distinct_count(model, column)).to eq(2)
end
......@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do
it 'returns fallback if batch size is less than min required' do
expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback)
end
it 'will raise an error if distinct count with the :id column is requested' do
expect do
described_class.batch_count(described_class.batch_distinct_count(model, :id))
end.to raise_error 'Use distinct count only with non id fields'
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment