Commit fc8a26c8 authored by Alper Akgun's avatar Alper Akgun

Disallow id field for batch counting

Using id field of the same table wouldn't make sense except in an
outer join
parent 602431a6
...@@ -37,6 +37,7 @@ module Gitlab ...@@ -37,6 +37,7 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250 MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000 MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
ALLOWED_MODES = [:itself, :distinct].freeze
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000 DEFAULT_DISTINCT_BATCH_SIZE = 10_000
...@@ -55,8 +56,8 @@ module Gitlab ...@@ -55,8 +56,8 @@ module Gitlab
def count(batch_size: nil, mode: :itself, start: nil, finish: nil) def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct check_mode!(mode)
# non-distinct have better performance # non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
...@@ -102,6 +103,12 @@ module Gitlab ...@@ -102,6 +103,12 @@ module Gitlab
def actual_finish(finish) def actual_finish(finish)
finish || @relation.maximum(@column) || 0 finish || @relation.maximum(@column) || 0
end end
def check_mode!(mode)
raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)
raise 'Use distinct count for optimized distinct counting' if @relation.limit(1).distinct_value.present? && mode != :distinct
raise 'Use distinct count only with non id fields' if @column == :id && mode == :distinct
end
end end
end end
end end
...@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do ...@@ -86,10 +86,6 @@ describe Gitlab::Database::BatchCount do
end end
describe '#batch_distinct_count' do describe '#batch_distinct_count' do
it 'counts with :id field' do
expect(described_class.batch_distinct_count(model, :id)).to eq(5)
end
it 'counts with column field' do it 'counts with column field' do
expect(described_class.batch_distinct_count(model, column)).to eq(2) expect(described_class.batch_distinct_count(model, column)).to eq(2)
end end
...@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do ...@@ -137,6 +133,12 @@ describe Gitlab::Database::BatchCount do
it 'returns fallback if batch size is less than min required' do it 'returns fallback if batch size is less than min required' do
expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback) expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback)
end end
it 'will raise an error if distinct count with the :id column is requested' do
expect do
described_class.batch_count(described_class.batch_distinct_count(model, :id))
end.to raise_error 'Use distinct count only with non id fields'
end
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment