Commit 01692785 authored by Adam Hegyi's avatar Adam Hegyi

Merge branch 'id-adapt-batch-count-for-grouped-records' into 'master'

Adapt BatchCount for grouped records

See merge request gitlab-org/gitlab!42326
parents 74b8fc94 47d58335
......@@ -8,15 +8,20 @@
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically
#
# Grouped relations can be used as well. However, the preferred batch count should be around 10K because group by count is more expensive.
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
# batch_sum(Issue.group(:state_id), :weight))
module Gitlab
module Database
module BatchCount
......@@ -77,12 +82,12 @@ module Gitlab
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
counter = 0
results = nil
batch_start = start
while batch_start <= finish
begin
counter += batch_fetch(batch_start, batch_start + batch_size, mode)
results = merge_results(results, batch_fetch(batch_start, batch_start + batch_size, mode))
batch_start += batch_size
rescue ActiveRecord::QueryCanceled
# retry with a safe batch size & warmer cache
......@@ -95,7 +100,17 @@ module Gitlab
sleep(SLEEP_TIME_IN_SECONDS)
end
counter
results
end
def merge_results(results, object)
return object unless results
if object.is_a?(Hash)
results.merge!(object) { |_, a, b| a + b }
else
results + object
end
end
def batch_fetch(start, finish, mode)
......@@ -118,11 +133,11 @@ module Gitlab
end
def actual_start(start)
start || @relation.minimum(@column) || 0
start || @relation.unscope(:group).minimum(@column) || 0
end
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
finish || @relation.unscope(:group).maximum(@column) || 0
end
def check_mode!(mode)
......
......@@ -108,6 +108,24 @@ RSpec.describe Gitlab::Database::BatchCount do
expect { described_class.batch_count(model.distinct(column)) }.to raise_error 'Use distinct count for optimized distinct counting'
end
end
context 'when a relation is grouped' do
let!(:one_more_issue) { create(:issue, author: user, project: model.first.project) }
before do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 1)
end
context 'count by default column' do
let(:count) do
described_class.batch_count(model.group(column), batch_size: 2)
end
it 'counts grouped records' do
expect(count).to eq({ user.id => 4, another_user.id => 2 })
end
end
end
end
describe '#batch_distinct_count' do
......@@ -175,6 +193,24 @@ RSpec.describe Gitlab::Database::BatchCount do
end.to raise_error 'Use distinct count only with non id fields'
end
end
context 'when a relation is grouped' do
let!(:one_more_issue) { create(:issue, author: user, project: model.first.project) }
before do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 1)
end
context 'distinct count by non-unique column' do
let(:count) do
described_class.batch_distinct_count(model.group(column), :project_id, batch_size: 2)
end
it 'counts grouped records' do
expect(count).to eq({ user.id => 3, another_user.id => 2 })
end
end
end
end
describe '#batch_sum' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment