Commit 47d58335 authored by Igor Drozdov's avatar Igor Drozdov Committed by Adam Hegyi

Adapt BatchCount for grouped records

If the relation is grouped, Rails returns Hash: { group-key => count }
https://apidock.com/rails/ActiveRecord/Calculations/count

Since batch_count expects Integer in order to sum the counts up,
we receive an error. We can sum up the Hash's size in this case.
parent 8a084bf2
...@@ -8,15 +8,20 @@ ...@@ -8,15 +8,20 @@
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count # In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically # the start and finish can be sent specifically
# #
# Grouped relations can be used as well. However, the preferred batch count should be around 10K because group by count is more expensive.
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
# #
# Examples: # Examples:
# extend ::Gitlab::Database::BatchCount # extend ::Gitlab::Database::BatchCount
# batch_count(User.active) # batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id) # batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id) # batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id)) # batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count) # batch_sum(User, :sign_in_count)
# batch_sum(Issue.group(:state_id), :weight))
module Gitlab module Gitlab
module Database module Database
module BatchCount module BatchCount
...@@ -77,12 +82,12 @@ module Gitlab ...@@ -77,12 +82,12 @@ module Gitlab
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0 raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start) return FALLBACK if unwanted_configuration?(finish, batch_size, start)
counter = 0 results = nil
batch_start = start batch_start = start
while batch_start <= finish while batch_start <= finish
begin begin
counter += batch_fetch(batch_start, batch_start + batch_size, mode) results = merge_results(results, batch_fetch(batch_start, batch_start + batch_size, mode))
batch_start += batch_size batch_start += batch_size
rescue ActiveRecord::QueryCanceled rescue ActiveRecord::QueryCanceled
# retry with a safe batch size & warmer cache # retry with a safe batch size & warmer cache
...@@ -95,7 +100,17 @@ module Gitlab ...@@ -95,7 +100,17 @@ module Gitlab
sleep(SLEEP_TIME_IN_SECONDS) sleep(SLEEP_TIME_IN_SECONDS)
end end
counter results
end
def merge_results(results, object)
return object unless results
if object.is_a?(Hash)
results.merge!(object) { |_, a, b| a + b }
else
results + object
end
end end
def batch_fetch(start, finish, mode) def batch_fetch(start, finish, mode)
...@@ -118,11 +133,11 @@ module Gitlab ...@@ -118,11 +133,11 @@ module Gitlab
end end
def actual_start(start) def actual_start(start)
start || @relation.minimum(@column) || 0 start || @relation.unscope(:group).minimum(@column) || 0
end end
def actual_finish(finish) def actual_finish(finish)
finish || @relation.maximum(@column) || 0 finish || @relation.unscope(:group).maximum(@column) || 0
end end
def check_mode!(mode) def check_mode!(mode)
......
...@@ -108,6 +108,24 @@ RSpec.describe Gitlab::Database::BatchCount do ...@@ -108,6 +108,24 @@ RSpec.describe Gitlab::Database::BatchCount do
expect { described_class.batch_count(model.distinct(column)) }.to raise_error 'Use distinct count for optimized distinct counting' expect { described_class.batch_count(model.distinct(column)) }.to raise_error 'Use distinct count for optimized distinct counting'
end end
end end
context 'when a relation is grouped' do
let!(:one_more_issue) { create(:issue, author: user, project: model.first.project) }
before do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 1)
end
context 'count by default column' do
let(:count) do
described_class.batch_count(model.group(column), batch_size: 2)
end
it 'counts grouped records' do
expect(count).to eq({ user.id => 4, another_user.id => 2 })
end
end
end
end end
describe '#batch_distinct_count' do describe '#batch_distinct_count' do
...@@ -175,6 +193,24 @@ RSpec.describe Gitlab::Database::BatchCount do ...@@ -175,6 +193,24 @@ RSpec.describe Gitlab::Database::BatchCount do
end.to raise_error 'Use distinct count only with non id fields' end.to raise_error 'Use distinct count only with non id fields'
end end
end end
context 'when a relation is grouped' do
let!(:one_more_issue) { create(:issue, author: user, project: model.first.project) }
before do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 1)
end
context 'distinct count by non-unique column' do
let(:count) do
described_class.batch_distinct_count(model.group(column), :project_id, batch_size: 2)
end
it 'counts grouped records' do
expect(count).to eq({ user.id => 3, another_user.id => 2 })
end
end
end
end end
describe '#batch_sum' do describe '#batch_sum' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment