Commit a6b7f91b authored by Adam Hegyi's avatar Adam Hegyi

Merge branch '211802-optimize-service_desk_enabled_projects-counter-in-usage_data' into 'master'

Optimize service_desk_enabled_projects counter in usage_data

Closes #211802

See merge request gitlab-org/gitlab!27589
parents 7ee61f74 1956550b
---
title: Optimize service desk enabled projects counter
merge_request: 27589
author:
type: performance
# frozen_string_literal: true
class AddIndexOnIdCreatorIdAndCreatedAtToProjectsTable < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_service_desk_enabled_projects_on_id_creator_id_created_at'
disable_ddl_transaction!
def up
add_concurrent_index :projects, [:id, :creator_id, :created_at], where: '"projects"."service_desk_enabled" = TRUE', name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :projects, INDEX_NAME
end
end
......@@ -9925,6 +9925,8 @@ CREATE INDEX index_serverless_domain_cluster_on_creator_id ON public.serverless_
CREATE INDEX index_serverless_domain_cluster_on_pages_domain_id ON public.serverless_domain_cluster USING btree (pages_domain_id);
CREATE INDEX index_service_desk_enabled_projects_on_id_creator_id_created_at ON public.projects USING btree (id, creator_id, created_at) WHERE (service_desk_enabled = true);
CREATE INDEX index_services_on_project_id_and_type ON public.services USING btree (project_id, type);
CREATE INDEX index_services_on_template ON public.services USING btree (template);
......@@ -12847,6 +12849,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200323080714
20200323122201
20200323134519
20200324093258
20200324115359
20200325152327
20200325160952
......
......@@ -281,7 +281,7 @@ module EE
projects_jira_active: distinct_count(::Project.with_active_jira_services.where(time_period), :creator_id),
projects_jira_dvcs_cloud_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_cloud.where(time_period), :creator_id),
projects_jira_dvcs_server_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_server.where(time_period), :creator_id),
service_desk_enabled_projects: distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), :creator_id),
service_desk_enabled_projects: distinct_count_service_desk_enabled_projects(time_period),
service_desk_issues: count(::Issue.service_desk.where(time_period)),
todos: distinct_count(::Todo.where(time_period), :author_id)
}
......@@ -335,6 +335,16 @@ module EE
results
end
private
def distinct_count_service_desk_enabled_projects(time_period)
project_creator_id_start = ::User.minimum(:id)
project_creator_id_finish = ::User.maximum(:id)
distinct_count(::Project.service_desk_enabled.where(time_period), :creator_id, start: project_creator_id_start, finish: project_creator_id_finish)
end
# rubocop:enable CodeReuse/ActiveRecord
end
end
......
......@@ -4,12 +4,18 @@
# Implements a distinct and ordinary batch counter
# Needs indexes on the column below to calculate max, min and range queries
# For larger tables just set use higher batch_size with index optimization
#
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
module Gitlab
module Database
module BatchCount
......@@ -17,8 +23,8 @@ module Gitlab
BatchCounter.new(relation, column: column).count(batch_size: batch_size)
end
def batch_distinct_count(relation, column = nil, batch_size: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size)
def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
end
class << self
......@@ -31,9 +37,10 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
# Each query should take <<500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000
DEFAULT_BATCH_SIZE = 100_000
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 100_000
DEFAULT_BATCH_SIZE = 10_000
def initialize(relation, column: nil)
@relation = relation
......@@ -46,15 +53,15 @@ module Gitlab
start > finish
end
def count(batch_size: nil, mode: :itself)
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
# non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
start = @relation.minimum(@column) || 0
finish = @relation.maximum(@column) || 0
start = actual_start(start)
finish = actual_finish(finish)
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
......@@ -84,6 +91,16 @@ module Gitlab
# rubocop:disable GitlabSecurity/PublicSend
@relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count
end
private
def actual_start(start)
start || @relation.minimum(@column) || 0
end
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
end
end
end
end
......@@ -240,9 +240,9 @@ module Gitlab
fallback
end
def distinct_count(relation, column = nil, fallback: -1, batch: true)
def distinct_count(relation, column = nil, fallback: -1, batch: true, start: nil, finish: nil)
if batch && Feature.enabled?(:usage_ping_batch_counter, default_enabled: true)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column, start: start, finish: finish)
else
relation.distinct_count_by(column)
end
......
......@@ -90,5 +90,13 @@ describe Gitlab::Database::BatchCount do
[1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) }
end
it 'counts with a start and finish' do
expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2)
end
it 'counts with User min and max as start and finish' do
expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment