Commit 7d68d72f authored by Alper Akgun's avatar Alper Akgun

Fix usage ping timeouts with batch counters

Usage pings time out on large instances with large tables. This change
adds batch counters to fix the issue
parent 2ab97d54
---
title: Fix usage ping timeouts with batch counters
merge_request: 22705
author:
type: performance
This diff is collapsed.
......@@ -17,6 +17,7 @@ describe Admin::InstanceReviewController do
context 'with usage ping enabled' do
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
stub_application_setting(usage_ping_enabled: true)
::Gitlab::UsageData.data(force_refresh: true)
subject
......
This diff is collapsed.
......@@ -3,6 +3,10 @@
require 'spec_helper'
describe Gitlab::UsageData do
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
end
describe '#data' do
# using Array.new to create a different creator User for each of the projects
let_it_be(:projects) { Array.new(3) { create(:project, :repository, creator: create(:user, group_view: :security_dashboard)) } }
......
# frozen_string_literal: true
# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
# Implements a distinct and ordinary batch counter
# Needs indexes on the column below to calculate max, min and range queries
# For larger tables just set use higher batch_size with index optimization
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id)
module Gitlab
module Database
module BatchCount
def batch_count(relation, column = nil, batch_size: nil)
BatchCounter.new(relation, column: column).count(batch_size: batch_size)
end
def batch_distinct_count(relation, column = nil, batch_size: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size)
end
class << self
include BatchCount
end
end
class BatchCounter
FALLBACK = -1
MIN_REQUIRED_BATCH_SIZE = 2_000
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
# Each query should take <<500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 100_000
DEFAULT_BATCH_SIZE = 10_000
def initialize(relation, column: nil)
@relation = relation
@column = column || relation.primary_key
end
def unwanted_configuration?(finish, batch_size, start)
batch_size <= MIN_REQUIRED_BATCH_SIZE ||
(finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
start > finish
end
def count(batch_size: nil, mode: :itself)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
# non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_BATCH_SIZE : DEFAULT_DISTINCT_BATCH_SIZE
start = @relation.minimum(@column) || 0
finish = @relation.maximum(@column) || 0
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
counter = 0
batch_start = start
while batch_start <= finish
begin
counter += batch_fetch(batch_start, batch_start + batch_size, mode)
batch_start += batch_size
rescue ActiveRecord::QueryCanceled
# retry with a safe batch size & warmer cache
if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
batch_size /= 2
else
return FALLBACK
end
end
sleep(SLEEP_TIME_IN_SECONDS)
end
counter
end
def batch_fetch(start, finish, mode)
# rubocop:disable GitlabSecurity/PublicSend
@relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count
end
end
end
end
......@@ -67,8 +67,8 @@ module Gitlab
clusters_disabled: count(::Clusters::Cluster.disabled),
project_clusters_disabled: count(::Clusters::Cluster.disabled.project_type),
group_clusters_disabled: count(::Clusters::Cluster.disabled.group_type),
clusters_platforms_eks: count(::Clusters::Cluster.aws_installed.enabled),
clusters_platforms_gke: count(::Clusters::Cluster.gcp_installed.enabled),
clusters_platforms_eks: count(::Clusters::Cluster.aws_installed.enabled, batch: false),
clusters_platforms_gke: count(::Clusters::Cluster.gcp_installed.enabled, batch: false),
clusters_platforms_user: count(::Clusters::Cluster.user_provided.enabled),
clusters_applications_helm: count(::Clusters::Applications::Helm.available),
clusters_applications_ingress: count(::Clusters::Applications::Ingress.available),
......@@ -85,7 +85,7 @@ module Gitlab
issues: count(Issue),
issues_created_from_gitlab_error_tracking_ui: count(SentryIssue),
issues_with_associated_zoom_link: count(ZoomMeeting.added_to_issue),
issues_using_zoom_quick_actions: count(ZoomMeeting.select(:issue_id).distinct),
issues_using_zoom_quick_actions: count(ZoomMeeting.select(:issue_id).distinct, batch: false),
issues_with_embedded_grafana_charts_approx: ::Gitlab::GrafanaEmbedUsageData.issue_count,
incident_issues: count(::Issue.authored(::User.alert_bot)),
keys: count(Key),
......@@ -99,7 +99,7 @@ module Gitlab
projects_imported_from_github: count(Project.where(import_type: 'github')),
projects_with_repositories_enabled: count(ProjectFeature.where('repository_access_level > ?', ProjectFeature::DISABLED)),
projects_with_error_tracking_enabled: count(::ErrorTracking::ProjectErrorTrackingSetting.where(enabled: true)),
projects_with_alerts_service_enabled: count(AlertsService.active),
projects_with_alerts_service_enabled: count(AlertsService.active, batch: false),
protected_branches: count(ProtectedBranch),
releases: count(Release),
remote_mirrors: count(RemoteMirror),
......@@ -181,7 +181,7 @@ module Gitlab
# rubocop: disable CodeReuse/ActiveRecord
def services_usage
service_counts = count(Service.active.where(template: false).where.not(type: 'JiraService').group(:type), fallback: Hash.new(-1))
service_counts = count(Service.active.where(template: false).where.not(type: 'JiraService').group(:type), fallback: Hash.new(-1), batch: false)
results = Service.available_services_names.each_with_object({}) do |service_name, response|
response["projects_#{service_name}_active".to_sym] = service_counts["#{service_name}_service".camelize] || 0
......@@ -217,9 +217,9 @@ module Gitlab
results[:projects_jira_server_active] += counts[:server].count if counts[:server]
results[:projects_jira_cloud_active] += counts[:cloud].count if counts[:cloud]
if results[:projects_jira_active] == -1
results[:projects_jira_active] = count(services)
results[:projects_jira_active] = count(services, batch: false)
else
results[:projects_jira_active] += count(services)
results[:projects_jira_active] += count(services, batch: false)
end
end
......@@ -231,8 +231,22 @@ module Gitlab
{} # augmented in EE
end
def count(relation, fallback: -1)
relation.count
def count(relation, column = nil, fallback: -1, batch: true)
if batch && Feature.enabled?(:usage_ping_batch_counter)
Gitlab::Database::BatchCount.batch_count(relation, column)
else
relation.count
end
rescue ActiveRecord::StatementInvalid
fallback
end
def distinct_count(relation, column = nil, fallback: -1, batch: true)
if batch && Feature.enabled?(:usage_ping_batch_counter)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column)
else
relation.distinct_count_by(column)
end
rescue ActiveRecord::StatementInvalid
fallback
end
......
......@@ -16,6 +16,7 @@ describe Admin::ApplicationSettingsController do
describe 'GET #usage_data with no access' do
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
sign_in(user)
end
......@@ -28,6 +29,7 @@ describe Admin::ApplicationSettingsController do
describe 'GET #usage_data' do
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
sign_in(admin)
end
......
......@@ -326,6 +326,8 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
end
it 'loads usage ping payload on click', :js do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
expect(page).to have_button 'Preview payload'
find('.js-usage-ping-payload-trigger').click
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Database::BatchCount do
let(:model) { Issue }
let(:column) { :author_id }
let(:in_transaction) { false }
let(:user) { create(:user) }
let(:another_user) { create(:user) }
before do
create_list(:issue, 3, author: user )
create_list(:issue, 2, author: another_user )
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(in_transaction)
end
describe '#batch_count' do
it 'counts table' do
expect(described_class.batch_count(model)).to eq(5)
end
it 'counts with :id field' do
expect(described_class.batch_count(model, :id)).to eq(5)
end
it 'counts with "id" field' do
expect(described_class.batch_count(model, 'id')).to eq(5)
end
it 'counts with table.id field' do
expect(described_class.batch_count(model, "#{model.table_name}.id")).to eq(5)
end
it 'counts table with batch_size 50K' do
expect(described_class.batch_count(model, batch_size: 50_000)).to eq(5)
end
it 'will not count table with batch_size 1K' do
fallback = ::Gitlab::Database::BatchCounter::FALLBACK
expect(described_class.batch_count(model, batch_size: fallback / 2)).to eq(fallback)
end
it 'counts with a small edge case batch_sizes than result' do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0)
[1, 2, 4, 5, 6].each { |i| expect(described_class.batch_count(model, batch_size: i)).to eq(5) }
end
context 'in a transaction' do
let(:in_transaction) { true }
it 'cannot count' do
expect do
described_class.batch_count(model)
end.to raise_error 'BatchCount can not be run inside a transaction'
end
end
end
describe '#batch_distinct_count' do
it 'counts with :id field' do
expect(described_class.batch_distinct_count(model, :id)).to eq(5)
end
it 'counts with column field' do
expect(described_class.batch_distinct_count(model, column)).to eq(2)
end
it 'counts with "id" field' do
expect(described_class.batch_distinct_count(model, "#{column}")).to eq(2)
end
it 'counts with table.column field' do
expect(described_class.batch_distinct_count(model, "#{model.table_name}.#{column}")).to eq(2)
end
it 'counts with :column field with batch_size of 50K' do
expect(described_class.batch_distinct_count(model, column, batch_size: 50_000)).to eq(2)
end
it 'will not count table with batch_size 1K' do
fallback = ::Gitlab::Database::BatchCounter::FALLBACK
expect(described_class.batch_distinct_count(model, column, batch_size: fallback / 2)).to eq(fallback)
end
it 'counts with a small edge case batch_sizes than result' do
stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0)
[1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) }
end
end
end
This diff is collapsed.
......@@ -76,6 +76,7 @@ describe SubmitUsagePingService do
context 'when usage ping is enabled' do
before do
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
stub_application_setting(usage_ping_enabled: true)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment