Commit 786422e6 authored by Alper Akgun

Merge branch 'bypass-min-max-in-ee' into 'master'

Override min/max for usage data queries in EE module

See merge request gitlab-org/gitlab!59365
parents f265f394 1f443ede
...@@ -137,7 +137,7 @@ module EE ...@@ -137,7 +137,7 @@ module EE
end end
def count_approval_rules_with_users(relation) def count_approval_rules_with_users(relation)
count(relation, batch_size: 10_000, start: ApprovalProjectRule.regular.minimum(:id), finish: ApprovalProjectRule.regular.maximum(:id)).size count(relation, batch_size: 10_000, start: minimum_id(ApprovalProjectRule.regular), finish: maximum_id(ApprovalProjectRule.regular)).size
end end
# rubocop:enable CodeReuse/ActiveRecord, UsageData/LargeTable # rubocop:enable CodeReuse/ActiveRecord, UsageData/LargeTable
...@@ -196,15 +196,15 @@ module EE ...@@ -196,15 +196,15 @@ module EE
epic_issues: count(::EpicIssue), epic_issues: count(::EpicIssue),
feature_flags: count(Operations::FeatureFlag), feature_flags: count(Operations::FeatureFlag),
geo_nodes: count(::GeoNode), geo_nodes: count(::GeoNode),
geo_event_log_max_id: alt_usage_data { Geo::EventLog.maximum(:id) || 0 }, geo_event_log_max_id: alt_usage_data { maximum_id(Geo::EventLog) || 0 },
ldap_group_links: count(::LdapGroupLink), ldap_group_links: count(::LdapGroupLink),
issues_with_health_status: count(::Issue.with_health_status, start: minimum_id(::Issue), finish: maximum_id(::Issue)), issues_with_health_status: count(::Issue.with_health_status, start: minimum_id(::Issue), finish: maximum_id(::Issue)),
ldap_keys: count(::LDAPKey), ldap_keys: count(::LDAPKey),
ldap_users: count(::User.ldap, 'users.id'), ldap_users: count(::User.ldap, 'users.id'),
pod_logs_usages_total: redis_usage_data { ::Gitlab::UsageCounters::PodLogs.usage_totals[:total] }, pod_logs_usages_total: redis_usage_data { ::Gitlab::UsageCounters::PodLogs.usage_totals[:total] },
merged_merge_requests_using_approval_rules: count(::MergeRequest.merged.joins(:approval_rules), # rubocop: disable CodeReuse/ActiveRecord merged_merge_requests_using_approval_rules: count(::MergeRequest.merged.joins(:approval_rules), # rubocop: disable CodeReuse/ActiveRecord
start: merge_request_minimum_id, start: minimum_id(::MergeRequest),
finish: merge_request_maximum_id), finish: maximum_id(::MergeRequest)),
projects_mirrored_with_pipelines_enabled: count(::Project.mirrored_with_enabled_pipelines), projects_mirrored_with_pipelines_enabled: count(::Project.mirrored_with_enabled_pipelines),
projects_reporting_ci_cd_back_to_github: count(::GithubService.active), projects_reporting_ci_cd_back_to_github: count(::GithubService.active),
status_page_projects: count(::StatusPage::ProjectSetting.enabled), status_page_projects: count(::StatusPage::ProjectSetting.enabled),
...@@ -226,12 +226,6 @@ module EE ...@@ -226,12 +226,6 @@ module EE
) )
end end
# Wraps `::Epic.deepest_relationship_level`, coerced with `to_i`, in a
# single-key hash.
#
# @return [Hash{Symbol => Integer}] `{ epics_deepest_relationship_level: <int> }`
def epics_deepest_relationship_level
  # rubocop: disable UsageData/LargeTable
  deepest_level = ::Epic.deepest_relationship_level.to_i
  # rubocop: enable UsageData/LargeTable

  { epics_deepest_relationship_level: deepest_level }
end
# Omitted because no user, creator or author associated: `auto_devops_disabled`, `auto_devops_enabled` # Omitted because no user, creator or author associated: `auto_devops_disabled`, `auto_devops_enabled`
# Omitted because not in use anymore: `gcp_clusters`, `gcp_clusters_disabled`, `gcp_clusters_enabled` # Omitted because not in use anymore: `gcp_clusters`, `gcp_clusters_disabled`, `gcp_clusters_enabled`
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
...@@ -252,8 +246,8 @@ module EE ...@@ -252,8 +246,8 @@ module EE
projects_with_sectional_code_owner_rules: projects_with_sectional_code_owner_rules(time_period), projects_with_sectional_code_owner_rules: projects_with_sectional_code_owner_rules(time_period),
merge_requests_with_added_rules: distinct_count(::ApprovalMergeRequestRule.where(time_period).with_added_approval_rules, merge_requests_with_added_rules: distinct_count(::ApprovalMergeRequestRule.where(time_period).with_added_approval_rules,
:merge_request_id, :merge_request_id,
start: approval_merge_request_rule_minimum_id, start: minimum_id(::ApprovalMergeRequestRule, :merge_request_id),
finish: approval_merge_request_rule_maximum_id), finish: maximum_id(::ApprovalMergeRequestRule, :merge_request_id)),
merge_requests_with_optional_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_optional.where(time_period), :merge_request_id), merge_requests_with_optional_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_optional.where(time_period), :merge_request_id),
merge_requests_with_overridden_project_rules: merge_requests_with_overridden_project_rules(time_period), merge_requests_with_overridden_project_rules: merge_requests_with_overridden_project_rules(time_period),
merge_requests_with_required_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_required.where(time_period), :merge_request_id), merge_requests_with_required_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_required.where(time_period), :merge_request_id),
...@@ -426,8 +420,8 @@ module EE ...@@ -426,8 +420,8 @@ module EE
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
# rubocop: disable UsageData/LargeTable # rubocop: disable UsageData/LargeTable
def count_secure_scans(time_period) def count_secure_scans(time_period)
start = ::Security::Scan.minimum(:build_id) start = minimum_id(::Security::Scan, :build_id)
finish = ::Security::Scan.maximum(:build_id) finish = maximum_id(::Security::Scan, :build_id)
{}.tap do |secure_jobs| {}.tap do |secure_jobs|
::Security::Scan.scan_types.each do |name, scan_type| ::Security::Scan.scan_types.each do |name, scan_type|
...@@ -479,8 +473,8 @@ module EE ...@@ -479,8 +473,8 @@ module EE
end end
end end
else else
start = ::Ci::Pipeline.minimum(:id) start = minimum_id(::Ci::Pipeline)
finish = ::Ci::Pipeline.maximum(:id) finish = maximum_id(::Ci::Pipeline)
::Security::Scan.scan_types.each do |name, scan_type| ::Security::Scan.scan_types.each do |name, scan_type|
relation = ::Ci::Build.joins(:security_scans) relation = ::Ci::Build.joins(:security_scans)
...@@ -527,30 +521,6 @@ module EE ...@@ -527,30 +521,6 @@ module EE
Arel::Nodes::NamedFunction.new('DATE', [locked_timezone]) Arel::Nodes::NamedFunction.new('DATE', [locked_timezone])
end end
# Returns `::ApprovalMergeRequestRule.minimum(:merge_request_id)`, evaluated
# through `strong_memoize` under the key
# `:approval_merge_request_rule_minimum_id`.
def approval_merge_request_rule_minimum_id
  strong_memoize(:approval_merge_request_rule_minimum_id) { ::ApprovalMergeRequestRule.minimum(:merge_request_id) }
end
# Returns `::ApprovalMergeRequestRule.maximum(:merge_request_id)`, evaluated
# through `strong_memoize` under the key
# `:approval_merge_request_rule_maximum_id`.
def approval_merge_request_rule_maximum_id
  strong_memoize(:approval_merge_request_rule_maximum_id) { ::ApprovalMergeRequestRule.maximum(:merge_request_id) }
end
# Returns `::MergeRequest.minimum(:id)`, evaluated through `strong_memoize`
# under the key `:merge_request_minimum_id`.
def merge_request_minimum_id
  strong_memoize(:merge_request_minimum_id) { ::MergeRequest.minimum(:id) }
end
# Returns `::MergeRequest.maximum(:id)`, evaluated through `strong_memoize`
# under the key `:merge_request_maximum_id`.
def merge_request_maximum_id
  strong_memoize(:merge_request_maximum_id) { ::MergeRequest.maximum(:id) }
end
def ldap_config_present_for_any_provider?(configuration_item) def ldap_config_present_for_any_provider?(configuration_item)
ldap_available_servers.any? { |server_config| server_config[configuration_item.to_s] } ldap_available_servers.any? { |server_config| server_config[configuration_item.to_s] }
end end
...@@ -590,15 +560,15 @@ module EE ...@@ -590,15 +560,15 @@ module EE
distinct_count( distinct_count(
::ApprovalMergeRequestRule.where(time_period).where(sql), ::ApprovalMergeRequestRule.where(time_period).where(sql),
:merge_request_id, :merge_request_id,
start: approval_merge_request_rule_minimum_id, start: minimum_id(::ApprovalMergeRequestRule, :merge_request_id),
finish: approval_merge_request_rule_maximum_id finish: maximum_id(::ApprovalMergeRequestRule, :merge_request_id)
) )
end end
def projects_jira_issuelist_active def projects_jira_issuelist_active
# rubocop: disable UsageData/LargeTable: # rubocop: disable UsageData/LargeTable:
min_id = JiraTrackerData.where(issues_enabled: true).minimum(:service_id) min_id = minimum_id(JiraTrackerData.where(issues_enabled: true), :service_id)
max_id = JiraTrackerData.where(issues_enabled: true).maximum(:service_id) max_id = maximum_id(JiraTrackerData.where(issues_enabled: true), :service_id)
# rubocop: enable UsageData/LargeTable: # rubocop: enable UsageData/LargeTable:
count(::JiraService.active.includes(:jira_tracker_data).where(jira_tracker_data: { issues_enabled: true }), start: min_id, finish: max_id) count(::JiraService.active.includes(:jira_tracker_data).where(jira_tracker_data: { issues_enabled: true }), start: min_id, finish: max_id)
end end
......
...@@ -15,7 +15,7 @@ RSpec.describe Gitlab::UsageDataNonSqlMetrics do ...@@ -15,7 +15,7 @@ RSpec.describe Gitlab::UsageDataNonSqlMetrics do
described_class.uncached_data described_class.uncached_data
end end
expect(recorder.count).to eq(74) expect(recorder.count).to eq(50)
end end
end end
end end
...@@ -436,18 +436,10 @@ module Gitlab ...@@ -436,18 +436,10 @@ module Gitlab
projects_jira_dvcs_server_active: count(ProjectFeatureUsage.with_jira_dvcs_integration_enabled(cloud: false)) projects_jira_dvcs_server_active: count(ProjectFeatureUsage.with_jira_dvcs_integration_enabled(cloud: false))
} }
# rubocop: disable UsageData/LargeTable: jira_service_data_hash = jira_service_data
JiraService.active.includes(:jira_tracker_data).find_in_batches(batch_size: 100) do |services| results[:projects_jira_server_active] = jira_service_data_hash[:projects_jira_server_active]
counts = services.group_by do |service| results[:projects_jira_cloud_active] = jira_service_data_hash[:projects_jira_cloud_active]
# TODO: Simplify as part of https://gitlab.com/gitlab-org/gitlab/issues/29404
service_url = service.data_fields&.url || (service.properties && service.properties['url'])
service_url&.include?('.atlassian.net') ? :cloud : :server
end
results[:projects_jira_server_active] += counts[:server].size if counts[:server]
results[:projects_jira_cloud_active] += counts[:cloud].size if counts[:cloud]
end
# rubocop: enable UsageData/LargeTable:
results results
rescue ActiveRecord::StatementInvalid rescue ActiveRecord::StatementInvalid
{ projects_jira_server_active: FALLBACK, projects_jira_cloud_active: FALLBACK } { projects_jira_server_active: FALLBACK, projects_jira_cloud_active: FALLBACK }
......
...@@ -25,10 +25,17 @@ module Gitlab ...@@ -25,10 +25,17 @@ module Gitlab
SQL_METRIC_DEFAULT SQL_METRIC_DEFAULT
end end
def maximum_id(model) def maximum_id(model, column = nil)
end end
def minimum_id(model) def minimum_id(model, column = nil)
end
def jira_service_data
{
projects_jira_server_active: 0,
projects_jira_cloud_active: 0
}
end end
end end
end end
......
...@@ -25,6 +25,27 @@ module Gitlab ...@@ -25,6 +25,27 @@ module Gitlab
relation.select(relation.all.table[column].sum).to_sql relation.select(relation.all.table[column].sum).to_sql
end end
# rubocop: disable CodeReuse/ActiveRecord
# Builds the SQL text of a histogram query instead of executing it.
#
# The relation is grouped by `column` and each group's row count is selected
# as `count_grouped` inside a CTE named `count_cte`. The outer query projects
# `WIDTH_BUCKET(count_grouped, buckets.first, buckets.last, bucket_size - 1)`
# aliased as `buckets`, together with the CTE table's `count` attribute, and
# is grouped and ordered by `buckets`.
#
# @param relation the relation to group (must respond to `group`/`select`)
# @param column the column to group by
# @param buckets an object responding to `first`, `last` and `size`
#   (the spec passes a Range)
# @param bucket_size defaults to `buckets.size`; one less than this value is
#   passed to WIDTH_BUCKET as the number of segments
# @return the result of calling `to_sql` on the assembled query
def histogram(relation, column, buckets:, bucket_size: buckets.size)
  per_group_counts = relation.group(column).select(Arel.star.count.as('count_grouped'))
  counts_cte = Gitlab::SQL::CTE.new(:count_cte, per_group_counts)

  # WIDTH_BUCKET receives the number of segments, i.e. bucket_size - 1.
  segments = bucket_size - 1
  width_bucket_args = [counts_cte.table[:count_grouped], buckets.first, buckets.last, segments]
  bucket_column = Arel::Nodes::NamedFunction.new('WIDTH_BUCKET', width_bucket_args).as('buckets')

  counts_cte
    .table
    .project(bucket_column, counts_cte.table[:count])
    .group('buckets')
    .order('buckets')
    .with(counts_cte.to_arel)
    .to_sql
end
# rubocop: enable CodeReuse/ActiveRecord
# For estimated distinct count use exact query instead of hll # For estimated distinct count use exact query instead of hll
# buckets query, because it can't be used to obtain estimations without # buckets query, because it can't be used to obtain estimations without
# supplementary ruby code present in Gitlab::Database::PostgresHll::BatchDistinctCounter # supplementary ruby code present in Gitlab::Database::PostgresHll::BatchDistinctCounter
...@@ -36,10 +57,21 @@ module Gitlab ...@@ -36,10 +57,21 @@ module Gitlab
'SELECT ' + args.map {|arg| "(#{arg})" }.join(' + ') 'SELECT ' + args.map {|arg| "(#{arg})" }.join(' + ')
end end
def maximum_id(model) def maximum_id(model, column = nil)
end
def minimum_id(model, column = nil)
end
def jira_service_data
{
projects_jira_server_active: 0,
projects_jira_cloud_active: 0
}
end end
def minimum_id(model) def epics_deepest_relationship_level
{ epics_deepest_relationship_level: 0 }
end end
private private
......
...@@ -210,20 +210,54 @@ module Gitlab ...@@ -210,20 +210,54 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values) Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end end
def maximum_id(model) def maximum_id(model, column = nil)
key = :"#{model.name.downcase}_maximum_id" key = :"#{model.name.downcase.gsub('::', '_')}_maximum_id"
column_to_read = column || :id
strong_memoize(key) do strong_memoize(key) do
model.maximum(:id) model.maximum(column_to_read)
end end
end end
def minimum_id(model) # rubocop: disable UsageData/LargeTable:
key = :"#{model.name.downcase}_minimum_id" def jira_service_data
data = {
projects_jira_server_active: 0,
projects_jira_cloud_active: 0
}
# rubocop: disable CodeReuse/ActiveRecord
JiraService.active.includes(:jira_tracker_data).find_in_batches(batch_size: 100) do |services|
counts = services.group_by do |service|
# TODO: Simplify as part of https://gitlab.com/gitlab-org/gitlab/issues/29404
service_url = service.data_fields&.url || (service.properties && service.properties['url'])
service_url&.include?('.atlassian.net') ? :cloud : :server
end
data[:projects_jira_server_active] += counts[:server].size if counts[:server]
data[:projects_jira_cloud_active] += counts[:cloud].size if counts[:cloud]
end
data
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: enable UsageData/LargeTable:
def minimum_id(model, column = nil)
key = :"#{model.name.downcase.gsub('::', '_')}_minimum_id"
column_to_read = column || :id
strong_memoize(key) do strong_memoize(key) do
model.minimum(:id) model.minimum(column_to_read)
end end
end end
# Returns a one-entry hash holding `::Epic.deepest_relationship_level`
# converted to an Integer via `to_i`.
#
# @return [Hash{Symbol => Integer}] `{ epics_deepest_relationship_level: <int> }`
def epics_deepest_relationship_level
  # rubocop: disable UsageData/LargeTable
  level = ::Epic.deepest_relationship_level.to_i
  # rubocop: enable UsageData/LargeTable

  { epics_deepest_relationship_level: level }
end
private private
def prometheus_client(verify:) def prometheus_client(verify:)
......
...@@ -59,6 +59,14 @@ RSpec.describe Gitlab::UsageDataQueries do ...@@ -59,6 +59,14 @@ RSpec.describe Gitlab::UsageDataQueries do
end end
end end
# Pins the exact SQL string produced by `.histogram` for active HTTP
# integrations grouped by project_id, with buckets 1..2 and bucket_size 101
# (which yields 100 WIDTH_BUCKET segments).
describe '.histogram' do
  it 'returns the histogram sql' do
    relation = AlertManagement::HttpIntegration.active
    expected_sql = 'WITH "count_cte" AS (SELECT COUNT(*) AS count_grouped FROM "alert_management_http_integrations" WHERE "alert_management_http_integrations"."active" = TRUE GROUP BY "alert_management_http_integrations"."project_id") SELECT WIDTH_BUCKET("count_cte"."count_grouped", 1, 2, 100) AS buckets, "count_cte"."count" FROM "count_cte" GROUP BY buckets ORDER BY buckets'

    actual_sql = described_class.histogram(relation, :project_id, buckets: 1..2, bucket_size: 101)

    expect(actual_sql).to eq(expected_sql)
  end
end
describe 'min/max methods' do describe 'min/max methods' do
it 'returns nil' do it 'returns nil' do
# user min/max # user min/max
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment