Commit ba5277e9 authored by Kamil Trzciński's avatar Kamil Trzciński

Extend `gitlab_schema` to understand partitioned and deleted tables

Query analysis encounters data sources in a number of different forms
that fall outside of the currently visible tables (as defined by `gitlab_schema.yml`).

These tables include:
- Rails accessing `pg_*` catalog tables
- Rails accessing `information_schema.*` catalog tables
- GitLab Rails accessing and deleting data from partitioned tables
- GitLab Rails specs accessing deleted tables that are still referenced by some migrations
parent f15b64dd
...@@ -8,17 +8,84 @@ ...@@ -8,17 +8,84 @@
# - gitlab_shared - defines a set of tables that are found on all databases (data accessed is dependent on connection) # - gitlab_shared - defines a set of tables that are found on all databases (data accessed is dependent on connection)
# - gitlab_main / gitlab_ci - defines a set of tables that can only exist on a given database # - gitlab_main / gitlab_ci - defines a set of tables that can only exist on a given database
# #
# Tables created for the purpose of tests should be prefixed with `_test_`, e.g. `_test_my_table_name`
module Gitlab module Gitlab
module Database module Database
module GitlabSchema module GitlabSchema
# Tables that have been dropped or renamed but are still referenced by
# migrations, mapped to the schema they used to belong to.
# This is needed for now, but should be removed in the future.
#
# The groups below are expanded, in order, into a frozen
# `{ 'table_name' => :schema }` Hash.
DELETED_TABLES = [
  # main tables
  # (`tmp_fingerprint_sha256_migration` is used by
  # lib/gitlab/background_migration/migrate_fingerprint_sha256_within_keys.rb)
  [:gitlab_main, %w[
    alerts_service_data
    analytics_devops_adoption_segment_selections
    analytics_repository_file_commits
    analytics_repository_file_edits
    analytics_repository_files
    audit_events_archived
    backup_labels
    clusters_applications_fluentd
    forked_project_links
    issue_milestones
    merge_request_milestones
    namespace_onboarding_actions
    services
    terraform_state_registry
    tmp_fingerprint_sha256_migration
    web_hook_logs_archived
    vulnerability_export_registry
    vulnerability_finding_fingerprints
    vulnerability_export_verification_status
  ]],
  # CI tables
  [:gitlab_ci, %w[
    ci_build_trace_sections
    ci_build_trace_section_names
    ci_daily_report_results
    ci_test_cases
    ci_test_case_failures
  ]],
  # leftovers from early implementation of partitioning
  [:gitlab_main, %w[
    audit_events_part_5fc467ac26
    web_hook_logs_part_0c5294f417
  ]]
].flat_map { |schema, tables| tables.map { |table| [table, schema] } }.to_h.freeze
# Classifies every table in `tables` and returns the distinct schemas found.
#
# @param tables [Enumerable<String>] table names, as accepted by `table_schema`
# @return [Set<Symbol>] the unique schema classifications of the given tables
def self.table_schemas(tables)
  # `to_set` with a block maps while building the Set, so no intermediate
  # Array is allocated.
  tables.to_set { |table| table_schema(table) }
end
# Classifies a single table name into the schema it belongs to.
#
# The name may be schema-qualified (`public.ci_builds`) and may carry a
# numeric partition suffix (`loose_foreign_keys_deleted_records_1`); both are
# stripped before the lookup.
#
# @param name [String] table name, optionally prefixed with `schema.`
# @return [Symbol] the mapped schema, `:gitlab_shared` for catalog and test
#   tables, or a unique `:undefined_<table>` symbol when nothing matches
def self.table_schema(name)
  # Strip schema name like: `public.`
  schema_name, table_name = name.to_s.split('.', 2)

  # Most names do not have a schema, so a single segment is the table itself.
  # An empty name splits into `[]`; `to_s` keeps `table_name` a String then.
  unless table_name
    table_name = schema_name.to_s
    schema_name = nil
  end

  # Strip partition number of a form `loose_foreign_keys_deleted_records_1`.
  # The pattern is anchored at the end of the string, so it matches at most once.
  table_name = table_name.sub(/_[0-9]+\z/, '')

  # Tables that are properly mapped take precedence; after that come tables
  # that are deleted, but which we still need to reference.
  known_schema = tables_to_schema[table_name] || DELETED_TABLES[table_name]
  return known_schema if known_schema

  # All tables from `information_schema.` are `:gitlab_shared`
  return :gitlab_shared if schema_name == 'information_schema'

  # All tables that start with `_test_` are shared and ignored, and all
  # `pg_` tables are marked as shared as well.
  return :gitlab_shared if table_name.start_with?('_test_', 'pg_')

  # When undefined it's best to return a unique name so that we don't
  # incorrectly assume that 2 undefined schemas belong on the same database
  :"undefined_#{table_name}"
end
def self.tables_to_schema def self.tables_to_schema
......
...@@ -35,4 +35,24 @@ RSpec.describe Gitlab::Database::GitlabSchema do ...@@ -35,4 +35,24 @@ RSpec.describe Gitlab::Database::GitlabSchema do
end end
end end
end end
# Verifies how `.table_schema` classifies each supported form of table name.
describe '.table_schema' do
  using RSpec::Parameterized::TableSyntax

  where(:name, :classification) do
    # mapped table, with and without a schema prefix
    'ci_builds'                    | :gitlab_ci
    'my_schema.ci_builds'          | :gitlab_ci
    # numeric partition suffix is stripped before the lookup
    'ci_builds_101'                | :gitlab_ci
    # catalog tables
    'information_schema.columns'   | :gitlab_shared
    'pg_attribute'                 | :gitlab_shared
    # deleted table that is still referenced
    'audit_events_part_5fc467ac26' | :gitlab_main
    # test-only table
    '_test_my_table'               | :gitlab_shared
    # unknown tables get a unique name, also after partition stripping
    'my_other_table'               | :undefined_my_other_table
    'my_other_table_1'             | :undefined_my_other_table
  end

  with_them do
    subject { described_class.table_schema(name) }

    it { is_expected.to eq(classification) }
  end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment