Commit 6af41560 authored by Alexandru Croitor's avatar Alexandru Croitor

Add migration for backfilling project namespaces

Adds the capability to backfill a project namespace for each
project, starting with the ability to backfill project namespaces
for a single root namespace.

Changelog: added
parent eb04b06a
......@@ -17,6 +17,9 @@ class Namespace < ApplicationRecord
include EachBatch
ignore_column :delayed_project_removal, remove_with: '14.1', remove_after: '2021-05-22'
# Temporary column used for back-filling project namespaces.
# Remove it once the back-filling of all project namespaces is done.
ignore_column :tmp_project_id, remove_with: '14.7', remove_after: '2022-01-22'
# Tells ActiveRecord not to store the full class name, in order to save some space
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69794
......
# frozen_string_literal: true
# Adds the nullable integer column `namespaces.tmp_project_id`.
#
# The column is temporary: it lets project namespace records be batch inserted
# into the namespaces table and then linked back to the rows of the projects
# table they were created from.
class AddTmpProjectIdColumnToNamespaces < Gitlab::Database::Migration[1.0]
  enable_lock_retries!

  # Reversible migration: only contains the `add_column` call.
  def change
    add_column(:namespaces, :tmp_project_id, :integer) # rubocop: disable Migration/AddColumnsToWideTables
  end
end
# frozen_string_literal: true
# Creates (and, on rollback, drops) a unique index on the temporary
# `namespaces.tmp_project_id` column.
class AddIndexToTmpProjectIdColumnOnNamespacesTable < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  INDEX_NAME = 'tmp_index_on_tmp_project_id_on_namespaces'

  # Adds the unique index under the explicit name INDEX_NAME.
  def up
    add_concurrent_index(:namespaces, :tmp_project_id, unique: true, name: INDEX_NAME)
  end

  # Drops the index by name.
  def down
    remove_concurrent_index_by_name(:namespaces, INDEX_NAME)
  end
end
# frozen_string_literal: true
# Adds a foreign key from the temporary `namespaces.tmp_project_id` column to
# `projects`. The constraint recorded in structure.sql for this change is
# `FOREIGN KEY (tmp_project_id) REFERENCES projects(id) ON DELETE CASCADE`.
class AddFkToTmpProjectIdColumnOnNamespacesTable < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  # Adds the foreign key with the concurrent helper, which is why the
  # migration runs outside of a DDL transaction.
  def up
    add_concurrent_foreign_key :namespaces, :projects, column: :tmp_project_id
  end

  # Removes the foreign key again.
  #
  # Dropping a constraint needs locks on the tables involved. Because this
  # migration disables the DDL transaction, the removal is wrapped in
  # `with_lock_retries` so that a busy table leads to a retry instead of the
  # statement queueing behind (and blocking) other traffic.
  def down
    with_lock_retries do
      remove_foreign_key :namespaces, column: :tmp_project_id
    end
  end
end
# frozen_string_literal: true
# Creates (and, on rollback, drops) a non-unique index on `web_hooks.group_id`.
class AddIndexToGroupIdColumnOnWebhooksTable < Gitlab::Database::Migration[1.0]
  disable_ddl_transaction!

  INDEX_NAME = 'index_on_group_id_on_webhooks'

  # Adds the index under the explicit name INDEX_NAME.
  def up
    add_concurrent_index(:web_hooks, :group_id, name: INDEX_NAME)
  end

  # Drops the index by name.
  def down
    remove_concurrent_index_by_name(:web_hooks, INDEX_NAME)
  end
end
1cadc3a932d5b62cfeafcd4090eddc37b44997dbbd0b34da1c7c87a5774bb683
\ No newline at end of file
9a62f0ec43ab295619d82494090c38539cb16408c8971bdde86bb8d02546f558
\ No newline at end of file
30e9632877d3ad33528be0f56962c0ab57f5eee3889183d9638cbaea903a3d82
\ No newline at end of file
14bb815cbdad2db56dafb7eaaff893de96116a1a9e8d6c5ed95f4bef9b9717fc
\ No newline at end of file
......@@ -16375,7 +16375,8 @@ CREATE TABLE namespaces (
push_rule_id bigint,
shared_runners_enabled boolean DEFAULT true NOT NULL,
allow_descendants_override_disabled_shared_runners boolean DEFAULT false NOT NULL,
traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL
traversal_ids integer[] DEFAULT '{}'::integer[] NOT NULL,
tmp_project_id integer
);
CREATE SEQUENCE namespaces_id_seq
......@@ -26591,6 +26592,8 @@ CREATE INDEX index_oauth_openid_requests_on_access_grant_id ON oauth_openid_requ
CREATE UNIQUE INDEX index_on_deploy_keys_id_and_type_and_public ON keys USING btree (id, type) WHERE (public = true);
CREATE INDEX index_on_group_id_on_webhooks ON web_hooks USING btree (group_id);
CREATE INDEX index_on_identities_lower_extern_uid_and_provider ON identities USING btree (lower((extern_uid)::text), provider);
CREATE UNIQUE INDEX index_on_instance_statistics_recorded_at_and_identifier ON analytics_usage_trends_measurements USING btree (identifier, recorded_at);
......@@ -27769,6 +27772,8 @@ CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_child_namespaces ON n
CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_root_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NULL) AND (traversal_ids = '{}'::integer[]));
CREATE UNIQUE INDEX tmp_index_on_tmp_project_id_on_namespaces ON namespaces USING btree (tmp_project_id);
CREATE INDEX tmp_index_on_vulnerabilities_non_dismissed ON vulnerabilities USING btree (id) WHERE (state <> 2);
CREATE UNIQUE INDEX uniq_pkgs_deb_grp_architectures_on_distribution_id_and_name ON packages_debian_group_architectures USING btree (distribution_id, name);
......@@ -29012,6 +29017,9 @@ ALTER TABLE ONLY application_settings
ALTER TABLE ONLY merge_requests
ADD CONSTRAINT fk_6a5165a692 FOREIGN KEY (milestone_id) REFERENCES milestones(id) ON DELETE SET NULL;
ALTER TABLE ONLY namespaces
ADD CONSTRAINT fk_6a77f66919 FOREIGN KEY (tmp_project_id) REFERENCES projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY geo_event_log
ADD CONSTRAINT fk_6ada82d42a FOREIGN KEY (container_repository_updated_event_id) REFERENCES geo_container_repository_updated_events(id) ON DELETE CASCADE;
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      # Back-fill project namespaces for projects that do not yet have a namespace.
      #
      # TODO: remove this comment when an actual backfill migration is added.
      #
      # This is first being added without an actual migration as we need to initially test
      # if backfilling project namespaces affects performance in any significant way.
      # rubocop: disable Metrics/ClassLength
      class BackfillProjectNamespaces
        # Number of projects handled per insert/update (or nullify) round.
        BATCH_SIZE = 100
        # Number of projects whose namespace records are deleted per DELETE statement.
        DELETE_BATCH_SIZE = 10
        # Value written to `namespaces.type` for back-filled records.
        PROJECT_NAMESPACE_STI_NAME = 'Project'

        IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models

        # Runs the back-fill ('up') or its cleanup ('down') for the projects
        # whose id lies in start_id..end_id.
        #
        # @param start_id [Integer] lower bound (inclusive) of the project id range
        # @param end_id [Integer] upper bound (inclusive) of the project id range
        # @param namespace_id [Integer, nil] when present, only projects that sit in
        #   this group or one of its descendant groups are processed
        # @param migration_type [String] 'up' or 'down'
        # @raise [RuntimeError] when migration_type is neither 'up' nor 'down'
        def perform(start_id, end_id, namespace_id, migration_type = 'up')
          load_project_ids(start_id, end_id, namespace_id)

          case migration_type
          when 'up'
            backfill_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'up')
          when 'down'
            cleanup_backfilled_project_namespaces(namespace_id)
            mark_job_as_succeeded(start_id, end_id, namespace_id, 'down')
          else
            raise "Unknown migration type"
          end
        end

        private

        # Ids of the projects selected by `load_project_ids`.
        attr_reader :project_ids

        # Creates a project namespace for every loaded project and links it to the
        # project, BATCH_SIZE projects at a time.
        #
        # `namespace_id` is accepted but not used here: the filtering by namespace
        # already happened in `load_project_ids`.
        def backfill_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # We need to lock these project records for the period when we create project namespaces
            # and link them to projects so that if a project is modified in the time between creating
            # project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
            # we do not get them out of sync.
            #
            # see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
            IsolatedModels::Project.transaction do
              # A relation is lazy: setting the lock clause alone sends nothing to the
              # database. `load` executes the SELECT ... FOR UPDATE so the row locks
              # are really held until the transaction ends.
              IsolatedModels::Project.where(id: batch_ids).select(:id).lock('FOR UPDATE').load

              batch_insert_namespaces(batch_ids)
              batch_update_projects(batch_ids)
            end

            batch_update_project_namespaces_traversal_ids(batch_ids)
          end
        end

        # Reverts the back-fill for every loaded project, BATCH_SIZE projects at a time.
        #
        # `namespace_id` is accepted but not used here: the filtering by namespace
        # already happened in `load_project_ids`.
        def cleanup_backfilled_project_namespaces(namespace_id)
          project_ids.each_slice(BATCH_SIZE) do |batch_ids|
            # IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
            # from namespaces are deleted due to FK/triggers
            nullify_project_namespaces_in_projects(batch_ids)
            delete_project_namespace_records(batch_ids)
          end
        end

        # Inserts one namespaces row per given project with a single INSERT ... SELECT.
        #
        # The SELECT list is positional and must stay aligned with the INSERT column
        # list: the project id goes to tmp_project_id and the project's namespace_id
        # becomes the new row's parent_id. Conflicting rows are skipped.
        def batch_insert_namespaces(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .select("projects.id, projects.name, projects.path, projects.namespace_id, projects.visibility_level, shared_runners_enabled, '#{PROJECT_NAMESPACE_STI_NAME}', now(), now()")

          ActiveRecord::Base.connection.execute <<~SQL
            INSERT INTO namespaces (tmp_project_id, name, path, parent_id, visibility_level, shared_runners_enabled, type, created_at, updated_at)
            #{projects.to_sql}
            ON CONFLICT DO NOTHING;
          SQL
        end

        # Points projects.project_namespace_id at the namespaces row whose
        # tmp_project_id matches the project. Rows that already hold the right
        # value are left untouched (IS DISTINCT FROM).
        def batch_update_projects(project_ids)
          projects = IsolatedModels::Project.where(id: project_ids)
            .joins("INNER JOIN namespaces ON projects.id = namespaces.tmp_project_id")
            .select("namespaces.id, namespaces.tmp_project_id")

          ActiveRecord::Base.connection.execute <<~SQL
            WITH cte(project_namespace_id, project_id) AS #{::Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
              #{projects.to_sql}
            )
            UPDATE projects
            SET project_namespace_id = cte.project_namespace_id
            FROM cte
            WHERE id = cte.project_id AND projects.project_namespace_id IS DISTINCT FROM cte.project_namespace_id
          SQL
        end

        # Sets traversal_ids of each back-filled project namespace to the parent
        # namespace's traversal_ids with the project namespace's own id appended.
        #
        # The isolated Namespace model is used here like everywhere else in this
        # class, so the query does not depend on the application model.
        def batch_update_project_namespaces_traversal_ids(project_ids)
          namespaces = IsolatedModels::Namespace.where(tmp_project_id: project_ids)
            .joins("INNER JOIN namespaces n2 ON namespaces.parent_id = n2.id")
            .select("namespaces.id as project_namespace_id, n2.traversal_ids")

          ActiveRecord::Base.connection.execute <<~SQL
            UPDATE namespaces
            SET traversal_ids = array_append(project_namespaces.traversal_ids, project_namespaces.project_namespace_id)
            FROM (#{namespaces.to_sql}) as project_namespaces(project_namespace_id, traversal_ids)
            WHERE id = project_namespaces.project_namespace_id
          SQL
        end

        # Clears projects.project_namespace_id for the given projects.
        def nullify_project_namespaces_in_projects(project_ids)
          IsolatedModels::Project.where(id: project_ids).update_all(project_namespace_id: nil)
        end

        # Deletes the back-filled namespaces rows of the given projects,
        # DELETE_BATCH_SIZE projects per statement.
        def delete_project_namespace_records(project_ids)
          project_ids.each_slice(DELETE_BATCH_SIZE) do |p_ids|
            IsolatedModels::Namespace.where(type: PROJECT_NAMESPACE_STI_NAME).where(tmp_project_id: p_ids).delete_all
          end
        end

        # Stores in @project_ids the ids of the projects within start_id..end_id,
        # restricted to the hierarchy of namespace_id when one is given.
        def load_project_ids(start_id, end_id, namespace_id)
          projects = IsolatedModels::Project.arel_table
          relation = IsolatedModels::Project.where(projects[:id].between(start_id..end_id))
          relation = relation.where(projects[:namespace_id].in(Arel::Nodes::SqlLiteral.new(hierarchy_cte(namespace_id)))) if namespace_id

          @project_ids = relation.pluck(:id)
        end

        # Marks the tracked job(s) with exactly these arguments as succeeded.
        def mark_job_as_succeeded(*arguments)
          ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded('BackfillProjectNamespaces', arguments)
        end

        # Returns SQL for a recursive query that yields the id of the given group
        # and of all its descendant groups. The id is interpolated through `to_i`,
        # so only an integer can end up in the SQL text.
        def hierarchy_cte(root_namespace_id)
          <<-SQL
            WITH RECURSIVE "base_and_descendants" AS (
              (
                SELECT "namespaces"."id"
                FROM "namespaces"
                WHERE "namespaces"."type" = 'Group' AND "namespaces"."id" = #{root_namespace_id.to_i}
              )
              UNION
              (
                SELECT "namespaces"."id"
                FROM "namespaces", "base_and_descendants"
                WHERE "namespaces"."type" = 'Group' AND "namespaces"."parent_id" = "base_and_descendants"."id"
              )
            )
            SELECT "id" FROM "base_and_descendants" AS "namespaces"
          SQL
        end
      end
      # rubocop: enable Metrics/ClassLength
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      module Models
        # Stand-alone model for the `namespaces` table, used by the project
        # namespaces background migration instead of the application model.
        class Namespace < ActiveRecord::Base
          include EachBatch

          # Point single table inheritance at a column name other than `type`,
          # so the `type` column is treated as a plain attribute.
          self.inheritance_column = :_type_disabled
          self.table_name = 'namespaces'
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    module ProjectNamespaces
      module Models
        # Stand-alone model for the `projects` table, used by the project
        # namespaces background migration instead of the application model.
        class Project < ActiveRecord::Base
          include EachBatch

          self.table_name = 'projects'
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment