Commit 058c4528 authored by Jan Provaznik's avatar Jan Provaznik

Merge branch 'fix_duplicate_project_name_and_path_in_a_namespace' into 'master'

Fix project name duplicates and missing project namespace ids

See merge request gitlab-org/gitlab!83587
parents 7bfad7c3 a12776af
# frozen_string_literal: true
class FixAndBackfillProjectNamespacesForProjectsWithDuplicateName < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
MIGRATION = 'FixDuplicateProjectNameAndPath'
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 1000
class Project < ActiveRecord::Base
include ::EachBatch
self.table_name = 'projects'
scope :without_project_namespace, -> { where(project_namespace_id: nil) }
end
def up
queue_background_migration_jobs_by_range_at_intervals(
Project.without_project_namespace, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE, track_jobs: true
)
end
def down
# no-op
end
end
6ce75a41607ed1e60ea4ecdfb36703d9f4192dc26ecaa774f2c30818027dd1d7
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Fix project name duplicates and backfill missing project namespace ids
class FixDuplicateProjectNameAndPath
SUB_BATCH_SIZE = 10
# isolated project active record
class Project < ActiveRecord::Base
include ::EachBatch
self.table_name = 'projects'
scope :without_project_namespace, -> { where(project_namespace_id: nil) }
scope :id_in, ->(ids) { where(id: ids) }
end
def perform(start_id, end_id)
@project_ids = fetch_project_ids(start_id, end_id)
backfill_project_namespaces_service = init_backfill_service(project_ids)
backfill_project_namespaces_service.cleanup_gin_index('projects')
project_ids.each_slice(SUB_BATCH_SIZE) do |ids|
ActiveRecord::Base.connection.execute(update_projects_name_and_path_sql(ids))
end
backfill_project_namespaces_service.backfill_project_namespaces
mark_job_as_succeeded(start_id, end_id)
end
private
attr_accessor :project_ids
def fetch_project_ids(start_id, end_id)
Project.without_project_namespace.where(id: start_id..end_id)
end
def init_backfill_service(project_ids)
service = Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.new
service.project_ids = project_ids
service.sub_batch_size = SUB_BATCH_SIZE
service
end
def update_projects_name_and_path_sql(project_ids)
<<~SQL
WITH cte (project_id, path_from_route ) AS (
#{path_from_route_sql(project_ids).to_sql}
)
UPDATE
projects
SET
name = concat(projects.name, '-', id),
path = CASE
WHEN projects.path <> cte.path_from_route THEN path_from_route
ELSE projects.path
END
FROM
cte
WHERE
projects.id = cte.project_id;
SQL
end
def path_from_route_sql(project_ids)
Project.without_project_namespace.id_in(project_ids)
.joins("INNER JOIN routes ON routes.source_id = projects.id AND routes.source_type = 'Project'")
.select("projects.id, SUBSTRING(routes.path FROM '[^/]+(?=/$|$)') AS path_from_route")
end
def mark_job_as_succeeded(*arguments)
::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
'FixDuplicateProjectNameAndPath',
arguments
)
end
end
end
end
......@@ -7,6 +7,8 @@ module Gitlab
#
# rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces
attr_accessor :project_ids, :sub_batch_size
SUB_BATCH_SIZE = 25
PROJECT_NAMESPACE_STI_NAME = 'Project'
......@@ -18,7 +20,7 @@ module Gitlab
case migration_type
when 'up'
backfill_project_namespaces(namespace_id)
backfill_project_namespaces
mark_job_as_succeeded(start_id, end_id, namespace_id, 'up')
when 'down'
cleanup_backfilled_project_namespaces(namespace_id)
......@@ -28,11 +30,7 @@ module Gitlab
end
end
private
attr_accessor :project_ids, :sub_batch_size
def backfill_project_namespaces(namespace_id)
def backfill_project_namespaces
project_ids.each_slice(sub_batch_size) do |project_ids|
# cleanup gin indexes on namespaces table
cleanup_gin_index('namespaces')
......@@ -64,6 +62,8 @@ module Gitlab
end
end
private
def cleanup_backfilled_project_namespaces(namespace_id)
project_ids.each_slice(sub_batch_size) do |project_ids|
# IMPORTANT: first nullify project_namespace_id in projects table to avoid removing projects when records
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::FixDuplicateProjectNameAndPath, :migration do
let(:migration) { described_class.new }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:routes) { table(:routes) }
let(:namespace1) { namespaces.create!(name: 'batchtest1', type: 'Group', path: 'batch-test1') }
let(:namespace2) { namespaces.create!(name: 'batchtest2', type: 'Group', parent_id: namespace1.id, path: 'batch-test2') }
let(:namespace3) { namespaces.create!(name: 'batchtest3', type: 'Group', parent_id: namespace2.id, path: 'batch-test3') }
let(:project_namespace2) { namespaces.create!(name: 'project2', path: 'project2', type: 'Project', parent_id: namespace2.id, visibility_level: 20) }
let(:project_namespace3) { namespaces.create!(name: 'project3', path: 'project3', type: 'Project', parent_id: namespace3.id, visibility_level: 20) }
let(:project1) { projects.create!(name: 'project1', path: 'project1', namespace_id: namespace1.id, visibility_level: 20) }
let(:project2) { projects.create!(name: 'project2', path: 'project2', namespace_id: namespace2.id, project_namespace_id: project_namespace2.id, visibility_level: 20) }
let(:project2_dup) { projects.create!(name: 'project2', path: 'project2', namespace_id: namespace2.id, visibility_level: 20) }
let(:project3) { projects.create!(name: 'project3', path: 'project3', namespace_id: namespace3.id, project_namespace_id: project_namespace3.id, visibility_level: 20) }
let(:project3_dup) { projects.create!(name: 'project3', path: 'project3', namespace_id: namespace3.id, visibility_level: 20) }
let!(:namespace_route1) { routes.create!(path: 'batch-test1', source_id: namespace1.id, source_type: 'Namespace') }
let!(:namespace_route2) { routes.create!(path: 'batch-test1/batch-test2', source_id: namespace2.id, source_type: 'Namespace') }
let!(:namespace_route3) { routes.create!(path: 'batch-test1/batch-test3', source_id: namespace3.id, source_type: 'Namespace') }
let!(:proj_route1) { routes.create!(path: 'batch-test1/project1', source_id: project1.id, source_type: 'Project') }
let!(:proj_route2) { routes.create!(path: 'batch-test1/batch-test2/project2', source_id: project2.id, source_type: 'Project') }
let!(:proj_route2_dup) { routes.create!(path: "batch-test1/batch-test2/project2-route-#{project2_dup.id}", source_id: project2_dup.id, source_type: 'Project') }
let!(:proj_route3) { routes.create!(path: 'batch-test1/batch-test3/project3', source_id: project3.id, source_type: 'Project') }
let!(:proj_route3_dup) { routes.create!(path: "batch-test1/batch-test3/project3-route-#{project3_dup.id}", source_id: project3_dup.id, source_type: 'Project') }
subject(:perform_migration) { migration.perform(projects.minimum(:id), projects.maximum(:id)) }
describe '#up' do
it 'backfills namespace_id for the selected records', :aggregate_failures do
expect(namespaces.where(type: 'Project').count).to eq(2)
perform_migration
expect(namespaces.where(type: 'Project').count).to eq(5)
expect(project1.reload.name).to eq("project1-#{project1.id}")
expect(project1.path).to eq('project1')
expect(project2.reload.name).to eq('project2')
expect(project2.path).to eq('project2')
expect(project2_dup.reload.name).to eq("project2-#{project2_dup.id}")
expect(project2_dup.path).to eq("project2-route-#{project2_dup.id}")
expect(project3.reload.name).to eq("project3")
expect(project3.path).to eq("project3")
expect(project3_dup.reload.name).to eq("project3-#{project3_dup.id}")
expect(project3_dup.path).to eq("project3-route-#{project3_dup.id}")
projects.all.each do |pr|
project_namespace = namespaces.find(pr.project_namespace_id)
expect(project_namespace).to be_in_sync_with_project(pr)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe FixAndBackfillProjectNamespacesForProjectsWithDuplicateName, :migration do
let(:projects) { table(:projects) }
let(:namespaces) { table(:namespaces) }
let!(:group) { namespaces.create!(name: 'group1', path: 'group1', type: 'Group') }
let!(:project_namespace) { namespaces.create!(name: 'project2', path: 'project2', type: 'Project') }
let!(:project1) { projects.create!(name: 'project1', path: 'project1', project_namespace_id: nil, namespace_id: group.id, visibility_level: 20) }
let!(:project2) { projects.create!(name: 'project2', path: 'project2', project_namespace_id: project_namespace.id, namespace_id: group.id, visibility_level: 20) }
let!(:project3) { projects.create!(name: 'project3', path: 'project3', project_namespace_id: nil, namespace_id: group.id, visibility_level: 20) }
let!(:project4) { projects.create!(name: 'project4', path: 'project4', project_namespace_id: nil, namespace_id: group.id, visibility_level: 20) }
describe '#up' do
it 'schedules background migrations' do
Sidekiq::Testing.fake! do
freeze_time do
described_class.new.up
migration = described_class::MIGRATION
expect(migration).to be_scheduled_delayed_migration(2.minutes, project1.id, project4.id)
expect(BackgroundMigrationWorker.jobs.size).to eq 1
end
end
end
context 'in batches' do
before do
stub_const('FixAndBackfillProjectNamespacesForProjectsWithDuplicateName::BATCH_SIZE', 2)
end
it 'schedules background migrations' do
Sidekiq::Testing.fake! do
freeze_time do
described_class.new.up
migration = described_class::MIGRATION
expect(migration).to be_scheduled_delayed_migration(2.minutes, project1.id, project3.id)
expect(migration).to be_scheduled_delayed_migration(4.minutes, project4.id, project4.id)
expect(BackgroundMigrationWorker.jobs.size).to eq 2
end
end
end
end
end
end
......@@ -10,7 +10,7 @@ RSpec::Matchers.define :be_in_sync_with_project do |project|
project_namespace.present? &&
project.name == project_namespace.name &&
project.path == project_namespace.path &&
project.namespace == project_namespace.parent &&
project.namespace_id == project_namespace.parent_id &&
project.visibility_level == project_namespace.visibility_level &&
project.shared_runners_enabled == project_namespace.shared_runners_enabled
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment