Commit 2d4cbcaf authored by Alex Pooley's avatar Alex Pooley

Merge branch 'merge_topics_with_same_name' into 'master'

Background migration: merge topics with same name

See merge request gitlab-org/gitlab!81503
parents 515d9327 ff6cc894
# frozen_string_literal: true
class ScheduleMergeTopicsWithSameName < Gitlab::Database::Migration[1.0]
MIGRATION = 'MergeTopicsWithSameName'
BATCH_SIZE = 100
disable_ddl_transaction!
class Topic < ActiveRecord::Base
self.table_name = 'topics'
end
def up
Topic.select('LOWER(name) as name').group('LOWER(name)').having('COUNT(*) > 1').order('LOWER(name)')
.in_groups_of(BATCH_SIZE, false).each_with_index do |group, i|
migrate_in((i + 1) * 2.minutes, MIGRATION, [group.map(&:name)])
end
end
def down
# no-op
end
end
8fb72b15bfaa1b58f87cb3f1836df1e8bfa1a5ddec4e480a2cb6a3c9fafe3bda
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# The class to merge project topics with the same case insensitive name
class MergeTopicsWithSameName
# Temporary AR model for topics
class Topic < ActiveRecord::Base
self.table_name = 'topics'
end
# Temporary AR model for project topic assignment
class ProjectTopic < ActiveRecord::Base
self.table_name = 'project_topics'
end
def perform(topic_names)
topic_names.each do |topic_name|
topics = Topic.where('LOWER(name) = ?', topic_name)
.order(total_projects_count: :desc, non_private_projects_count: :desc, id: :asc)
.to_a
topic_to_keep = topics.shift
merge_topics(topic_to_keep, topics) if topics.any?
end
end
private
def merge_topics(topic_to_keep, topics_to_remove)
description = topic_to_keep.description
topics_to_remove.each do |topic|
description ||= topic.description if topic.description.present?
process_avatar(topic_to_keep, topic) if topic.avatar.present?
ProjectTopic.transaction do
ProjectTopic.where(topic_id: topic.id)
.where.not(project_id: ProjectTopic.where(topic_id: topic_to_keep).select(:project_id))
.update_all(topic_id: topic_to_keep.id)
ProjectTopic.where(topic_id: topic.id).delete_all
end
end
Topic.where(id: topics_to_remove).delete_all
topic_to_keep.update(
description: description,
total_projects_count: total_projects_count(topic_to_keep.id),
non_private_projects_count: non_private_projects_count(topic_to_keep.id)
)
end
# We intentionally use application code here because we need to copy/remove avatar files
def process_avatar(topic_to_keep, topic_to_remove)
topic_to_remove = ::Projects::Topic.find(topic_to_remove.id)
topic_to_keep = ::Projects::Topic.find(topic_to_keep.id)
unless topic_to_keep.avatar.present?
topic_to_keep.avatar = topic_to_remove.avatar
topic_to_keep.save!
end
topic_to_remove.remove_avatar!
topic_to_remove.save!
end
def total_projects_count(topic_id)
ProjectTopic.where(topic_id: topic_id).count
end
def non_private_projects_count(topic_id)
ProjectTopic.joins('INNER JOIN projects ON project_topics.project_id = projects.id')
.where(project_topics: { topic_id: topic_id }).where('projects.visibility_level in (10, 20)').count
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::MergeTopicsWithSameName, schema: 20220223124428 do
def set_avatar(topic_id, avatar)
topic = ::Projects::Topic.find(topic_id)
topic.avatar = avatar
topic.save!
topic.avatar.absolute_path
end
it 'merges project topics with same case insensitive name' do
namespaces = table(:namespaces)
projects = table(:projects)
topics = table(:topics)
project_topics = table(:project_topics)
group = namespaces.create!(name: 'group', path: 'group')
project_1 = projects.create!(namespace_id: group.id, visibility_level: 20)
project_2 = projects.create!(namespace_id: group.id, visibility_level: 10)
project_3 = projects.create!(namespace_id: group.id, visibility_level: 0)
topic_1_keep = topics.create!(
name: 'topic1',
description: 'description 1 to keep',
total_projects_count: 2,
non_private_projects_count: 2
)
topic_1_remove = topics.create!(
name: 'TOPIC1',
description: 'description 1 to remove',
total_projects_count: 2,
non_private_projects_count: 1
)
topic_2_remove = topics.create!(
name: 'topic2',
total_projects_count: 0
)
topic_2_keep = topics.create!(
name: 'TOPIC2',
description: 'description 2 to keep',
total_projects_count: 1
)
topic_3_remove_1 = topics.create!(
name: 'topic3',
total_projects_count: 2,
non_private_projects_count: 1
)
topic_3_keep = topics.create!(
name: 'Topic3',
total_projects_count: 2,
non_private_projects_count: 2
)
topic_3_remove_2 = topics.create!(
name: 'TOPIC3',
description: 'description 3 to keep',
total_projects_count: 2,
non_private_projects_count: 1
)
topic_4_keep = topics.create!(
name: 'topic4'
)
project_topics_1 = []
project_topics_3 = []
project_topics_removed = []
project_topics_1 << project_topics.create!(topic_id: topic_1_keep.id, project_id: project_1.id)
project_topics_1 << project_topics.create!(topic_id: topic_1_keep.id, project_id: project_2.id)
project_topics_removed << project_topics.create!(topic_id: topic_1_remove.id, project_id: project_2.id)
project_topics_1 << project_topics.create!(topic_id: topic_1_remove.id, project_id: project_3.id)
project_topics_3 << project_topics.create!(topic_id: topic_3_keep.id, project_id: project_1.id)
project_topics_3 << project_topics.create!(topic_id: topic_3_keep.id, project_id: project_2.id)
project_topics_removed << project_topics.create!(topic_id: topic_3_remove_1.id, project_id: project_1.id)
project_topics_3 << project_topics.create!(topic_id: topic_3_remove_1.id, project_id: project_3.id)
project_topics_removed << project_topics.create!(topic_id: topic_3_remove_2.id, project_id: project_1.id)
project_topics_removed << project_topics.create!(topic_id: topic_3_remove_2.id, project_id: project_3.id)
avatar_paths = {
topic_1_keep: set_avatar(topic_1_keep.id, fixture_file_upload('spec/fixtures/avatars/avatar1.png')),
topic_1_remove: set_avatar(topic_1_remove.id, fixture_file_upload('spec/fixtures/avatars/avatar2.png')),
topic_2_remove: set_avatar(topic_2_remove.id, fixture_file_upload('spec/fixtures/avatars/avatar3.png')),
topic_3_remove_1: set_avatar(topic_3_remove_1.id, fixture_file_upload('spec/fixtures/avatars/avatar4.png')),
topic_3_remove_2: set_avatar(topic_3_remove_2.id, fixture_file_upload('spec/fixtures/avatars/avatar5.png'))
}
subject.perform(%w[topic1 topic2 topic3 topic4])
# Topics
[topic_1_keep, topic_2_keep, topic_3_keep, topic_4_keep].each(&:reload)
expect(topic_1_keep.name).to eq('topic1')
expect(topic_1_keep.description).to eq('description 1 to keep')
expect(topic_1_keep.total_projects_count).to eq(3)
expect(topic_1_keep.non_private_projects_count).to eq(2)
expect(topic_2_keep.name).to eq('TOPIC2')
expect(topic_2_keep.description).to eq('description 2 to keep')
expect(topic_2_keep.total_projects_count).to eq(0)
expect(topic_2_keep.non_private_projects_count).to eq(0)
expect(topic_3_keep.name).to eq('Topic3')
expect(topic_3_keep.description).to eq('description 3 to keep')
expect(topic_3_keep.total_projects_count).to eq(3)
expect(topic_3_keep.non_private_projects_count).to eq(2)
expect(topic_4_keep.reload.name).to eq('topic4')
[topic_1_remove, topic_2_remove, topic_3_remove_1, topic_3_remove_2].each do |topic|
expect { topic.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
# Topic avatars
expect(topic_1_keep.avatar).to eq('avatar1.png')
expect(File.exist?(::Projects::Topic.find(topic_1_keep.id).avatar.absolute_path)).to be_truthy
expect(topic_2_keep.avatar).to eq('avatar3.png')
expect(File.exist?(::Projects::Topic.find(topic_2_keep.id).avatar.absolute_path)).to be_truthy
expect(topic_3_keep.avatar).to eq('avatar4.png')
expect(File.exist?(::Projects::Topic.find(topic_3_keep.id).avatar.absolute_path)).to be_truthy
[:topic_1_remove, :topic_2_remove, :topic_3_remove_1, :topic_3_remove_2].each do |topic|
expect(File.exist?(avatar_paths[topic])).to be_falsey
end
# Project Topic assignments
project_topics_1.each do |project_topic|
expect(project_topic.reload.topic_id).to eq(topic_1_keep.id)
end
project_topics_3.each do |project_topic|
expect(project_topic.reload.topic_id).to eq(topic_3_keep.id)
end
project_topics_removed.each do |project_topic|
expect { project_topic.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe ScheduleMergeTopicsWithSameName do
let(:topics) { table(:topics) }
describe '#up' do
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
topics.create!(name: 'topic1')
topics.create!(name: 'Topic2')
topics.create!(name: 'Topic3')
topics.create!(name: 'Topic4')
topics.create!(name: 'topic2')
topics.create!(name: 'topic3')
topics.create!(name: 'topic4')
topics.create!(name: 'TOPIC2')
topics.create!(name: 'topic5')
end
it 'schedules MergeTopicsWithSameName background jobs', :aggregate_failures do
Sidekiq::Testing.fake! do
freeze_time do
migrate!
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, %w[topic2 topic3])
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, %w[topic4])
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment