Commit 291dc6e8 authored by Mayra Cabrera

Merge branch '55487-backfill-lfs-objects-projects-sql' into 'master'

Backfill LfsObjectsProject records of forks

See merge request gitlab-org/gitlab!24767
parents 8e42e95d f560f1d6
---
title: Backfill LfsObjectsProject records of forks
merge_request: 24767
author:
type: other
# frozen_string_literal: true
# Schedules `LinkLfsObjects` background migration jobs over the projects that
# have a matching fork network member row reported by
# `ForkNetworkMember.with_non_existing_lfs_objects`.
class ScheduleLinkLfsObjects < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  MIGRATION = 'LinkLfsObjects'
  BATCH_SIZE = 1_000

  disable_ddl_transaction!

  # Migration-local model bound to the `projects` table.
  class Project < ActiveRecord::Base
    include EachBatch

    self.table_name = 'projects'
  end

  # Queues one delayed job per batch of BATCH_SIZE matching projects, spaced
  # by BackgroundMigrationWorker.minimum_interval.
  def up
    projects_to_backfill =
      Project.where('EXISTS (?)', fork_network_members_missing_lfs_links)

    queue_background_migration_jobs_by_range_at_intervals(
      projects_to_backfill,
      MIGRATION,
      BackgroundMigrationWorker.minimum_interval,
      batch_size: BATCH_SIZE
    )
  end

  def down
    # no-op
  end

  private

  # Correlated subquery: fork network member rows for the outer `projects`
  # row that still lack LFS object links.
  def fork_network_members_missing_lfs_links
    member_model = Gitlab::BackgroundMigration::LinkLfsObjects::ForkNetworkMember

    member_model
      .select(1)
      .with_non_existing_lfs_objects
      .where('fork_network_members.project_id = projects.id')
  end
end
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Backfills lfs_objects_projects rows for forks: each fork in the given
    # project id range receives a row for every LFS object that is linked to
    # the project it was forked from but not yet to the fork itself.
    class LinkLfsObjects
      # Migration-local model bound to the `fork_network_members` table.
      class ForkNetworkMember < ActiveRecord::Base
        self.table_name = 'fork_network_members'

        # Joins every member to the LFS links (`lop`) of the project it was
        # forked from and keeps only the (member, lfs object) pairs for which
        # the member's own project has no link.
        def self.with_non_existing_lfs_objects
          missing_link_condition = <<~SQL
            NOT EXISTS (
            SELECT 1
            FROM lfs_objects_projects
            WHERE lfs_objects_projects.project_id = fork_network_members.project_id
            AND lfs_objects_projects.lfs_object_id = lop.lfs_object_id
            )
          SQL

          joins('JOIN lfs_objects_projects lop ON fork_network_members.forked_from_project_id = lop.project_id')
            .where(missing_link_condition)
        end
      end

      # Inserts the missing links for members whose project_id lies within
      # start_id..end_id. Returns early, without issuing the INSERT, when the
      # range has nothing to backfill.
      def perform(start_id, end_id)
        missing_links = missing_links_between(start_id, end_id)
        return if missing_links.empty?

        execute(<<~SQL)
          INSERT INTO lfs_objects_projects (lfs_object_id, project_id)
          #{missing_links.to_sql}
        SQL
      end

      private

      # Relation yielding (lfs_object_id, project_id) pairs, in the column
      # order the INSERT expects.
      def missing_links_between(start_id, end_id)
        ForkNetworkMember
          .select('lop.lfs_object_id, fork_network_members.project_id')
          .with_non_existing_lfs_objects
          .where(project_id: start_id..end_id)
      end

      def execute(sql)
        ::ActiveRecord::Base.connection.execute(sql)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the LinkLfsObjects background migration: forks inside the given
# project id range must end up with the same LFS object links as the project
# they were forked from.
describe Gitlab::BackgroundMigration::LinkLfsObjects, :migration, schema: 2020_02_10_062432 do
# Handles for the tables the fixtures write to, obtained via the `table` helper.
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
# Roots of the two fork networks.
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
# Forks of source_project that start without any LFS links.
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
# Fork of another_source_project that starts without any LFS links.
let(:other_project) { projects.create(namespace_id: namespace.id) }
# Fork of source_project that is already linked to both LFS objects.
let(:linked_project) { projects.create(namespace_id: namespace.id) }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
before do
# First network: source_project is the root member (no fork source);
# project, another_project and linked_project are members forked from it.
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, another_project, linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
# Second network: another_source_project is the root, other_project its fork.
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: other_project.id, forked_from_project_id: another_fork_network.root_project_id)
# Link both LFS objects to the two roots and to linked_project only, leaving
# project, another_project and other_project to be backfilled.
[source_project, another_source_project, linked_project].each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'creates LfsObjectsProject records for forks within the specified range of project IDs' do
# Three unlinked forks x two LFS objects = 6 new rows.
# NOTE(review): the project.id..other_project.id range presumably spans all
# three unlinked forks because records are created in first-reference order
# inside the `before` block - confirm before reordering fixtures.
expect { subject.perform(project.id, other_project.id) }.to change { lfs_objects_projects.count }.by(6)
expect(lfs_object_ids_for(project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(another_project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(other_project)).to match_array(lfs_object_ids_for(another_source_project))
# Running the same range a second time must not add any rows.
expect { subject.perform(project.id, other_project.id) }.not_to change { lfs_objects_projects.count }
end
context 'when it is not necessary to create LfsObjectProject records' do
it 'does not create LfsObjectProject records' do
# linked_project already has both links, so a range covering only it adds nothing.
expect { subject.perform(linked_project.id, linked_project.id) }
.not_to change { lfs_objects_projects.count }
end
end
# Returns the lfs_object_id values currently linked to the given project.
def lfs_object_ids_for(project)
lfs_objects_projects.where(project_id: project.id).pluck(:lfs_object_id)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200210062432_schedule_link_lfs_objects.rb')
# Specs for the ScheduleLinkLfsObjects migration: it must enqueue LinkLfsObjects
# jobs only for forks that are missing LFS object links.
describe ScheduleLinkLfsObjects, :migration, :sidekiq do
# Handles for the tables the fixtures write to, obtained via the `table` helper.
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
# Roots of the two fork networks.
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
# Forks of source_project.
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
# Fork of another_source_project.
let(:other_project) { projects.create(namespace_id: namespace.id) }
# Fork of source_project; pre-linked in the "forks to be backfilled" context.
let(:linked_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
before do
# First network: source_project is the root member (no fork source);
# project, another_project and linked_project are members forked from it.
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, another_project, linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
# Second network: another_source_project is the root, other_project its fork.
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: other_project.id, forked_from_project_id: another_fork_network.root_project_id)
end
context 'when there are forks to be backfilled' do
before do
# Shrink the batch size so the three unlinked forks span two jobs.
stub_const("#{described_class.name}::BATCH_SIZE", 2)
# Link both LFS objects to the two roots and to linked_project only, leaving
# project, another_project and other_project to be backfilled.
[source_project, another_source_project, linked_project].each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'schedules background migration to link LFS objects' do
Sidekiq::Testing.fake! do
migrate!
# Expected batches: [project, another_project] and [other_project].
# NOTE(review): the 2- and 4-minute delays presumably are multiples of
# BackgroundMigrationWorker.minimum_interval - confirm its value.
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, project.id, another_project.id)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(4.minutes, other_project.id, other_project.id)
end
end
end
context 'when there are no forks to be backfilled' do
before do
# Link both LFS objects to every project row that exists at this point, so
# no fork is missing a link.
projects.all.each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'does not schedule any job' do
Sidekiq::Testing.fake! do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(0)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment