Commit 2edf27ea authored by Kerri Miller's avatar Kerri Miller Committed by Bob Van Landuyt

Refactor not_linked_lfs_objects to remove AR query

This will make sure we only query 3 times per batch of LFS objects:
- The existing LFS objects in a batch
- The existing LFS objects not linked to the project
- The insert for the LFS objects to link them to the project
parent e0fda460
...@@ -18,6 +18,11 @@ class LfsObject < ApplicationRecord ...@@ -18,6 +18,11 @@ class LfsObject < ApplicationRecord
after_save :update_file_store, if: :saved_change_to_file? after_save :update_file_store, if: :saved_change_to_file?
def self.not_linked_to_project(project)
where('NOT EXISTS (?)',
project.lfs_objects_projects.select(1).where('lfs_objects_projects.lfs_object_id = lfs_objects.id'))
end
def update_file_store def update_file_store
# The file.object_store is set during `uploader.store!` # The file.object_store is set during `uploader.store!`
# which happens after object is inserted/updated # which happens after object is inserted/updated
......
...@@ -20,24 +20,27 @@ module Projects ...@@ -20,24 +20,27 @@ module Projects
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def link_existing_lfs_objects(oids) def link_existing_lfs_objects(oids)
all_existing_objects = [] linked_existing_objects = []
iterations = 0 iterations = 0
oids.each_slice(BATCH_SIZE) do |oids_batch| oids.each_slice(BATCH_SIZE) do |oids_batch|
existent_lfs_objects = LfsObject.where(oid: oids_batch) # Load all existing LFS Objects immediately so we don't issue an extra
# query for the `.any?`
existent_lfs_objects = LfsObject.where(oid: oids_batch).load
next unless existent_lfs_objects.any? next unless existent_lfs_objects.any?
rows = existent_lfs_objects
.not_linked_to_project(project)
.map { |existing_lfs_object| { project_id: project.id, lfs_object_id: existing_lfs_object.id } }
Gitlab::Database.bulk_insert(:lfs_objects_projects, rows)
iterations += 1 iterations += 1
not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
project.all_lfs_objects << not_linked_lfs_objects
all_existing_objects += existent_lfs_objects.pluck(:oid) linked_existing_objects += existent_lfs_objects.map(&:oid)
end end
log_lfs_link_results(all_existing_objects.count, iterations) log_lfs_link_results(linked_existing_objects.count, iterations)
all_existing_objects linked_existing_objects
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -3,6 +3,18 @@ ...@@ -3,6 +3,18 @@
require 'spec_helper' require 'spec_helper'
describe LfsObject do describe LfsObject do
context 'scopes' do
describe '.not_existing_in_project' do
it 'contains only lfs objects not linked to the project' do
project = create(:project)
create(:lfs_objects_project, project: project)
other_lfs_object = create(:lfs_object)
expect(described_class.not_linked_to_project(project)).to contain_exactly(other_lfs_object)
end
end
end
it 'has a distinct has_many :projects relation through lfs_objects_projects' do it 'has a distinct has_many :projects relation through lfs_objects_projects' do
lfs_object = create(:lfs_object) lfs_object = create(:lfs_object)
project = create(:project) project = create(:project)
......
...@@ -28,7 +28,7 @@ describe Projects::LfsPointers::LfsLinkService do ...@@ -28,7 +28,7 @@ describe Projects::LfsPointers::LfsLinkService do
it 'returns linked oids' do it 'returns linked oids' do
linked = lfs_objects_project.map(&:lfs_object).map(&:oid) << new_lfs_object.oid linked = lfs_objects_project.map(&:lfs_object).map(&:oid) << new_lfs_object.oid
expect(subject.execute(new_oid_list.keys)).to eq linked expect(subject.execute(new_oid_list.keys)).to contain_exactly(*linked)
end end
it 'links in batches' do it 'links in batches' do
...@@ -58,5 +58,16 @@ describe Projects::LfsPointers::LfsLinkService do ...@@ -58,5 +58,16 @@ describe Projects::LfsPointers::LfsLinkService do
subject.execute(oids) subject.execute(oids)
end end
it 'only queries 3 times' do
# make sure that we don't count the queries in the setup
new_oid_list
# These are repeated for each batch of oids: maximum (MAX_OIDS / BATCH_SIZE) times
# 1. Load the batch of lfs object ids that we might know already
# 2. Load the objects that have not been linked to the project yet
# 3. Insert the lfs_objects_projects for that batch
expect { subject.execute(new_oid_list.keys) }.not_to exceed_query_limit(3)
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment