Commit 5f0e4040 authored by Douwe Maan

Batch load only data from same repository when lazy object is accessed

By specifying `key`, we get a different lazy batch loader for each
repository, which means that accessing a lazy object from one repository
will only result in that repository's objects being fetched, not those
of other repositories, saving us some unnecessary Gitaly lookups.
parent ba9eeea4
...@@ -11,10 +11,11 @@ module Resolvers ...@@ -11,10 +11,11 @@ module Resolvers
end end
def model_by_full_path(model, full_path) def model_by_full_path(model, full_path)
BatchLoader.for(full_path).batch(key: "#{model.model_name.param_key}:full_path") do |full_paths, loader| BatchLoader.for(full_path).batch(key: model) do |full_paths, loader, args|
# `with_route` avoids an N+1 calculating full_path # `with_route` avoids an N+1 calculating full_path
results = model.where_full_path_in(full_paths).with_route args[:key].where_full_path_in(full_paths).with_route.each do |project|
results.each { |project| loader.call(project.full_path, project) } loader.call(project.full_path, project)
end
end end
end end
end end
......
...@@ -14,9 +14,10 @@ module Resolvers ...@@ -14,9 +14,10 @@ module Resolvers
def resolve(iid:) def resolve(iid:)
return unless project.present? return unless project.present?
BatchLoader.for(iid.to_s).batch(key: project.id) do |iids, loader| BatchLoader.for(iid.to_s).batch(key: project) do |iids, loader, args|
results = project.merge_requests.where(iid: iids) args[:key].merge_requests.where(iid: iids).each do |mr|
results.each { |mr| loader.call(mr.iid.to_s, mr) } loader.call(mr.iid.to_s, mr)
end
end end
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -80,15 +80,9 @@ class Blob < SimpleDelegator ...@@ -80,15 +80,9 @@ class Blob < SimpleDelegator
end end
def self.lazy(project, commit_id, path) def self.lazy(project, commit_id, path)
BatchLoader.for({ project: project, commit_id: commit_id, path: path }).batch do |items, loader| BatchLoader.for([commit_id, path]).batch(key: project.repository) do |items, loader, args|
items_by_project = items.group_by { |i| i[:project] } args[:key].blobs_at(items).each do |blob|
loader.call([blob.commit_id, blob.path], blob) if blob
items_by_project.each do |project, items|
items = items.map { |i| i.values_at(:commit_id, :path) }
project.repository.blobs_at(items).each do |blob|
loader.call({ project: blob.project, commit_id: blob.commit_id, path: blob.path }, blob) if blob
end
end end
end end
end end
......
---
title: Batch load only data from same repository when lazy object is accessed
merge_request: 23309
author:
type: performance
...@@ -155,17 +155,9 @@ module Gitlab ...@@ -155,17 +155,9 @@ module Gitlab
end end
def extract_signature_lazily(repository, commit_id) def extract_signature_lazily(repository, commit_id)
BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args|
items_by_repo = items.group_by { |i| i[:repository] } batch_signature_extraction(args[:key], commit_ids).each do |commit_id, signature_data|
loader.call(commit_id, signature_data)
items_by_repo.each do |repo, items|
commit_ids = items.map { |i| i[:commit_id] }
signatures = batch_signature_extraction(repository, commit_ids)
signatures.each do |commit_sha, signature_data|
loader.call({ repository: repository, commit_id: commit_sha }, signature_data)
end
end end
end end
end end
...@@ -175,17 +167,9 @@ module Gitlab ...@@ -175,17 +167,9 @@ module Gitlab
end end
def get_message(repository, commit_id) def get_message(repository, commit_id)
BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args|
items_by_repo = items.group_by { |i| i[:repository] } get_messages(args[:key], commit_ids).each do |commit_id, message|
loader.call(commit_id, message)
items_by_repo.each do |repo, items|
commit_ids = items.map { |i| i[:commit_id] }
messages = get_messages(repository, commit_ids)
messages.each do |commit_sha, message|
loader.call({ repository: repository, commit_id: commit_sha }, message)
end
end end
end end
end end
......
...@@ -14,17 +14,9 @@ module Gitlab ...@@ -14,17 +14,9 @@ module Gitlab
class << self class << self
def get_message(repository, tag_id) def get_message(repository, tag_id)
BatchLoader.for({ repository: repository, tag_id: tag_id }).batch do |items, loader| BatchLoader.for(tag_id).batch(key: repository) do |tag_ids, loader, args|
items_by_repo = items.group_by { |i| i[:repository] } get_messages(args[:key], tag_ids).each do |tag_id, message|
loader.call(tag_id, message)
items_by_repo.each do |repo, items|
tag_ids = items.map { |i| i[:tag_id] }
messages = get_messages(repository, tag_ids)
messages.each do |id, message|
loader.call({ repository: repository, tag_id: id }, message)
end
end end
end end
end end
......
...@@ -450,11 +450,17 @@ describe Gitlab::Git::Commit, :seed_helper do ...@@ -450,11 +450,17 @@ describe Gitlab::Git::Commit, :seed_helper do
described_class.extract_signature_lazily(repository, commit_id) described_class.extract_signature_lazily(repository, commit_id)
end end
other_repository = double(:repository)
described_class.extract_signature_lazily(other_repository, commit_ids.first)
expect(described_class).to receive(:batch_signature_extraction) expect(described_class).to receive(:batch_signature_extraction)
.with(repository, commit_ids) .with(repository, commit_ids)
.once .once
.and_return({}) .and_return({})
expect(described_class).not_to receive(:batch_signature_extraction)
.with(other_repository, commit_ids.first)
2.times { signatures.each(&:itself) } 2.times { signatures.each(&:itself) }
end end
end end
......
...@@ -38,6 +38,9 @@ describe Gitlab::Git::Tag, :seed_helper do ...@@ -38,6 +38,9 @@ describe Gitlab::Git::Tag, :seed_helper do
end end
it 'gets messages in one batch', :request_store do it 'gets messages in one batch', :request_store do
other_repository = double(:repository)
described_class.get_message(other_repository, tag_ids.first)
expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1) expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1)
end end
end end
......
...@@ -18,14 +18,23 @@ describe Blob do ...@@ -18,14 +18,23 @@ describe Blob do
describe '.lazy' do describe '.lazy' do
let(:project) { create(:project, :repository) } let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') } let(:other_project) { create(:project, :repository) }
let(:commit_id) { 'e63f41fe459e62e1228fcef60d7189127aeba95a' }
it 'fetches all blobs when the first is accessed' do it 'does not fetch blobs when none are accessed' do
changelog = described_class.lazy(project, commit.id, 'CHANGELOG') expect(project.repository).not_to receive(:blobs_at)
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original described_class.lazy(project, commit_id, 'CHANGELOG')
expect(Gitlab::Git::Blob).not_to receive(:find) end
it 'fetches all blobs for the same repository when one is accessed' do
expect(project.repository).to receive(:blobs_at).with([[commit_id, 'CHANGELOG'], [commit_id, 'CONTRIBUTING.md']]).once.and_call_original
expect(other_project.repository).not_to receive(:blobs_at)
changelog = described_class.lazy(project, commit_id, 'CHANGELOG')
contributing = described_class.lazy(project, commit_id, 'CONTRIBUTING.md')
described_class.lazy(other_project, commit_id, 'CHANGELOG')
# Access property so the values are loaded # Access property so the values are loaded
changelog.id changelog.id
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment