Commit 0d4008ff authored by Dmitry Gruzd's avatar Dmitry Gruzd

Merge branch '324745-preload-notes-data-for-es-indexing' into 'master'

Avoid N+1 queries when loading notes for indexing in Elasticsearch

See merge request gitlab-org/gitlab!56808
parents b8592751 6935bdff
...@@ -9,6 +9,7 @@ module ActiveRecord ...@@ -9,6 +9,7 @@ module ActiveRecord
end end
def self.run def self.run
self
end end
def self.preloaded_records def self.preloaded_records
......
...@@ -47,6 +47,8 @@ module Elastic ...@@ -47,6 +47,8 @@ module Elastic
after_commit :maintain_elasticsearch_create, on: :create, if: :maintaining_elasticsearch? after_commit :maintain_elasticsearch_create, on: :create, if: :maintaining_elasticsearch?
after_commit :maintain_elasticsearch_update, on: :update, if: :maintaining_elasticsearch? after_commit :maintain_elasticsearch_update, on: :update, if: :maintaining_elasticsearch?
after_commit :maintain_elasticsearch_destroy, on: :destroy, if: :maintaining_elasticsearch? after_commit :maintain_elasticsearch_destroy, on: :destroy, if: :maintaining_elasticsearch?
scope :preload_indexing_data, -> { __elasticsearch__.preload_indexing_data(self) }
end end
end end
......
---
title: Improve performance of indexing notes in Elasticsearch
merge_request: 56808
author:
type: performance
...@@ -37,6 +37,12 @@ module Elastic ...@@ -37,6 +37,12 @@ module Elastic
self.import(options) self.import(options)
end end
# Should be overriden in *ClassProxy for specific model if data needs to
# be preloaded by #as_indexed_json method
def preload_indexing_data(relation)
relation
end
private private
def default_operator def default_operator
......
...@@ -24,6 +24,12 @@ module Elastic ...@@ -24,6 +24,12 @@ module Elastic
search(query_hash, options) search(query_hash, options)
end end
# rubocop: disable CodeReuse/ActiveRecord
def preload_indexing_data(relation)
relation.includes(noteable: :assignees)
end
# rubocop: enable CodeReuse/ActiveRecord
private private
def confidentiality_filter(query_hash, options) def confidentiality_filter(query_hash, options)
......
...@@ -31,7 +31,7 @@ module Gitlab ...@@ -31,7 +31,7 @@ module Gitlab
@refs.group_by(&:klass).each do |klass, group| @refs.group_by(&:klass).each do |klass, group|
ids = group.map(&:db_id) ids = group.map(&:db_id)
records = klass.id_in(ids) records = klass.id_in(ids).preload_indexing_data
records_by_id = records.each_with_object({}) { |record, hash| hash[record.id] = record } records_by_id = records.each_with_object({}) { |record, hash| hash[record.id] = record }
group.each do |ref| group.each do |ref|
...@@ -112,7 +112,6 @@ module Gitlab ...@@ -112,7 +112,6 @@ module Gitlab
klass.to_s klass.to_s
end end
# TODO: return a promise for batch loading: https://gitlab.com/gitlab-org/gitlab/issues/207280
def database_record def database_record
strong_memoize(:database_record) { klass.find_by_id(db_id) } strong_memoize(:database_record) { klass.find_by_id(db_id) }
end end
......
...@@ -200,22 +200,28 @@ RSpec.describe Gitlab::Elastic::DocumentReference do ...@@ -200,22 +200,28 @@ RSpec.describe Gitlab::Elastic::DocumentReference do
let(:note_ref2) { described_class.new(Note, note2.id, note2.es_id, note2.es_parent) } let(:note_ref2) { described_class.new(Note, note2.id, note2.es_id, note2.es_parent) }
let(:note_ref_deleted) { described_class.new(Note, note_deleted.id, note_deleted.es_id, note_deleted.es_parent) } let(:note_ref_deleted) { described_class.new(Note, note_deleted.id, note_deleted.es_id, note_deleted.es_parent) }
it 'preloads database records in one query per type' do it 'preloads database records to avoid N+1 queries' do
collection = described_class::Collection.new collection = described_class::Collection.new
collection.deserialize_and_add(issue_ref1.serialize) collection.deserialize_and_add(issue_ref1.serialize)
collection.deserialize_and_add(issue_ref2.serialize)
collection.deserialize_and_add(note_ref1.serialize) collection.deserialize_and_add(note_ref1.serialize)
control = ActiveRecord::QueryRecorder.new { collection.preload_database_records.map(&:database_record) }
collection = described_class::Collection.new
collection.deserialize_and_add(issue_ref1.serialize)
collection.deserialize_and_add(note_ref1.serialize)
collection.deserialize_and_add(issue_ref2.serialize)
collection.deserialize_and_add(note_ref2.serialize) collection.deserialize_and_add(note_ref2.serialize)
collection.deserialize_and_add(note_ref_deleted.serialize) collection.deserialize_and_add(note_ref_deleted.serialize)
database_records = nil database_records = nil
expect do expect do
database_records = collection.preload_database_records.map { |ref| ref.database_record } database_records = collection.preload_database_records.map { |ref| ref.database_record }
end.not_to exceed_query_limit(2) end.not_to exceed_query_limit(control)
expect(database_records[0]).to eq(issue1) expect(database_records[0]).to eq(issue1)
expect(database_records[1]).to eq(issue2) expect(database_records[1]).to eq(note1)
expect(database_records[2]).to eq(note1) expect(database_records[2]).to eq(issue2)
expect(database_records[3]).to eq(note2) expect(database_records[3]).to eq(note2)
expect(database_records[4]).to eq(nil) # Deleted database record will be nil expect(database_records[4]).to eq(nil) # Deleted database record will be nil
end end
......
...@@ -208,6 +208,34 @@ RSpec.describe Elastic::ProcessBookkeepingService, :clean_gitlab_redis_shared_st ...@@ -208,6 +208,34 @@ RSpec.describe Elastic::ProcessBookkeepingService, :clean_gitlab_redis_shared_st
expect(described_class.queue_size).to eq(1) expect(described_class.queue_size).to eq(1)
end end
context 'N+1 queries' do
it 'does not have N+1 queries for notes' do
notes = []
2.times do
notes << create(:note)
notes << create(:discussion_note_on_merge_request)
notes << create(:note_on_merge_request)
notes << create(:note_on_commit)
end
described_class.track!(*notes)
control = ActiveRecord::QueryRecorder.new { described_class.new.execute }
3.times do
notes << create(:note)
notes << create(:discussion_note_on_merge_request)
notes << create(:note_on_merge_request)
notes << create(:note_on_commit)
end
described_class.track!(*notes)
expect { described_class.new.execute }.not_to exceed_all_query_limit(control)
end
end
def expect_processing(*refs, failures: []) def expect_processing(*refs, failures: [])
expect_next_instance_of(::Gitlab::Elastic::BulkIndexer) do |indexer| expect_next_instance_of(::Gitlab::Elastic::BulkIndexer) do |indexer|
refs.each { |ref| expect(indexer).to receive(:process).with(ref) } refs.each { |ref| expect(indexer).to receive(:process).with(ref) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment