Commit 8541d5a9 authored by Dylan Griffith's avatar Dylan Griffith

Prevent 2nd Elasticsearch query for project searches

We memoize the result object so that when it is later evaluated the same
Elasticsearch result can be used for the `total_count`.

We also extract a shared test examples for the 3 different kinds of
Elastic search results to reduce the chance of regression here.

This memoize approach to solving this problem was originally introduced
in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/13120

Note that some of the methods being memoized have arguments and as such
the memoization is not logically consistent from the code perspective.
We must memoize the results without the page argument because
the current page we're on - if we were to memoize with dynamic
parameters we would end up hitting ES twice for any page that's not page
1, and that's something we want to avoid.  It is safe to memoize the
page we get here because the method is always called before
the equivalent `#_count` method.

Read more at https://gitlab.com/gitlab-org/gitlab/-/issues/219209
parent 3ecbbe3a
---
title: Prevent 2nd Elasticsearch query for project searches
merge_request: 36672
author:
type: performance
......@@ -30,35 +30,41 @@ module Gitlab
return Kaminari.paginate_array([]) if project.empty_repo? || query.blank?
return Kaminari.paginate_array([]) unless root_ref?
project.repository.__elasticsearch__.elastic_search_as_found_blob(
query,
page: (page || 1).to_i,
per: per_page
)
strong_memoize(:blobs) do
project.repository.__elasticsearch__.elastic_search_as_found_blob(
query,
page: (page || 1).to_i,
per: per_page
)
end
end
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project)
if project.wiki_enabled? && !project.wiki.empty? && query.present?
project.wiki.__elasticsearch__.elastic_search_as_wiki_page(
query,
page: (page || 1).to_i,
per: per_page
)
strong_memoize(:wiki_blobs) do
project.wiki.__elasticsearch__.elastic_search_as_wiki_page(
query,
page: (page || 1).to_i,
per: per_page
)
end
else
Kaminari.paginate_array([])
end
end
def notes
opt = {
project_ids: limit_project_ids,
current_user: @current_user,
public_and_internal_projects: @public_and_internal_projects
}
Note.elastic_search(query, options: opt)
strong_memoize(:notes) do
opt = {
project_ids: limit_project_ids,
current_user: @current_user,
public_and_internal_projects: @public_and_internal_projects
}
Note.elastic_search(query, options: opt)
end
end
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
......@@ -69,12 +75,14 @@ module Gitlab
else
# We use elastic for default branch only
if root_ref?
project.repository.find_commits_by_message_with_elastic(
query,
page: (page || 1).to_i,
per_page: per_page,
preload_method: preload_method
)
strong_memoize(:commits) do
project.repository.find_commits_by_message_with_elastic(
query,
page: (page || 1).to_i,
per_page: per_page,
preload_method: preload_method
)
end
else
offset = per_page * ((page || 1) - 1)
......
......@@ -38,4 +38,10 @@ RSpec.describe Gitlab::Elastic::GroupSearchResults do
end
end
end
context 'query performance' do
let(:query) { '*' }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
end
end
......@@ -240,4 +240,18 @@ RSpec.describe Gitlab::Elastic::ProjectSearchResults, :elastic do
end
end
end
context 'query performance' do
let(:project) { create(:project, :public, :repository, :wiki_repo) }
before do
# wiki_blobs method checks to see if there is a wiki page before doing
# the search
create(:wiki_page, wiki: project.wiki)
end
let(:results) { described_class.new(user, '*', project) }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|notes blobs wiki_blobs commits issues merge_requests milestones|
end
end
......@@ -12,16 +12,6 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
let(:project_2) { create(:project, :public, :repository, :wiki_repo) }
let(:limit_project_ids) { [project_1.id] }
describe 'counts' do
it 'does not hit Elasticsearch twice for result and counts' do
expect(Repository).to receive(:find_commits_by_message_with_elastic).with('hello world', anything).once.and_call_original
results = described_class.new(user, 'hello world', limit_project_ids)
expect(results.objects('commits', page: 2)).to be_empty
expect(results.commits_count).to eq 0
end
end
describe '#formatted_count' do
using RSpec::Parameterized::TableSyntax
......@@ -1134,4 +1124,10 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
end
end
end
context 'query performance' do
let(:results) { described_class.new(user, 'hello world', limit_project_ids) }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
end
end
# frozen_string_literal: true
RSpec.shared_examples 'does not hit Elasticsearch twice for objects and counts' do |scopes|
scopes.each do |scope|
context "for scope #{scope}", :elastic, :request_store do
it 'makes 1 Elasticsearch query' do
::Gitlab::SafeRequestStore.clear!
results.objects(scope)
results.public_send("#{scope}_count")
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_request_count).to eq(1)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment