Commit cb4316d3 authored by Arturo Herrero's avatar Arturo Herrero

Merge branch '299099-update-es-counts-query' into 'master'

Improve Advanced Search counts queries

See merge request gitlab-org/gitlab!51971
parents 8017511f 458e9039
---
title: Improve Advanced Search counts queries
merge_request: 51971
author:
type: performance
......@@ -54,15 +54,14 @@ module Elastic
}
end
def basic_query_hash(fields, query)
def basic_query_hash(fields, query, count_only: false)
fields = CustomLanguageAnalyzers.add_custom_analyzers_fields(fields)
fields = remove_fields_boost(fields) if count_only
query_hash =
if query.present?
{
query: {
bool: {
must: [{
simple_query_string = {
simple_query_string: {
_name: context.name(self.es_type, :match, :search_terms),
fields: fields,
......@@ -70,8 +69,11 @@ module Elastic
lenient: true,
default_operator: default_operator
}
}],
filter: [{
}
must = []
filter = [{
term: {
type: {
_name: context.name(:doc, :is_a, self.es_type),
......@@ -79,6 +81,18 @@ module Elastic
}
}
}]
if count_only
filter << simple_query_string
else
must << simple_query_string
end
{
query: {
bool: {
must: must,
filter: filter
}
}
}
......@@ -93,7 +107,11 @@ module Elastic
}
end
if count_only
query_hash[:size] = 0
else
query_hash[:highlight] = highlight_options(fields)
end
query_hash
end
......@@ -168,6 +186,10 @@ module Elastic
end
end
def remove_fields_boost(fields)
fields.map { |m| m.split('^').first }
end
# Builds an elasticsearch query that will select projects the user is
# granted access to.
#
......
......@@ -76,7 +76,7 @@ module Elastic
query_hash = {
query: { bool: bool_expr },
size: per,
size: (options[:count_only] ? 0 : per),
from: per * (page - 1),
sort: [:_score]
}
......@@ -91,14 +91,13 @@ module Elastic
bool_expr[:must] = { match_all: {} }
query_hash[:track_scores] = true
else
bool_expr[:must] = {
simple_query_string: {
_name: context.name(:commit, :match, :search_terms),
bool_expr = apply_simple_query_string(
name: context.name(:commit, :match, :search_terms),
fields: fields,
query: query_with_prefix,
default_operator: :and
}
}
bool_expr: bool_expr,
count_only: options[:count_only]
)
end
# add the document type filter
......@@ -117,7 +116,7 @@ module Elastic
options[:order] = :default if options[:order].blank?
if options[:highlight]
if options[:highlight] && !options[:count_only]
es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
memo[field.to_sym] = {}
end
......@@ -150,20 +149,18 @@ module Elastic
bool_expr = ::Gitlab::Elastic::BoolExpr.new
query_hash = {
query: { bool: bool_expr },
size: per,
size: (options[:count_only] ? 0 : per),
from: per * (page - 1),
sort: [:_score]
}
# add the term matching
bool_expr[:must] = {
simple_query_string: {
_name: context.name(:blob, :match, :search_terms),
bool_expr = apply_simple_query_string(
name: context.name(:blob, :match, :search_terms),
query: query.term,
default_operator: :and,
fields: %w[blob.content blob.file_name blob.path]
}
}
fields: %w[blob.content blob.file_name blob.path],
bool_expr: bool_expr,
count_only: options[:count_only]
)
# If there is a :current_user set in the `options`, we can assume
# we need to do a project visibility check.
......@@ -192,7 +189,7 @@ module Elastic
options[:order] = :default if options[:order].blank?
if options[:highlight]
if options[:highlight] && !options[:count_only]
query_hash[:highlight] = {
pre_tags: [HIGHLIGHT_START_TAG],
post_tags: [HIGHLIGHT_END_TAG],
......@@ -265,6 +262,27 @@ module Elastic
def project_id_for_commit_or_blob(result, type)
result.dig('_source', 'project_id') || result.dig('_source', type, 'rid').to_i
end
def apply_simple_query_string(name:, fields:, query:, bool_expr:, count_only:)
fields = remove_fields_boost(fields) if count_only
simple_query_string = {
simple_query_string: {
_name: name,
fields: fields,
query: query,
default_operator: :and
}
}
bool_expr.tap do |expr|
if count_only
expr[:filter] << simple_query_string
else
expr[:must] = simple_query_string
end
end
end
end
end
end
......@@ -12,8 +12,8 @@ module Elastic
else
# iid field can be added here as lenient option will
# pardon format errors, like integer out of range.
fields = %w(iid^3 title^2 description)
basic_query_hash(fields, query)
fields = %w[iid^3 title^2 description]
basic_query_hash(fields, query, count_only: options[:count_only])
end
options[:features] = 'issues'
......
......@@ -12,8 +12,9 @@ module Elastic
else
# iid field can be added here as lenient option will
# pardon format errors, like integer out of range.
fields = %w(iid^3 title^2 description)
basic_query_hash(fields, query)
fields = %w[iid^3 title^2 description]
basic_query_hash(fields, query, count_only: options[:count_only])
end
options[:features] = 'merge_requests'
......
......@@ -4,9 +4,9 @@ module Elastic
module Latest
class MilestoneClassProxy < ApplicationClassProxy
def elastic_search(query, options: {})
options[:in] = %w(title^2 description)
options[:in] = %w[title^2 description]
query_hash = basic_query_hash(options[:in], query)
query_hash = basic_query_hash(options[:in], query, count_only: options[:count_only])
query_hash = context.name(:milestone, :related) { project_ids_filter(query_hash, options) }
search(query_hash, options)
......
......@@ -11,14 +11,14 @@ module Elastic
def elastic_search(query, options: {})
options[:in] = ['note']
query_hash = basic_query_hash(%w[note], query)
query_hash = basic_query_hash(%w[note], query, count_only: options[:count_only])
context.name(:note) do
query_hash = context.name(:authorized) { project_ids_filter(query_hash, options) }
query_hash = context.name(:confidentiality) { confidentiality_filter(query_hash, options) }
end
query_hash[:highlight] = highlight_options(options[:in])
query_hash[:highlight] = highlight_options(options[:in]) unless options[:count_only]
search(query_hash, options)
end
......
......@@ -4,9 +4,9 @@ module Elastic
module Latest
class ProjectClassProxy < ApplicationClassProxy
def elastic_search(query, options: {})
options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9 description)
options[:in] = %w[name^10 name_with_namespace^2 path_with_namespace path^9 description]
query_hash = basic_query_hash(options[:in], query)
query_hash = basic_query_hash(options[:in], query, count_only: options[:count_only])
filters = [{ terms: { _name: context.name(:doc, :is_a, es_type), type: [es_type] } }]
......
......@@ -8,8 +8,8 @@ module Elastic
delegate :project, to: :target
delegate :id, to: :project, prefix: true
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, preload_method: nil)
response = elastic_search(query, type: 'commit', page: page, per: per_page)[:commits][:results]
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}, preload_method: nil)
response = elastic_search(query, type: 'commit', options: options, page: page, per: per_page)[:commits][:results]
commits = response.map do |result|
commit result["_source"]["commit"]["sha"]
......
......@@ -17,29 +17,31 @@ module Gitlab
private
def blobs(page: 1, per_page: DEFAULT_PER_PAGE)
def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project)
return Kaminari.paginate_array([]) if project.empty_repo? || query.blank?
return Kaminari.paginate_array([]) unless root_ref?
strong_memoize(:blobs) do
strong_memoize(memoize_key(:blobs, count_only: count_only)) do
project.repository.__elasticsearch__.elastic_search_as_found_blob(
query,
page: (page || 1).to_i,
per: per_page
per: per_page,
options: { count_only: count_only }
)
end
end
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE)
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project)
if project.wiki_enabled? && !project.wiki.empty? && query.present?
strong_memoize(:wiki_blobs) do
strong_memoize(memoize_key(:wiki_blobs, count_only: count_only)) do
project.wiki.__elasticsearch__.elastic_search_as_wiki_page(
query,
page: (page || 1).to_i,
per: per_page
per: per_page,
options: { count_only: count_only }
)
end
else
......@@ -47,19 +49,20 @@ module Gitlab
end
end
def notes
strong_memoize(:notes) do
def notes(count_only: false)
strong_memoize(memoize_key(:notes, count_only: count_only)) do
opt = {
project_ids: limit_project_ids,
current_user: @current_user,
public_and_internal_projects: @public_and_internal_projects
public_and_internal_projects: @public_and_internal_projects,
count_only: count_only
}
Note.elastic_search(query, options: opt)
end
end
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil, count_only: false)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project)
if project.empty_repo? || query.blank?
......@@ -67,12 +70,13 @@ module Gitlab
else
# We use elastic for default branch only
if root_ref?
strong_memoize(:commits) do
strong_memoize(memoize_key(:commits, count_only: count_only)) do
project.repository.find_commits_by_message_with_elastic(
query,
page: (page || 1).to_i,
per_page: per_page,
preload_method: preload_method
preload_method: preload_method,
options: { count_only: count_only }
)
end
else
......
......@@ -89,35 +89,67 @@ module Gitlab
end
def projects_count
@projects_count ||= projects.total_count
@projects_count ||= if strong_memoized?(:projects)
projects.total_count
else
projects(count_only: true).total_count
end
end
def notes_count
@notes_count ||= notes.total_count
@notes_count ||= if strong_memoized?(:notes)
notes.total_count
else
notes(count_only: true).total_count
end
end
def blobs_count
@blobs_count ||= blobs.total_count
@blobs_count ||= if strong_memoized?(:blobs)
blobs.total_count
else
blobs(count_only: true).total_count
end
end
def wiki_blobs_count
@wiki_blobs_count ||= wiki_blobs.total_count
@wiki_blobs_count ||= if strong_memoized?(:wiki_blobs)
wiki_blobs.total_count
else
wiki_blobs(count_only: true).total_count
end
end
def commits_count
@commits_count ||= commits.total_count
@commits_count ||= if strong_memoized?(:commits)
commits.total_count
else
commits(count_only: true).total_count
end
end
def issues_count
@issues_count ||= issues.total_count
@issues_count ||= if strong_memoized?(:issues)
issues.total_count
else
issues(count_only: true).total_count
end
end
def merge_requests_count
@merge_requests_count ||= merge_requests.total_count
@merge_requests_count ||= if strong_memoized?(:merge_requests)
merge_requests.total_count
else
merge_requests(count_only: true).total_count
end
end
def milestones_count
@milestones_count ||= milestones.total_count
@milestones_count ||= if strong_memoized?(:milestones)
milestones.total_count
else
milestones(count_only: true).total_count
end
end
# mbergeron: these aliases act as an adapter to the Gitlab::SearchResults
......@@ -208,69 +240,77 @@ module Gitlab
}
end
def projects
strong_memoize(:projects) do
Project.elastic_search(query, options: base_options)
def scope_options(scope)
case scope
when :merge_requests
base_options.merge(filters.slice(:order_by, :sort, :state))
when :issues
base_options.merge(filters.slice(:order_by, :sort, :confidential, :state))
when :milestones
# Must pass 'issues' and 'merge_requests' to check
# if any of the features is available for projects in ApplicationClassProxy#project_ids_query
# Otherwise it will ignore project_ids and return milestones
# from projects with milestones disabled.
base_options.merge(features: [:issues, :merge_requests])
else
base_options
end
end
def issues
strong_memoize(:issues) do
options = base_options.merge(filters.slice(:order_by, :sort, :confidential, :state))
def scope_results(scope, klass, count_only:)
options = scope_options(scope).merge(count_only: count_only)
Issue.elastic_search(query, options: options)
strong_memoize(memoize_key(scope, count_only: count_only)) do
klass.elastic_search(query, options: options)
end
end
def milestones
strong_memoize(:milestones) do
# Must pass 'issues' and 'merge_requests' to check
# if any of the features is available for projects in ApplicationClassProxy#project_ids_query
# Otherwise it will ignore project_ids and return milestones
# from projects with milestones disabled.
options = base_options
options[:features] = [:issues, :merge_requests]
Milestone.elastic_search(query, options: options)
end
def memoize_key(scope, count_only:)
count_only ? "#{scope}_results_count".to_sym : scope
end
def merge_requests
strong_memoize(:merge_requests) do
options = base_options.merge(filters.slice(:order_by, :sort, :state))
def projects(count_only: false)
scope_results :projects, Project, count_only: count_only
end
MergeRequest.elastic_search(query, options: options)
def issues(count_only: false)
scope_results :issues, Issue, count_only: count_only
end
def milestones(count_only: false)
scope_results :milestones, Milestone, count_only: count_only
end
def notes
strong_memoize(:notes) do
Note.elastic_search(query, options: base_options)
def merge_requests(count_only: false)
scope_results :merge_requests, MergeRequest, count_only: count_only
end
def notes(count_only: false)
scope_results :notes, Note, count_only: count_only
end
def blobs(page: 1, per_page: DEFAULT_PER_PAGE)
def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false)
return Kaminari.paginate_array([]) if query.blank?
strong_memoize(:blobs) do
strong_memoize(memoize_key(:blobs, count_only: count_only)) do
Repository.__elasticsearch__.elastic_search_as_found_blob(
query,
page: (page || 1).to_i,
per: per_page,
options: base_options
options: base_options.merge(count_only: count_only)
)
end
end
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE)
def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false)
return Kaminari.paginate_array([]) if query.blank?
strong_memoize(:wiki_blobs) do
strong_memoize(memoize_key(:wiki_blobs, count_only: count_only)) do
ProjectWiki.__elasticsearch__.elastic_search_as_wiki_page(
query,
page: (page || 1).to_i,
per: per_page,
options: base_options
options: base_options.merge(count_only: count_only)
)
end
end
......@@ -281,15 +321,15 @@ module Gitlab
# hitting ES twice for any page that's not page 1, and that's something we want to avoid.
#
# It is safe to memoize the page we get here because this method is _always_ called before `#commits_count`
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil, count_only: false)
return Kaminari.paginate_array([]) if query.blank?
strong_memoize(:commits) do
strong_memoize(memoize_key(:commits, count_only: count_only)) do
Repository.find_commits_by_message_with_elastic(
query,
page: (page || 1).to_i,
per_page: per_page,
options: base_options,
options: base_options.merge(count_only: count_only),
preload_method: preload_method
)
end
......
......@@ -50,6 +50,7 @@ RSpec.describe Gitlab::Elastic::GroupSearchResults, :elastic do
end
context 'query performance' do
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
include_examples 'does not hit Elasticsearch twice for objects and counts', %w[projects notes blobs wiki_blobs commits issues merge_requests milestones]
include_examples 'does not load results for count only queries', %w[projects notes blobs wiki_blobs commits issues merge_requests milestones]
end
end
......@@ -180,6 +180,7 @@ RSpec.describe Gitlab::Elastic::ProjectSearchResults, :elastic do
create(:wiki_page, wiki: project.wiki)
end
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|notes blobs wiki_blobs commits issues merge_requests milestones|
include_examples 'does not hit Elasticsearch twice for objects and counts', %w[notes blobs wiki_blobs commits issues merge_requests milestones]
include_examples 'does not load results for count only queries', %w[notes blobs wiki_blobs commits issues merge_requests milestones]
end
end
......@@ -272,6 +272,15 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
expect(results.issues_count).to eq 2
end
it 'executes count only queries' do
results = described_class.new(user, 'hello world', limit_project_ids)
expect(results).to receive(:issues).with(count_only: true).and_call_original
count = results.issues_count
expect(count).to eq(2)
end
context 'filtering' do
let!(:project) { create(:project, :public) }
let!(:closed_result) { create(:issue, :closed, project: project, title: 'foo closed') }
......@@ -1374,6 +1383,7 @@ RSpec.describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need
context 'query performance' do
let(:results) { described_class.new(user, 'hello world', limit_project_ids) }
include_examples 'does not hit Elasticsearch twice for objects and counts', %w|projects notes blobs wiki_blobs commits issues merge_requests milestones|
include_examples 'does not hit Elasticsearch twice for objects and counts', %w[projects notes blobs wiki_blobs commits issues merge_requests milestones]
include_examples 'does not load results for count only queries', %w[projects notes blobs wiki_blobs commits issues merge_requests milestones]
end
end
......@@ -18,3 +18,29 @@ RSpec.shared_examples 'does not hit Elasticsearch twice for objects and counts'
end
end
end
RSpec.shared_examples 'does not load results for count only queries' do |scopes|
scopes.each do |scope|
before do
allow(::Gitlab::PerformanceBar).to receive(:enabled_for_request?).and_return(true)
end
context "for scope #{scope}", :elastic, :request_store do
it 'makes count query' do
# We want to warm the cache for checking migrations have run since we
# don't want to count these requests as searches
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
warm_elasticsearch_migrations_cache!
::Gitlab::SafeRequestStore.clear!
results.public_send("#{scope}_count")
request = ::Gitlab::Instrumentation::ElasticsearchTransport.detail_store.first
expect(request.dig(:body, :size)).to eq(0)
expect(request.dig(:body, :query, :bool, :must)).to be_blank
expect(request[:highlight]).to be_blank
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment