Commit d21e5bca authored by Brett Walker's avatar Brett Walker Committed by Steve Abrams

Enable label reference caching

to reduce SQL queries

Changelog: performance
parent fa007604
...@@ -278,13 +278,15 @@ RSpec.describe Banzai::Filter::References::EpicReferenceFilter do ...@@ -278,13 +278,15 @@ RSpec.describe Banzai::Filter::References::EpicReferenceFilter do
reference_filter(markdown, context) reference_filter(markdown, context)
end.count end.count
expect(control_count).to eq 1
markdown = "#{epic.to_reference} #{epic.group.full_path}&9999991 #{epic.group.full_path}&9999992 &9999993 #{epic2.to_reference(group)} #{epic2.group.full_path}&9999991 something/cool&12" markdown = "#{epic.to_reference} #{epic.group.full_path}&9999991 #{epic.group.full_path}&9999992 &9999993 #{epic2.to_reference(group)} #{epic2.group.full_path}&9999991 something/cool&12"
# Since we're not batching queries across groups, # Since we're not batching queries across groups,
# we have to account for that. # we have to account for that.
# 1 for both groups, 1 for epics in each group == 3 # 1 for both groups, 1 for epics in each group == 3
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359 # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
max_count = control_count + 1 max_count = control_count + 2
expect do expect do
reference_filter(markdown, context) reference_filter(markdown, context)
......
...@@ -8,21 +8,57 @@ module Banzai ...@@ -8,21 +8,57 @@ module Banzai
self.reference_type = :label self.reference_type = :label
self.object_class = Label self.object_class = Label
def parent_records(parent, ids)
return Label.none unless parent.is_a?(Project) || parent.is_a?(Group)
labels = find_labels(parent)
label_ids = ids.map {|y| y[:label_id]}.compact
label_names = ids.map {|y| y[:label_name]}.compact
id_relation = labels.where(id: label_ids)
label_relation = labels.where(title: label_names)
Label.from_union([id_relation, label_relation])
end
def find_object(parent_object, id) def find_object(parent_object, id)
find_labels(parent_object).find(id) key = reference_cache.records_per_parent[parent_object].keys.find do |k|
k[:label_id] == id[:label_id] || k[:label_name] == id[:label_name]
end
reference_cache.records_per_parent[parent_object][key] if key
end
# Transform a symbol extracted from the text to a meaningful value
#
# This method has the contract that if a string `ref` refers to a
# record `record`, then `parse_symbol(ref) == record_identifier(record)`.
#
# This contract is slightly broken here, as we only have either the label_id
# or the label_name, but not both. But below, we have both pieces of information.
# But it's accounted for in `find_object`
def parse_symbol(symbol, match_data)
{ label_id: match_data[:label_id]&.to_i, label_name: match_data[:label_name]&.tr('"', '') }
end
# We assume that most classes are identifying records by ID.
#
# This method has the contract that if a string `ref` refers to a
# record `record`, then `class.parse_symbol(ref) == record_identifier(record)`.
# See note in `parse_symbol` above
def record_identifier(record)
{ label_id: record.id, label_name: record.title }
end end
def references_in(text, pattern = Label.reference_pattern) def references_in(text, pattern = Label.reference_pattern)
labels = {} labels = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
namespace = $~[:namespace] unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index|
project = $~[:project] ident = identifier($~)
project_path = reference_cache.full_project_path(namespace, project) label = yield match, ident, $~[:project], $~[:namespace], $~
label = find_label_cached(project_path, $~[:label_id], $~[:label_name])
if label != match
if label labels[index] = label
labels[label.id] = yield match, label.id, project, namespace, $~ "#{REFERENCE_PLACEHOLDER}#{index}"
"#{REFERENCE_PLACEHOLDER}#{label.id}"
else else
match match
end end
...@@ -33,20 +69,6 @@ module Banzai ...@@ -33,20 +69,6 @@ module Banzai
escape_with_placeholders(unescaped_html, labels) escape_with_placeholders(unescaped_html, labels)
end end
def find_label_cached(parent_ref, label_id, label_name)
cached_call(:banzai_find_label_cached, label_name&.tr('"', '') || label_id, path: [object_class, parent_ref]) do
find_label(parent_ref, label_id, label_name)
end
end
def find_label(parent_ref, label_id, label_name)
parent = parent_from_ref(parent_ref)
return unless parent
label_params = label_params(label_id, label_name)
find_labels(parent).find_by(label_params)
end
def find_labels(parent) def find_labels(parent)
params = if parent.is_a?(Group) params = if parent.is_a?(Group)
{ group_id: parent.id, { group_id: parent.id,
...@@ -60,21 +82,6 @@ module Banzai ...@@ -60,21 +82,6 @@ module Banzai
LabelsFinder.new(nil, params).execute(skip_authorization: true) LabelsFinder.new(nil, params).execute(skip_authorization: true)
end end
# Parameters to pass to `Label.find_by` based on the given arguments
#
# id - Integer ID to pass. If present, returns {id: id}
# name - String name to pass. If `id` is absent, finds by name without
# surrounding quotes.
#
# Returns a Hash.
def label_params(id, name)
if name
{ name: name.tr('"', '') }
else
{ id: id.to_i }
end
end
def url_for_object(label, parent) def url_for_object(label, parent)
label_url_method = label_url_method =
if context[:label_url_method] if context[:label_url_method]
...@@ -121,6 +128,14 @@ module Banzai ...@@ -121,6 +128,14 @@ module Banzai
presenter = object.present(issuable_subject: project || group) presenter = object.present(issuable_subject: project || group)
LabelsHelper.label_tooltip_title(presenter) LabelsHelper.label_tooltip_title(presenter)
end end
def parent
project || group
end
def requires_unescaping?
true
end
end end
end end
end end
......
...@@ -29,15 +29,15 @@ module Banzai ...@@ -29,15 +29,15 @@ module Banzai
refs = Hash.new { |hash, key| hash[key] = Set.new } refs = Hash.new { |hash, key| hash[key] = Set.new }
nodes.each do |node| nodes.each do |node|
node.to_html.scan(regex) do prepare_node_for_scan(node).scan(regex) do
path = if parent_type == :project parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project]) full_project_path($~[:namespace], $~[:project])
else else
full_group_path($~[:group]) full_group_path($~[:group])
end end
ident = filter.identifier($~) ident = filter.identifier($~)
refs[path] << ident if ident refs[parent_path] << ident if ident
end end
end end
...@@ -55,9 +55,23 @@ module Banzai ...@@ -55,9 +55,23 @@ module Banzai
@per_reference ||= {} @per_reference ||= {}
@per_reference[parent_type] ||= begin @per_reference[parent_type] ||= begin
refs = references_per_parent.keys.to_set refs = references_per_parent.keys
parent_ref = {}
find_for_paths(refs.to_a).index_by(&:full_path) # if we already have a parent, no need to query it again
refs.each do |ref|
next unless ref
if context[:project]&.full_path == ref
parent_ref[ref] = context[:project]
elsif context[:group]&.full_path == ref
parent_ref[ref] = context[:group]
end
refs -= [ref] if parent_ref[ref]
end
find_for_paths(refs).index_by(&:full_path).merge(parent_ref)
end end
end end
...@@ -87,7 +101,7 @@ module Banzai ...@@ -87,7 +101,7 @@ module Banzai
@_records_per_project[filter.object_class.to_s.underscore] @_records_per_project[filter.object_class.to_s.underscore]
end end
def relation_for_paths(paths) def objects_for_paths(paths)
klass = parent_type.to_s.camelize.constantize klass = parent_type.to_s.camelize.constantize
result = klass.where_full_path_in(paths) result = klass.where_full_path_in(paths)
return result if parent_type == :group return result if parent_type == :group
...@@ -102,7 +116,7 @@ module Banzai ...@@ -102,7 +116,7 @@ module Banzai
to_query = paths - cache.keys to_query = paths - cache.keys
unless to_query.empty? unless to_query.empty?
records = relation_for_paths(to_query) records = objects_for_paths(to_query)
found = [] found = []
records.each do |record| records.each do |record|
...@@ -119,7 +133,7 @@ module Banzai ...@@ -119,7 +133,7 @@ module Banzai
cache.slice(*paths).values.compact cache.slice(*paths).values.compact
else else
relation_for_paths(paths) objects_for_paths(paths)
end end
end end
...@@ -170,6 +184,16 @@ module Banzai ...@@ -170,6 +184,16 @@ module Banzai
def refs_cache def refs_cache
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {} Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end end
def prepare_node_for_scan(node)
html = node.to_html
filter.requires_unescaping? ? unescape_html_entities(html) : html
end
def unescape_html_entities(text)
CGI.unescapeHTML(text.to_s)
end
end end
end end
end end
......
...@@ -109,6 +109,10 @@ module Banzai ...@@ -109,6 +109,10 @@ module Banzai
context[:group] context[:group]
end end
def requires_unescaping?
false
end
private private
# Returns a data attribute String to attach to a reference link # Returns a data attribute String to attach to a reference link
......
...@@ -702,4 +702,72 @@ RSpec.describe Banzai::Filter::References::LabelReferenceFilter do ...@@ -702,4 +702,72 @@ RSpec.describe Banzai::Filter::References::LabelReferenceFilter do
expect(result.css('a').first.text).to eq "#{label.name} in #{project.full_name}" expect(result.css('a').first.text).to eq "#{label.name} in #{project.full_name}"
end end
end end
context 'checking N+1' do
let_it_be(:group) { create(:group) }
let_it_be(:group2) { create(:group) }
let_it_be(:project) { create(:project, :public, namespace: group) }
let_it_be(:project2) { create(:project, :public, namespace: group2) }
let_it_be(:project3) { create(:project, :public) }
let_it_be(:project_label) { create(:label, project: project) }
let_it_be(:project_label2) { create(:label, project: project) }
let_it_be(:project2_label) { create(:label, project: project2) }
let_it_be(:group2_label) { create(:group_label, group: group2, color: '#00ff00') }
let_it_be(:project_reference) { "#{project_label.to_reference}" }
let_it_be(:project_reference2) { "#{project_label2.to_reference}" }
let_it_be(:project2_reference) { "#{project2_label.to_reference}" }
let_it_be(:group2_reference) { "#{project2.full_path}~#{group2_label.name}" }
it 'does not have N+1 per multiple references per project', :use_sql_query_cache do
markdown = "#{project_reference}"
control_count = 1
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(control_count)
markdown = "#{project_reference} ~qwert ~werty ~ertyu ~rtyui #{project_reference2}"
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(control_count)
end
it 'has N+1 for multiple unique project/group references', :use_sql_query_cache do
# reference to already loaded project, only one query
markdown = "#{project_reference}"
control_count = 1
expect do
reference_filter(markdown, project: project)
end.not_to exceed_all_query_limit(control_count)
# Since we're not batching label queries across projects/groups,
# queries increase when a new project/group is added.
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
# first reference to already loaded project (1),
# second reference requires project and namespace (2), and label (1)
markdown = "#{project_reference} #{group2_reference}"
max_count = control_count + 3
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
# third reference to already queried project/namespace, nothing extra (no N+1 here)
markdown = "#{project_reference} #{group2_reference} #{project2_reference}"
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
# last reference needs another namespace and label query (2)
markdown = "#{project_reference} #{group2_reference} #{project2_reference} #{project3.full_path}~test_label"
max_count += 2
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
end
end
end end
...@@ -55,11 +55,12 @@ RSpec.describe Banzai::Filter::References::ReferenceCache do ...@@ -55,11 +55,12 @@ RSpec.describe Banzai::Filter::References::ReferenceCache do
cache_single.load_records_per_parent cache_single.load_records_per_parent
end.count end.count
expect(control_count).to eq 1
# Since this is an issue filter that is not batching issue queries # Since this is an issue filter that is not batching issue queries
# across projects, we have to account for that. # across projects, we have to account for that.
# 1 for both projects, 1 for issues in each project == 3 # 1 for original issue, 2 for second route/project, 1 for other issue
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359 max_count = control_count + 3
max_count = control_count + 1
expect do expect do
cache.load_references_per_parent(filter.nodes) cache.load_references_per_parent(filter.nodes)
......
...@@ -233,13 +233,15 @@ RSpec.describe Banzai::Filter::References::SnippetReferenceFilter do ...@@ -233,13 +233,15 @@ RSpec.describe Banzai::Filter::References::SnippetReferenceFilter do
reference_filter(markdown) reference_filter(markdown)
end.count end.count
expect(control_count).to eq 1
markdown = "#{reference} $9999990 $9999991 $9999992 $9999993 #{reference2} something/cool$12" markdown = "#{reference} $9999990 $9999991 $9999992 $9999993 #{reference2} something/cool$12"
# Since we're not batching snippet queries across projects, # Since we're not batching snippet queries across projects,
# we have to account for that. # we have to account for that.
# 1 for both projects, 1 for snippets in each project == 3 # 1 for both projects, 1 for snippets in each project == 3
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359 # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
max_count = control_count + 1 max_count = control_count + 2
expect do expect do
reference_filter(markdown) reference_filter(markdown)
......
...@@ -600,9 +600,8 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do ...@@ -600,9 +600,8 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
setup_import_export_config('light') setup_import_export_config('light')
setup_reader(reader) setup_reader(reader)
expect(project) expect(project).to receive(:merge_requests).and_call_original
.to receive(:merge_requests) expect(project).to receive(:merge_requests).and_raise(exception)
.and_raise(exception)
end end
it 'report post import error' do it 'report post import error' do
...@@ -618,12 +617,9 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do ...@@ -618,12 +617,9 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
setup_import_export_config('light') setup_import_export_config('light')
setup_reader(reader) setup_reader(reader)
expect(project) expect(project).to receive(:merge_requests).and_call_original
.to receive(:merge_requests) expect(project).to receive(:merge_requests).and_raise(exception)
.and_raise(exception) expect(project).to receive(:merge_requests).and_call_original
expect(project)
.to receive(:merge_requests)
.and_call_original
expect(restored_project_json).to eq(true) expect(restored_project_json).to eq(true)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment