Commit d21e5bca authored by Brett Walker's avatar Brett Walker Committed by Steve Abrams

Enable label reference caching

to reduce SQL queries

Changelog: performance
parent fa007604
......@@ -278,13 +278,15 @@ RSpec.describe Banzai::Filter::References::EpicReferenceFilter do
reference_filter(markdown, context)
end.count
expect(control_count).to eq 1
markdown = "#{epic.to_reference} #{epic.group.full_path}&9999991 #{epic.group.full_path}&9999992 &9999993 #{epic2.to_reference(group)} #{epic2.group.full_path}&9999991 something/cool&12"
# Since we're not batching queries across groups,
# we have to account for that.
# 1 for both groups, 1 for epics in each group == 3
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
max_count = control_count + 1
max_count = control_count + 2
expect do
reference_filter(markdown, context)
......
......@@ -8,21 +8,57 @@ module Banzai
self.reference_type = :label
self.object_class = Label
def parent_records(parent, ids)
return Label.none unless parent.is_a?(Project) || parent.is_a?(Group)
labels = find_labels(parent)
label_ids = ids.map {|y| y[:label_id]}.compact
label_names = ids.map {|y| y[:label_name]}.compact
id_relation = labels.where(id: label_ids)
label_relation = labels.where(title: label_names)
Label.from_union([id_relation, label_relation])
end
def find_object(parent_object, id)
find_labels(parent_object).find(id)
key = reference_cache.records_per_parent[parent_object].keys.find do |k|
k[:label_id] == id[:label_id] || k[:label_name] == id[:label_name]
end
reference_cache.records_per_parent[parent_object][key] if key
end
# Transform a symbol extracted from the text to a meaningful value
#
# This method has the contract that if a string `ref` refers to a
# record `record`, then `parse_symbol(ref) == record_identifier(record)`.
#
# This contract is slightly broken here, as we only have either the label_id
# or the label_name, but not both. But below, we have both pieces of information.
# But it's accounted for in `find_object`
def parse_symbol(symbol, match_data)
{ label_id: match_data[:label_id]&.to_i, label_name: match_data[:label_name]&.tr('"', '') }
end
# We assume that most classes are identifying records by ID.
#
# This method has the contract that if a string `ref` refers to a
# record `record`, then `class.parse_symbol(ref) == record_identifier(record)`.
# See note in `parse_symbol` above
def record_identifier(record)
{ label_id: record.id, label_name: record.title }
end
def references_in(text, pattern = Label.reference_pattern)
labels = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
namespace = $~[:namespace]
project = $~[:project]
project_path = reference_cache.full_project_path(namespace, project)
label = find_label_cached(project_path, $~[:label_id], $~[:label_name])
if label
labels[label.id] = yield match, label.id, project, namespace, $~
"#{REFERENCE_PLACEHOLDER}#{label.id}"
unescaped_html = unescape_html_entities(text).gsub(pattern).with_index do |match, index|
ident = identifier($~)
label = yield match, ident, $~[:project], $~[:namespace], $~
if label != match
labels[index] = label
"#{REFERENCE_PLACEHOLDER}#{index}"
else
match
end
......@@ -33,20 +69,6 @@ module Banzai
escape_with_placeholders(unescaped_html, labels)
end
def find_label_cached(parent_ref, label_id, label_name)
cached_call(:banzai_find_label_cached, label_name&.tr('"', '') || label_id, path: [object_class, parent_ref]) do
find_label(parent_ref, label_id, label_name)
end
end
def find_label(parent_ref, label_id, label_name)
parent = parent_from_ref(parent_ref)
return unless parent
label_params = label_params(label_id, label_name)
find_labels(parent).find_by(label_params)
end
def find_labels(parent)
params = if parent.is_a?(Group)
{ group_id: parent.id,
......@@ -60,21 +82,6 @@ module Banzai
LabelsFinder.new(nil, params).execute(skip_authorization: true)
end
# Parameters to pass to `Label.find_by` based on the given arguments
#
# id - Integer ID to pass. If present, returns {id: id}
# name - String name to pass. If `id` is absent, finds by name without
# surrounding quotes.
#
# Returns a Hash.
def label_params(id, name)
if name
{ name: name.tr('"', '') }
else
{ id: id.to_i }
end
end
def url_for_object(label, parent)
label_url_method =
if context[:label_url_method]
......@@ -121,6 +128,14 @@ module Banzai
presenter = object.present(issuable_subject: project || group)
LabelsHelper.label_tooltip_title(presenter)
end
def parent
project || group
end
def requires_unescaping?
true
end
end
end
end
......
......@@ -29,15 +29,15 @@ module Banzai
refs = Hash.new { |hash, key| hash[key] = Set.new }
nodes.each do |node|
node.to_html.scan(regex) do
path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
prepare_node_for_scan(node).scan(regex) do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[path] << ident if ident
refs[parent_path] << ident if ident
end
end
......@@ -55,9 +55,23 @@ module Banzai
@per_reference ||= {}
@per_reference[parent_type] ||= begin
refs = references_per_parent.keys.to_set
refs = references_per_parent.keys
parent_ref = {}
find_for_paths(refs.to_a).index_by(&:full_path)
# if we already have a parent, no need to query it again
refs.each do |ref|
next unless ref
if context[:project]&.full_path == ref
parent_ref[ref] = context[:project]
elsif context[:group]&.full_path == ref
parent_ref[ref] = context[:group]
end
refs -= [ref] if parent_ref[ref]
end
find_for_paths(refs).index_by(&:full_path).merge(parent_ref)
end
end
......@@ -87,7 +101,7 @@ module Banzai
@_records_per_project[filter.object_class.to_s.underscore]
end
def relation_for_paths(paths)
def objects_for_paths(paths)
klass = parent_type.to_s.camelize.constantize
result = klass.where_full_path_in(paths)
return result if parent_type == :group
......@@ -102,7 +116,7 @@ module Banzai
to_query = paths - cache.keys
unless to_query.empty?
records = relation_for_paths(to_query)
records = objects_for_paths(to_query)
found = []
records.each do |record|
......@@ -119,7 +133,7 @@ module Banzai
cache.slice(*paths).values.compact
else
relation_for_paths(paths)
objects_for_paths(paths)
end
end
......@@ -170,6 +184,16 @@ module Banzai
def refs_cache
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end
def prepare_node_for_scan(node)
html = node.to_html
filter.requires_unescaping? ? unescape_html_entities(html) : html
end
def unescape_html_entities(text)
CGI.unescapeHTML(text.to_s)
end
end
end
end
......
......@@ -109,6 +109,10 @@ module Banzai
context[:group]
end
def requires_unescaping?
false
end
private
# Returns a data attribute String to attach to a reference link
......
......@@ -702,4 +702,72 @@ RSpec.describe Banzai::Filter::References::LabelReferenceFilter do
expect(result.css('a').first.text).to eq "#{label.name} in #{project.full_name}"
end
end
context 'checking N+1' do
let_it_be(:group) { create(:group) }
let_it_be(:group2) { create(:group) }
let_it_be(:project) { create(:project, :public, namespace: group) }
let_it_be(:project2) { create(:project, :public, namespace: group2) }
let_it_be(:project3) { create(:project, :public) }
let_it_be(:project_label) { create(:label, project: project) }
let_it_be(:project_label2) { create(:label, project: project) }
let_it_be(:project2_label) { create(:label, project: project2) }
let_it_be(:group2_label) { create(:group_label, group: group2, color: '#00ff00') }
let_it_be(:project_reference) { "#{project_label.to_reference}" }
let_it_be(:project_reference2) { "#{project_label2.to_reference}" }
let_it_be(:project2_reference) { "#{project2_label.to_reference}" }
let_it_be(:group2_reference) { "#{project2.full_path}~#{group2_label.name}" }
it 'does not have N+1 per multiple references per project', :use_sql_query_cache do
markdown = "#{project_reference}"
control_count = 1
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(control_count)
markdown = "#{project_reference} ~qwert ~werty ~ertyu ~rtyui #{project_reference2}"
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(control_count)
end
it 'has N+1 for multiple unique project/group references', :use_sql_query_cache do
# reference to already loaded project, only one query
markdown = "#{project_reference}"
control_count = 1
expect do
reference_filter(markdown, project: project)
end.not_to exceed_all_query_limit(control_count)
# Since we're not batching label queries across projects/groups,
# queries increase when a new project/group is added.
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
# first reference to already loaded project (1),
# second reference requires project and namespace (2), and label (1)
markdown = "#{project_reference} #{group2_reference}"
max_count = control_count + 3
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
# third reference to already queried project/namespace, nothing extra (no N+1 here)
markdown = "#{project_reference} #{group2_reference} #{project2_reference}"
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
# last reference needs another namespace and label query (2)
markdown = "#{project_reference} #{group2_reference} #{project2_reference} #{project3.full_path}~test_label"
max_count += 2
expect do
reference_filter(markdown)
end.not_to exceed_all_query_limit(max_count)
end
end
end
......@@ -55,11 +55,12 @@ RSpec.describe Banzai::Filter::References::ReferenceCache do
cache_single.load_records_per_parent
end.count
expect(control_count).to eq 1
# Since this is an issue filter that is not batching issue queries
# across projects, we have to account for that.
# 1 for both projects, 1 for issues in each project == 3
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
max_count = control_count + 1
# 1 for original issue, 2 for second route/project, 1 for other issue
max_count = control_count + 3
expect do
cache.load_references_per_parent(filter.nodes)
......
......@@ -233,13 +233,15 @@ RSpec.describe Banzai::Filter::References::SnippetReferenceFilter do
reference_filter(markdown)
end.count
expect(control_count).to eq 1
markdown = "#{reference} $9999990 $9999991 $9999992 $9999993 #{reference2} something/cool$12"
# Since we're not batching snippet queries across projects,
# we have to account for that.
# 1 for both projects, 1 for snippets in each project == 3
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/330359
max_count = control_count + 1
max_count = control_count + 2
expect do
reference_filter(markdown)
......
......@@ -600,9 +600,8 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
setup_import_export_config('light')
setup_reader(reader)
expect(project)
.to receive(:merge_requests)
.and_raise(exception)
expect(project).to receive(:merge_requests).and_call_original
expect(project).to receive(:merge_requests).and_raise(exception)
end
it 'report post import error' do
......@@ -618,12 +617,9 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
setup_import_export_config('light')
setup_reader(reader)
expect(project)
.to receive(:merge_requests)
.and_raise(exception)
expect(project)
.to receive(:merge_requests)
.and_call_original
expect(project).to receive(:merge_requests).and_call_original
expect(project).to receive(:merge_requests).and_raise(exception)
expect(project).to receive(:merge_requests).and_call_original
expect(restored_project_json).to eq(true)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment