Commit 373ab0c9 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'bw-reference-caching' into 'master'

Refactor Banzai reference filter caching

See merge request gitlab-org/gitlab!60566
parents faee533c 57921604
......@@ -43,8 +43,6 @@ module EE
end
# rubocop: enable CodeReuse/ActiveRecord
private
def parent_type
:group
end
......
......@@ -54,7 +54,7 @@ module EE
end
def parse_and_find_iteration(project_ref, namespace_ref, iteration_id, iteration_name)
project_path = full_project_path(namespace_ref, project_ref)
project_path = reference_cache.full_project_path(namespace_ref, project_ref)
# Returns group if project is not found by path
parent = parent_from_ref(project_path)
......
......@@ -4,12 +4,14 @@ module EE
module Banzai
module Filter
module References
module AbstractReferenceFilter
module ReferenceCache
extend ::Gitlab::Utils::Override
override :current_project_namespace_path
def current_project_namespace_path
@current_project_namespace_path ||= (project&.namespace || group)&.full_path
strong_memoize(:current_project_namespace_path) do
(project&.namespace || group)&.full_path
end
end
end
end
......
......@@ -53,8 +53,6 @@ module EE
record.id.to_i
end
private
def parent_type
:project
end
......
......@@ -17,7 +17,12 @@ module Banzai
return context[:project] || context[:group] unless ref
return context[:project] if context[:project]&.full_path == ref
Project.find_by_full_path(ref)
if reference_cache.cache_loaded?
# optimization to reuse the parent_per_reference query information
reference_cache.parent_per_reference[ref || reference_cache.current_parent_path]
else
Project.find_by_full_path(ref)
end
end
end
end
......@@ -8,6 +8,12 @@ module Banzai
class AbstractReferenceFilter < ReferenceFilter
include CrossProjectReference
def initialize(doc, context = nil, result = nil)
super
@reference_cache = ReferenceCache.new(self, context)
end
# REFERENCE_PLACEHOLDER is used for re-escaping HTML text except found
# reference (which we replace with placeholder during re-scaping). The
# random number helps ensure it's pretty close to unique. Since it's a
......@@ -112,6 +118,8 @@ module Banzai
def call
return doc unless project || group || user
reference_cache.load_reference_cache(nodes) if respond_to?(:parent_records)
ref_pattern = object_reference_pattern
link_pattern = object_class.link_reference_pattern
......@@ -174,9 +182,9 @@ module Banzai
def object_link_filter(text, pattern, link_content: nil, link_reference: false)
references_in(text, pattern) do |match, id, project_ref, namespace_ref, matches|
parent_path = if parent_type == :group
full_group_path(namespace_ref)
reference_cache.full_group_path(namespace_ref)
else
full_project_path(namespace_ref, project_ref)
reference_cache.full_project_path(namespace_ref, project_ref)
end
parent = from_ref_cached(parent_path)
......@@ -263,127 +271,6 @@ module Banzai
text
end
# Returns a Hash containing all object references (e.g. issue IDs) per the
# project they belong to.
def references_per_parent
@references_per ||= {}
@references_per[parent_type] ||= begin
refs = Hash.new { |hash, key| hash[key] = Set.new }
regex = [
object_class.link_reference_pattern,
object_class.reference_pattern
].compact.reduce { |a, b| Regexp.union(a, b) }
nodes.each do |node|
node.to_html.scan(regex) do
path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
if ident = identifier($~)
refs[path] << ident
end
end
end
refs
end
end
# Returns a Hash containing referenced projects grouped per their full
# path.
def parent_per_reference
@per_reference ||= {}
@per_reference[parent_type] ||= begin
refs = Set.new
references_per_parent.each do |ref, _|
refs << ref
end
find_for_paths(refs.to_a).index_by(&:full_path)
end
end
def relation_for_paths(paths)
klass = parent_type.to_s.camelize.constantize
result = klass.where_full_path_in(paths)
return result if parent_type == :group
result.includes(:namespace) if parent_type == :project
end
# Returns projects for the given paths.
def find_for_paths(paths)
if Gitlab::SafeRequestStore.active?
cache = refs_cache
to_query = paths - cache.keys
unless to_query.empty?
records = relation_for_paths(to_query)
found = []
records.each do |record|
ref = record.full_path
get_or_set_cache(cache, ref) { record }
found << ref
end
not_found = to_query - found
not_found.each do |ref|
get_or_set_cache(cache, ref) { nil }
end
end
cache.slice(*paths).values.compact
else
relation_for_paths(paths)
end
end
def current_parent_path
@current_parent_path ||= parent&.full_path
end
def current_project_namespace_path
@current_project_namespace_path ||= project&.namespace&.full_path
end
def records_per_parent
@_records_per_project ||= {}
@_records_per_project[object_class.to_s.underscore] ||= begin
hash = Hash.new { |h, k| h[k] = {} }
parent_per_reference.each do |path, parent|
record_ids = references_per_parent[path]
parent_records(parent, record_ids).each do |record|
hash[parent][record_identifier(record)] = record
end
end
hash
end
end
private
def full_project_path(namespace, project_ref)
return current_parent_path unless project_ref
namespace_ref = namespace || current_project_namespace_path
"#{namespace_ref}/#{project_ref}"
end
def refs_cache
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end
def parent_type
:project
end
......@@ -392,11 +279,9 @@ module Banzai
parent_type == :project ? project : group
end
def full_group_path(group_ref)
return current_parent_path unless group_ref
private
group_ref
end
attr_accessor :reference_cache
def escape_with_placeholders(text, placeholder_data)
escaped = escape_html_entities(text)
......@@ -409,5 +294,3 @@ module Banzai
end
end
end
Banzai::Filter::References::AbstractReferenceFilter.prepend_if_ee('EE::Banzai::Filter::References::AbstractReferenceFilter')
......@@ -19,7 +19,7 @@ module Banzai
def find_object(project, id)
return unless project.is_a?(Project) && project.valid_repo?
_, record = records_per_parent[project].detect { |k, _v| Gitlab::Git.shas_eql?(k, id) }
_, record = reference_cache.records_per_parent[project].detect { |k, _v| Gitlab::Git.shas_eql?(k, id) }
record
end
......@@ -28,7 +28,7 @@ module Banzai
return [] unless noteable.is_a?(MergeRequest)
@referenced_merge_request_commit_shas ||= begin
referenced_shas = references_per_parent.values.reduce(:|).to_a
referenced_shas = reference_cache.references_per_parent.values.reduce(:|).to_a
noteable.all_commit_shas.select do |sha|
referenced_shas.any? { |ref| Gitlab::Git.shas_eql?(sha, ref) }
end
......@@ -66,12 +66,12 @@ module Banzai
extras
end
private
def parent_records(parent, ids)
parent.commits_by(oids: ids.to_a)
end
private
def noteable
context[:noteable]
end
......
......@@ -36,7 +36,7 @@ module Banzai
self.object_class = ::DesignManagement::Design
def find_object(project, identifier)
records_per_parent[project][identifier]
reference_cache.records_per_parent[project][identifier]
end
def parent_records(project, identifiers)
......@@ -59,15 +59,6 @@ module Banzai
super.includes(:route, :namespace, :group)
end
def parent_type
:project
end
# optimisation to reuse the parent_per_reference query information
def parent_from_ref(ref)
parent_per_reference[ref || current_parent_path]
end
def url_for_object(design, project)
path_options = { vueroute: design.filename }
Gitlab::Routing.url_helpers.designs_project_issue_path(project, design.issue, path_options)
......
......@@ -9,11 +9,7 @@ module Banzai
end
def find_object(parent, iid)
records_per_parent[parent][iid]
end
def parent_from_ref(ref)
parent_per_reference[ref || current_parent_path]
reference_cache.records_per_parent[parent][iid]
end
end
end
......
......@@ -17,7 +17,7 @@ module Banzai
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
namespace = $~[:namespace]
project = $~[:project]
project_path = full_project_path(namespace, project)
project_path = reference_cache.full_project_path(namespace, project)
label = find_label_cached(project_path, $~[:label_id], $~[:label_name])
if label
......@@ -93,7 +93,7 @@ module Banzai
parent = project || group
if project || full_path_ref?(matches)
project_path = full_project_path(matches[:namespace], matches[:project])
project_path = reference_cache.full_project_path(matches[:namespace], matches[:project])
parent_from_ref = from_ref_cached(project_path)
reference = parent_from_ref.to_human_reference(parent)
......
......@@ -67,7 +67,7 @@ module Banzai
end
def find_milestone(project_ref, namespace_ref, milestone_id, milestone_name)
project_path = full_project_path(namespace_ref, project_ref)
project_path = reference_cache.full_project_path(namespace_ref, project_ref)
# Returns group if project is not found by path
parent = parent_from_ref(project_path)
......
# frozen_string_literal: true
module Banzai
module Filter
module References
class ReferenceCache
include Gitlab::Utils::StrongMemoize
include RequestStoreReferenceCache
def initialize(filter, context)
@filter = filter
@context = context
end
def load_reference_cache(nodes)
load_references_per_parent(nodes)
load_parent_per_reference
load_records_per_parent
@cache_loaded = true
end
# Loads all object references (e.g. issue IDs) per
# project/group they belong to.
def load_references_per_parent(nodes)
@references_per_parent ||= {}
@references_per_parent[parent_type] ||= begin
refs = Hash.new { |hash, key| hash[key] = Set.new }
nodes.each do |node|
node.to_html.scan(regex) do
path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[path] << ident if ident
end
end
refs
end
end
def references_per_parent
@references_per_parent[parent_type]
end
# Returns a Hash containing referenced projects grouped per their full
# path.
def load_parent_per_reference
@per_reference ||= {}
@per_reference[parent_type] ||= begin
refs = references_per_parent.keys.to_set
find_for_paths(refs.to_a).index_by(&:full_path)
end
end
def parent_per_reference
@per_reference[parent_type]
end
def load_records_per_parent
@_records_per_project ||= {}
@_records_per_project[filter.object_class.to_s.underscore] ||= begin
hash = Hash.new { |h, k| h[k] = {} }
parent_per_reference.each do |path, parent|
record_ids = references_per_parent[path]
filter.parent_records(parent, record_ids).each do |record|
hash[parent][filter.record_identifier(record)] = record
end
end
hash
end
end
def records_per_parent
@_records_per_project[filter.object_class.to_s.underscore]
end
def relation_for_paths(paths)
klass = parent_type.to_s.camelize.constantize
result = klass.where_full_path_in(paths)
return result if parent_type == :group
result.includes(namespace: :route) if parent_type == :project
end
# Returns projects for the given paths.
def find_for_paths(paths)
if Gitlab::SafeRequestStore.active?
cache = refs_cache
to_query = paths - cache.keys
unless to_query.empty?
records = relation_for_paths(to_query)
found = []
records.each do |record|
ref = record.full_path
get_or_set_cache(cache, ref) { record }
found << ref
end
not_found = to_query - found
not_found.each do |ref|
get_or_set_cache(cache, ref) { nil }
end
end
cache.slice(*paths).values.compact
else
relation_for_paths(paths)
end
end
def current_parent_path
strong_memoize(:current_parent_path) do
parent&.full_path
end
end
def current_project_namespace_path
strong_memoize(:current_project_namespace_path) do
project&.namespace&.full_path
end
end
def full_project_path(namespace, project_ref)
return current_parent_path unless project_ref
namespace_ref = namespace || current_project_namespace_path
"#{namespace_ref}/#{project_ref}"
end
def full_group_path(group_ref)
return current_parent_path unless group_ref
group_ref
end
def cache_loaded?
!!@cache_loaded
end
private
attr_accessor :filter, :context
delegate :project, :group, :parent, :parent_type, to: :filter
def regex
strong_memoize(:regex) do
[
filter.object_class.link_reference_pattern,
filter.object_class.reference_pattern
].compact.reduce { |a, b| Regexp.union(a, b) }
end
end
def refs_cache
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end
end
end
end
end
Banzai::Filter::References::ReferenceCache.prepend_if_ee('EE::Banzai::Filter::References::ReferenceCache')
......@@ -97,6 +97,18 @@ module Banzai
@nodes ||= each_node.to_a
end
def object_class
self.class.object_class
end
def project
context[:project]
end
def group
context[:group]
end
private
# Returns a data attribute String to attach to a reference link
......@@ -141,14 +153,6 @@ module Banzai
needs :project unless skip_project_check?
end
def project
context[:project]
end
def group
context[:group]
end
def user
context[:user]
end
......@@ -216,10 +220,6 @@ module Banzai
node.is_a?(Nokogiri::XML::Element)
end
def object_class
self.class.object_class
end
def object_reference_pattern
@object_reference_pattern ||= object_class.reference_pattern
end
......
......@@ -4,10 +4,12 @@ require 'spec_helper'
RSpec.describe Banzai::CrossProjectReference do
let(:including_class) { Class.new.include(described_class).new }
let(:reference_cache) { Banzai::Filter::References::ReferenceCache.new(including_class, {})}
before do
allow(including_class).to receive(:context).and_return({})
allow(including_class).to receive(:parent_from_ref).and_call_original
allow(including_class).to receive(:reference_cache).and_return(reference_cache)
end
describe '#parent_from_ref' do
......@@ -47,5 +49,18 @@ RSpec.describe Banzai::CrossProjectReference do
expect(including_class.parent_from_ref('cross/reference')).to eq project2
end
end
context 'when reference cache is loaded' do
let(:project2) { double('referenced project') }
before do
allow(reference_cache).to receive(:cache_loaded?).and_return(true)
allow(reference_cache).to receive(:parent_per_reference).and_return({ 'cross/reference' => project2 })
end
it 'pulls from the reference cache' do
expect(including_class.parent_from_ref('cross/reference')).to eq project2
end
end
end
end
......@@ -8,18 +8,6 @@ RSpec.describe Banzai::Filter::References::AbstractReferenceFilter do
let(:doc) { Nokogiri::HTML.fragment('') }
let(:filter) { described_class.new(doc, project: project) }
describe '#references_per_parent' do
let(:doc) { Nokogiri::HTML.fragment("#1 #{project.full_path}#2 #2") }
it 'returns a Hash containing references grouped per parent paths' do
expect(described_class).to receive(:object_class).exactly(6).times.and_return(Issue)
refs = filter.references_per_parent
expect(refs).to match(a_hash_including(project.full_path => contain_exactly(1, 2)))
end
end
describe '#data_attributes_for' do
let_it_be(:issue) { create(:issue, project: project) }
......@@ -32,74 +20,6 @@ RSpec.describe Banzai::Filter::References::AbstractReferenceFilter do
end
end
describe '#parent_per_reference' do
it 'returns a Hash containing projects grouped per parent paths' do
expect(filter).to receive(:references_per_parent)
.and_return({ project.full_path => Set.new([1]) })
expect(filter.parent_per_reference)
.to eq({ project.full_path => project })
end
end
describe '#find_for_paths' do
context 'with RequestStore disabled' do
it 'returns a list of Projects for a list of paths' do
expect(filter.find_for_paths([project.full_path]))
.to eq([project])
end
it "return an empty array for paths that don't exist" do
expect(filter.find_for_paths(['nonexistent/project']))
.to eq([])
end
end
context 'with RequestStore enabled', :request_store do
it 'returns a list of Projects for a list of paths' do
expect(filter.find_for_paths([project.full_path]))
.to eq([project])
end
context 'when no project with that path exists' do
it 'returns no value' do
expect(filter.find_for_paths(['nonexistent/project']))
.to eq([])
end
it 'adds the ref to the project refs cache' do
project_refs_cache = {}
allow(filter).to receive(:refs_cache).and_return(project_refs_cache)
filter.find_for_paths(['nonexistent/project'])
expect(project_refs_cache).to eq({ 'nonexistent/project' => nil })
end
context 'when the project refs cache includes nil values' do
before do
# adds { 'nonexistent/project' => nil } to cache
filter.from_ref_cached('nonexistent/project')
end
it "return an empty array for paths that don't exist" do
expect(filter.find_for_paths(['nonexistent/project']))
.to eq([])
end
end
end
end
end
describe '#current_parent_path' do
it 'returns the path of the current parent' do
doc = Nokogiri::HTML.fragment('')
filter = described_class.new(doc, project: project)
expect(filter.current_parent_path).to eq(project.full_path)
end
end
context 'abstract methods' do
describe '#find_object' do
it 'raises NotImplementedError' do
......
......@@ -470,24 +470,6 @@ RSpec.describe Banzai::Filter::References::IssueReferenceFilter do
end
end
describe '#records_per_parent' do
context 'using an internal issue tracker' do
it 'returns a Hash containing the issues per project' do
doc = Nokogiri::HTML.fragment('')
filter = described_class.new(doc, project: project)
expect(filter).to receive(:parent_per_reference)
.and_return({ project.full_path => project })
expect(filter).to receive(:references_per_parent)
.and_return({ project.full_path => Set.new([issue.iid]) })
expect(filter.records_per_parent)
.to eq({ project => { issue.iid => issue } })
end
end
end
describe '.references_in' do
let(:merge_request) { create(:merge_request) }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Banzai::Filter::References::ReferenceCache do
let_it_be(:project) { create(:project) }
let_it_be(:project2) { create(:project) }
let_it_be(:issue1) { create(:issue, project: project) }
let_it_be(:issue2) { create(:issue, project: project) }
let_it_be(:issue3) { create(:issue, project: project2) }
let_it_be(:doc) { Nokogiri::HTML.fragment("#{issue1.to_reference} #{issue2.to_reference} #{issue3.to_reference(full: true)}") }
let(:filter_class) { Banzai::Filter::References::IssueReferenceFilter }
let(:filter) { filter_class.new(doc, project: project) }
let(:cache) { described_class.new(filter, { project: project }) }
describe '#load_references_per_parent' do
it 'loads references grouped per parent paths' do
cache.load_references_per_parent(filter.nodes)
expect(cache.references_per_parent).to eq({ project.full_path => [issue1.iid, issue2.iid].to_set,
project2.full_path => [issue3.iid].to_set })
end
end
describe '#load_parent_per_reference' do
it 'returns a Hash containing projects grouped per parent paths' do
cache.load_references_per_parent(filter.nodes)
cache.load_parent_per_reference
expect(cache.parent_per_reference).to match({ project.full_path => project, project2.full_path => project2 })
end
end
describe '#load_records_per_parent' do
it 'returns a Hash containing projects grouped per parent paths' do
cache.load_references_per_parent(filter.nodes)
cache.load_parent_per_reference
cache.load_records_per_parent
expect(cache.records_per_parent).to match({ project => { issue1.iid => issue1, issue2.iid => issue2 },
project2 => { issue3.iid => issue3 } })
end
end
describe '#initialize_reference_cache' do
it 'does not have an N+1 query problem with cross projects' do
doc_single = Nokogiri::HTML.fragment("#1")
filter_single = filter_class.new(doc_single, project: project)
cache_single = described_class.new(filter_single, { project: project })
control_count = ActiveRecord::QueryRecorder.new do
cache_single.load_references_per_parent(filter_single.nodes)
cache_single.load_parent_per_reference
cache_single.load_records_per_parent
end.count
# Since this is an issue filter that is not batching issue queries
# across projects, we have to account for that.
# 1 for both projects, 1 for issues in each project == 3
max_count = control_count + 1
expect do
cache.load_references_per_parent(filter.nodes)
cache.load_parent_per_reference
cache.load_records_per_parent
end.not_to exceed_query_limit(max_count)
end
end
describe '#find_for_paths' do
context 'with RequestStore disabled' do
it 'returns a list of Projects for a list of paths' do
expect(cache.find_for_paths([project.full_path]))
.to eq([project])
end
it 'return an empty array for paths that do not exist' do
expect(cache.find_for_paths(['nonexistent/project']))
.to eq([])
end
end
context 'with RequestStore enabled', :request_store do
it 'returns a list of Projects for a list of paths' do
expect(cache.find_for_paths([project.full_path]))
.to eq([project])
end
context 'when no project with that path exists' do
it 'returns no value' do
expect(cache.find_for_paths(['nonexistent/project']))
.to eq([])
end
it 'adds the ref to the project refs cache' do
project_refs_cache = {}
allow(cache).to receive(:refs_cache).and_return(project_refs_cache)
cache.find_for_paths(['nonexistent/project'])
expect(project_refs_cache).to eq({ 'nonexistent/project' => nil })
end
end
end
end
describe '#current_parent_path' do
it 'returns the path of the current parent' do
expect(cache.current_parent_path).to eq project.full_path
end
end
describe '#current_project_namespace_path' do
it 'returns the path of the current project namespace' do
expect(cache.current_project_namespace_path).to eq project.namespace.full_path
end
end
describe '#full_project_path' do
it 'returns current parent path when no ref specified' do
expect(cache.full_project_path('something', nil)).to eq cache.current_parent_path
end
it 'returns combined namespace and project ref' do
expect(cache.full_project_path('something', 'cool')).to eq 'something/cool'
end
it 'returns uses default namespace and project ref when namespace nil' do
expect(cache.full_project_path(nil, 'cool')).to eq "#{project.namespace.full_path}/cool"
end
end
describe '#full_group_path' do
it 'returns current parent path when no group ref specified' do
expect(cache.full_group_path(nil)).to eq cache.current_parent_path
end
it 'returns group ref' do
expect(cache.full_group_path('cool_group')).to eq 'cool_group'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment