Commit 7b6fa267 authored by Vasilii Iakliushin's avatar Vasilii Iakliushin

Optimize scanning for references process

Contributes to https://gitlab.com/gitlab-org/gitlab/-/issues/326323

**Problem**

For reference cache generation we convert and scan each node one by
one for reference identifiers. This process is slow and consumes lots
of memory.

**Solution**

Scan the whole document directly. That removes many unnecessary
transformation operations on nodes.

Changelog: performance
parent f095fc89
......@@ -61,10 +61,38 @@ class Milestone < ApplicationRecord
end
def self.reference_pattern
if Feature.enabled?(:milestone_reference_pattern, default_enabled: :yaml)
new_reference_pattern
else
old_reference_pattern
end
end
def self.new_reference_pattern
# NOTE: The iid pattern only matches when all characters on the expression
# are digits, so it will match %2 but not %2.1 because that's probably a
# milestone name and we want it to be matched as such.
@new_reference_pattern ||= %r{
(#{Project.reference_pattern})?
#{Regexp.escape(reference_prefix)}
(?:
(?<milestone_iid>
\d+(?!\S\w)\b # Integer-based milestone iid, or
) |
(?<milestone_name>
[^"\s\<]+\b | # String-based single-word milestone title, or
"[^"]+" # String-based multi-word milestone surrounded in quotes
)
)
}x
end
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def self.old_reference_pattern
# NOTE: The iid pattern only matches when all characters on the expression
# are digits, so it will match %2 but not %2.1 because that's probably a
# milestone name and we want it to be matched as such.
@reference_pattern ||= %r{
@old_reference_pattern ||= %r{
(#{Project.reference_pattern})?
#{Regexp.escape(reference_prefix)}
(?:
......
---
name: milestone_reference_pattern
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65847
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
milestone: '14.1'
type: development
group: group::source code
default_enabled: false
......@@ -28,21 +28,12 @@ module Banzai
@references_per_parent[parent_type] ||= begin
refs = Hash.new { |hash, key| hash[key] = Set.new }
nodes.each do |node|
prepare_node_for_scan(node).scan(regex) do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
if Feature.enabled?(:milestone_reference_pattern, default_enabled: :yaml)
doc_search(refs)
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
node_search(nodes, refs)
end
end
refs
end
end
def references_per_parent
......@@ -172,6 +163,39 @@ module Banzai
delegate :project, :group, :parent, :parent_type, to: :filter
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def node_search(nodes, refs)
nodes.each do |node|
prepare_node_for_scan(node).scan(regex) do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
end
end
refs
end
def doc_search(refs)
prepare_doc_for_scan(filter.doc).to_enum(:scan, regex).each do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
end
refs
end
def regex
strong_memoize(:regex) do
[
......@@ -185,6 +209,13 @@ module Banzai
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end
def prepare_doc_for_scan(doc)
html = doc.to_html
filter.requires_unescaping? ? unescape_html_entities(html) : html
end
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def prepare_node_for_scan(node)
html = node.to_html
......
......@@ -92,6 +92,11 @@ RSpec.describe Banzai::Filter::References::MilestoneReferenceFilter do
expect(doc.to_html).to match(%r(\(<a.+>#{milestone.reference_link_text}</a>\.\)))
end
it 'links with adjacent html tags' do
doc = reference_filter("Milestone <p>#{reference}</p>.")
expect(doc.to_html).to match(%r(<p><a.+>#{milestone.reference_link_text}</a></p>))
end
it 'ignores invalid milestone names' do
exp = act = "Milestone #{Milestone.reference_prefix}#{milestone.name.reverse}"
......
......@@ -538,6 +538,15 @@ RSpec.describe Milestone do
it { is_expected.to match('gitlab-org/gitlab-ce%123') }
it { is_expected.to match('gitlab-org/gitlab-ce%"my-milestone"') }
context 'when milestone_reference_pattern feature flag is false' do
before do
stub_feature_flags(milestone_reference_pattern: false)
end
it { is_expected.to match('gitlab-org/gitlab-ce%123') }
it { is_expected.to match('gitlab-org/gitlab-ce%"my-milestone"') }
end
end
describe '.link_reference_pattern' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment