Commit edab8275 authored by Stan Hu's avatar Stan Hu

Merge branch '326323_optimize_reference_cache_search' into 'master'

Optimize scanning for references process

See merge request gitlab-org/gitlab!65847
parents a5719143 7b6fa267
...@@ -61,10 +61,38 @@ class Milestone < ApplicationRecord ...@@ -61,10 +61,38 @@ class Milestone < ApplicationRecord
end end
def self.reference_pattern def self.reference_pattern
if Feature.enabled?(:milestone_reference_pattern, default_enabled: :yaml)
new_reference_pattern
else
old_reference_pattern
end
end
def self.new_reference_pattern
# NOTE: The iid pattern only matches when all characters on the expression
# are digits, so it will match %2 but not %2.1 because that's probably a
# milestone name and we want it to be matched as such.
@new_reference_pattern ||= %r{
(#{Project.reference_pattern})?
#{Regexp.escape(reference_prefix)}
(?:
(?<milestone_iid>
\d+(?!\S\w)\b # Integer-based milestone iid, or
) |
(?<milestone_name>
[^"\s\<]+\b | # String-based single-word milestone title, or
"[^"]+" # String-based multi-word milestone surrounded in quotes
)
)
}x
end
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def self.old_reference_pattern
# NOTE: The iid pattern only matches when all characters on the expression # NOTE: The iid pattern only matches when all characters on the expression
# are digits, so it will match %2 but not %2.1 because that's probably a # are digits, so it will match %2 but not %2.1 because that's probably a
# milestone name and we want it to be matched as such. # milestone name and we want it to be matched as such.
@reference_pattern ||= %r{ @old_reference_pattern ||= %r{
(#{Project.reference_pattern})? (#{Project.reference_pattern})?
#{Regexp.escape(reference_prefix)} #{Regexp.escape(reference_prefix)}
(?: (?:
......
---
name: milestone_reference_pattern
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65847
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
milestone: '14.1'
type: development
group: group::source code
default_enabled: false
...@@ -28,20 +28,11 @@ module Banzai ...@@ -28,20 +28,11 @@ module Banzai
@references_per_parent[parent_type] ||= begin @references_per_parent[parent_type] ||= begin
refs = Hash.new { |hash, key| hash[key] = Set.new } refs = Hash.new { |hash, key| hash[key] = Set.new }
nodes.each do |node| if Feature.enabled?(:milestone_reference_pattern, default_enabled: :yaml)
prepare_node_for_scan(node).scan(regex) do doc_search(refs)
parent_path = if parent_type == :project else
full_project_path($~[:namespace], $~[:project]) node_search(nodes, refs)
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
end
end end
refs
end end
end end
...@@ -172,6 +163,39 @@ module Banzai ...@@ -172,6 +163,39 @@ module Banzai
delegate :project, :group, :parent, :parent_type, to: :filter delegate :project, :group, :parent, :parent_type, to: :filter
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def node_search(nodes, refs)
nodes.each do |node|
prepare_node_for_scan(node).scan(regex) do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
end
end
refs
end
def doc_search(refs)
prepare_doc_for_scan(filter.doc).to_enum(:scan, regex).each do
parent_path = if parent_type == :project
full_project_path($~[:namespace], $~[:project])
else
full_group_path($~[:group])
end
ident = filter.identifier($~)
refs[parent_path] << ident if ident
end
refs
end
def regex def regex
strong_memoize(:regex) do strong_memoize(:regex) do
[ [
...@@ -185,6 +209,13 @@ module Banzai ...@@ -185,6 +209,13 @@ module Banzai
Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {} Gitlab::SafeRequestStore["banzai_#{parent_type}_refs".to_sym] ||= {}
end end
def prepare_doc_for_scan(doc)
html = doc.to_html
filter.requires_unescaping? ? unescape_html_entities(html) : html
end
# Deprecated: https://gitlab.com/gitlab-org/gitlab/-/issues/336268
def prepare_node_for_scan(node) def prepare_node_for_scan(node)
html = node.to_html html = node.to_html
......
...@@ -92,6 +92,11 @@ RSpec.describe Banzai::Filter::References::MilestoneReferenceFilter do ...@@ -92,6 +92,11 @@ RSpec.describe Banzai::Filter::References::MilestoneReferenceFilter do
expect(doc.to_html).to match(%r(\(<a.+>#{milestone.reference_link_text}</a>\.\))) expect(doc.to_html).to match(%r(\(<a.+>#{milestone.reference_link_text}</a>\.\)))
end end
it 'links with adjacent html tags' do
doc = reference_filter("Milestone <p>#{reference}</p>.")
expect(doc.to_html).to match(%r(<p><a.+>#{milestone.reference_link_text}</a></p>))
end
it 'ignores invalid milestone names' do it 'ignores invalid milestone names' do
exp = act = "Milestone #{Milestone.reference_prefix}#{milestone.name.reverse}" exp = act = "Milestone #{Milestone.reference_prefix}#{milestone.name.reverse}"
......
...@@ -538,6 +538,15 @@ RSpec.describe Milestone do ...@@ -538,6 +538,15 @@ RSpec.describe Milestone do
it { is_expected.to match('gitlab-org/gitlab-ce%123') } it { is_expected.to match('gitlab-org/gitlab-ce%123') }
it { is_expected.to match('gitlab-org/gitlab-ce%"my-milestone"') } it { is_expected.to match('gitlab-org/gitlab-ce%"my-milestone"') }
context 'when milestone_reference_pattern feature flag is false' do
before do
stub_feature_flags(milestone_reference_pattern: false)
end
it { is_expected.to match('gitlab-org/gitlab-ce%123') }
it { is_expected.to match('gitlab-org/gitlab-ce%"my-milestone"') }
end
end end
describe '.link_reference_pattern' do describe '.link_reference_pattern' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment