Commit 04a5b342 authored by Sean McGivern's avatar Sean McGivern

Optimise happy-path lookup for refs without slashes

Fetching all references for a repository uses SMEMBERS, which can be
quite slow. We need to fetch all references to resolve ambiguities in
certain edge cases. However, most references do not contain a slash, and
most repositories do not contain ambiguous references (where a tag is a
prefix of a branch or vice versa).

In those cases, we can trade two slow-ish Redis calls (SMEMBERS to get
all tags and branches) for a maximum of four much faster Redis calls,
because they are all GETs:

1. One for the feature flag (which will go away in future hopefully).
2. One to check if the repository has ambiguous refs.
3. One for checking membership of branch names.
4. One for checking membership of tag names (only if we didn't match a
   branch).

In the worst case - when the repository does not contain ambiguous refs
- we add those calls to the existing calls, but this should still not be
a huge amount of overhead in that case.

When the feature flag is disabled (which is the default), we're just
adding a single GET.
parent e0997f1b
---
title: Speed up commit lists and file blob pages on repositories with huge amounts
of branches or tags
merge_request: 38484
author:
type: performance
......@@ -60,18 +60,24 @@ module ExtractsRef
id = [id, '/'].join
end
valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
first_path_segment, rest = id.split('/', 2)
if valid_refs.empty?
# No exact ref match, so just try our best
pair = id.match(%r{([^/]+)(.*)}).captures
if use_first_path_segment?(first_path_segment)
pair = [first_path_segment, rest]
else
# There is a distinct possibility that multiple refs prefix the ID.
# Use the longest match to maximize the chance that we have the
# right ref.
best_match = valid_refs.max_by(&:length)
# Partition the string into the ref and the path, ignoring the empty first value
pair = id.partition(best_match)[1..-1]
valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
if valid_refs.empty?
# No exact ref match, so just try our best
pair = id.match(%r{([^/]+)(.*)}).captures
else
# There is a distinct possibility that multiple refs prefix the ID.
# Use the longest match to maximize the chance that we have the
# right ref.
best_match = valid_refs.max_by(&:length)
# Partition the string into the ref and the path, ignoring the empty first value
pair = id.partition(best_match)[1..-1]
end
end
end
......@@ -111,6 +117,15 @@ module ExtractsRef
private
def use_first_path_segment?(ref)
return false unless ::Feature.enabled?(:extracts_path_optimization)
return false unless repository_container
return false if repository_container.repository.has_ambiguous_refs?
repository_container.repository.branch_names_include?(ref) ||
repository_container.repository.tag_names_include?(ref)
end
# overridden in subclasses, do not remove
def get_id
id = [params[:id] || params[:ref]]
......
......@@ -1953,7 +1953,8 @@ RSpec.describe Repository do
:issue_template_names,
:merge_request_template_names,
:user_defined_metrics_dashboard_paths,
:xcode_project?
:xcode_project?,
:has_ambiguous_refs?
])
repository.after_change_head
......
......@@ -125,6 +125,56 @@ RSpec.shared_examples 'extracts refs' do
expect(extract_ref('release/app/v1.0.0/README.md')).to eq(
['release/app/v1.0.0', 'README.md'])
end
context 'when the repository does not have ambiguous refs' do
before do
allow(container.repository).to receive(:has_ambiguous_refs?).and_return(false)
end
it 'does not fetch all ref names when the first path component is a ref' do
expect(self).not_to receive(:ref_names)
expect(container.repository).to receive(:branch_names_include?).with('v1.0.0').and_return(false)
expect(container.repository).to receive(:tag_names_include?).with('v1.0.0').and_return(true)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
it 'fetches all ref names when the first path component is not a ref' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).to receive(:branch_names_include?).with('release').and_return(false)
expect(container.repository).to receive(:tag_names_include?).with('release').and_return(false)
expect(extract_ref('release/app/doc/README.md')).to eq(['release/app', 'doc/README.md'])
end
context 'when the extracts_path_optimization feature flag is disabled' do
before do
stub_feature_flags(extracts_path_optimization: false)
end
it 'always fetches all ref names' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).not_to receive(:branch_names_include?)
expect(container.repository).not_to receive(:tag_names_include?)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
end
end
context 'when the repository has ambiguous refs' do
before do
allow(container.repository).to receive(:has_ambiguous_refs?).and_return(true)
end
it 'always fetches all ref names' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).not_to receive(:branch_names_include?)
expect(container.repository).not_to receive(:tag_names_include?)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment