Commit 809937c0 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'vij-extract-path-refactor' into 'master'

Refactor ExtractsPath module

Closes #217775

See merge request gitlab-org/gitlab!33414
parents 742c9657 cd545cad
......@@ -3,79 +3,8 @@
# Module providing methods for dealing with separating a tree-ish string and a
# file path string when combined in a request parameter
module ExtractsPath
# Raised when given an invalid file path
InvalidPathError = Class.new(StandardError)
# Given a string containing both a Git tree-ish, such as a branch or tag, and
# a filesystem path joined by forward slashes, attempts to separate the two.
#
# Expects a @project instance variable to contain the active project. This is
# used to check the input against a list of valid repository refs.
#
# Examples
#
# # No @project available
# extract_ref('master')
# # => ['', '']
#
# extract_ref('master')
# # => ['master', '']
#
# extract_ref("f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG")
# # => ['f4b14494ef6abf3d144c28e4af0c20143383e062', 'CHANGELOG']
#
# extract_ref("v2.0.0/README.md")
# # => ['v2.0.0', 'README.md']
#
# extract_ref('master/app/models/project.rb')
# # => ['master', 'app/models/project.rb']
#
# extract_ref('issues/1234/app/models/project.rb')
# # => ['issues/1234', 'app/models/project.rb']
#
# # Given an invalid branch, we fall back to just splitting on the first slash
# extract_ref('non/existent/branch/README.md')
# # => ['non', 'existent/branch/README.md']
#
# Returns an Array where the first value is the tree-ish and the second is the
# path
def extract_ref(id)
pair = ['', '']
return pair unless @project # rubocop:disable Gitlab/ModuleWithInstanceVariables
if id =~ /^(\h{40})(.+)/
# If the ref appears to be a SHA, we're done, just split the string
pair = $~.captures
else
# Otherwise, attempt to detect the ref using a list of the project's
# branches and tags
# Append a trailing slash if we only get a ref and no file path
unless id.ends_with?('/')
id = [id, '/'].join
end
valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
if valid_refs.empty?
# No exact ref match, so just try our best
pair = id.match(%r{([^/]+)(.*)}).captures
else
# There is a distinct possibility that multiple refs prefix the ID.
# Use the longest match to maximize the chance that we have the
# right ref.
best_match = valid_refs.max_by(&:length)
# Partition the string into the ref and the path, ignoring the empty first value
pair = id.partition(best_match)[1..-1]
end
end
# Remove ending slashes from path
pair[1].gsub!(%r{^/|/$}, '')
pair
end
extend ::Gitlab::Utils::Override
include ExtractsRef
# If we have an ID of 'foo.atom', and the controller provides Atom and HTML
# formats, then we have to check if the request was for the Atom version of
......@@ -90,34 +19,17 @@ module ExtractsPath
valid_refs.max_by(&:length)
end
# Assigns common instance variables for views working with Git tree-ish objects
#
# Assignments are:
#
# - @id - A string representing the joined ref and path
# - @ref - A string representing the ref (e.g., the branch, tag, or commit SHA)
# - @path - A string representing the filesystem path
# - @commit - A Commit representing the commit from the given ref
#
# If the :id parameter appears to be requesting a specific response format,
# that will be handled as well.
#
# If there is no path and the ref doesn't exist in the repo, try to resolve
# the ref without an '.atom' suffix. If _that_ ref is found, set the request's
# format to Atom manually.
# Extends the method to handle if there is no path and the ref doesn't
# exist in the repo, try to resolve the ref without an '.atom' suffix.
# If _that_ ref is found, set the request's format to Atom manually.
#
# Automatically renders `not_found!` if a valid tree path could not be
# resolved (e.g., when a user inserts an invalid path or ref).
#
# rubocop:disable Gitlab/ModuleWithInstanceVariables
override :assign_ref_vars
def assign_ref_vars
@id = get_id
@ref, @path = extract_ref(@id)
@repo = @project.repository
@ref.strip!
raise InvalidPathError if @ref.match?(/\s/)
@commit = @repo.commit(@ref)
super
if @path.empty? && !@commit && @id.ends_with?('.atom')
@id = @ref = extract_ref_without_atom(@id)
......@@ -135,10 +47,6 @@ module ExtractsPath
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
def tree
@tree ||= @repo.tree(@commit.id, @path) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def lfs_blob_ids
blob_ids = tree.blobs.map(&:id)
......@@ -146,21 +54,13 @@ module ExtractsPath
# the current Blob in order to determine if it's a LFS object
blob_ids = Array.wrap(@repo.blob_at(@commit.id, @path)&.id) if blob_ids.empty? # rubocop:disable Gitlab/ModuleWithInstanceVariables
@lfs_blob_ids = Gitlab::Git::Blob.batch_lfs_pointers(@project.repository, blob_ids).map(&:id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
@lfs_blob_ids = Gitlab::Git::Blob.batch_lfs_pointers(repository_container.repository, blob_ids).map(&:id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
private
# overridden in subclasses, do not remove
def get_id
id = [params[:id] || params[:ref]]
id << "/" + params[:path] unless params[:path].blank?
id.join
end
def ref_names
return [] unless @project # rubocop:disable Gitlab/ModuleWithInstanceVariables
@ref_names ||= @project.repository.ref_names # rubocop:disable Gitlab/ModuleWithInstanceVariables
override :repository_container
def repository_container
@project
end
end
# frozen_string_literal: true
# Module providing methods for dealing with separating a tree-ish string and a
# file path string when combined in a request parameter
# Can be extended for different types of repository object, e.g. Project or Snippet
module ExtractsRef
InvalidPathError = Class.new(StandardError)
# Given a string containing both a Git tree-ish, such as a branch or tag, and
# a filesystem path joined by forward slashes, attempts to separate the two.
#
# Expects a repository_container method that returns the active repository object. This is
# used to check the input against a list of valid repository refs.
#
# Examples
#
# # No repository_container available
# extract_ref('master')
# # => ['', '']
#
# extract_ref('master')
# # => ['master', '']
#
# extract_ref("f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG")
# # => ['f4b14494ef6abf3d144c28e4af0c20143383e062', 'CHANGELOG']
#
# extract_ref("v2.0.0/README.md")
# # => ['v2.0.0', 'README.md']
#
# extract_ref('master/app/models/project.rb')
# # => ['master', 'app/models/project.rb']
#
# extract_ref('issues/1234/app/models/project.rb')
# # => ['issues/1234', 'app/models/project.rb']
#
# # Given an invalid branch, we fall back to just splitting on the first slash
# extract_ref('non/existent/branch/README.md')
# # => ['non', 'existent/branch/README.md']
#
# Returns an Array where the first value is the tree-ish and the second is the
# path
def extract_ref(id)
pair = ['', '']
return pair unless repository_container
if id =~ /^(\h{40})(.+)/
# If the ref appears to be a SHA, we're done, just split the string
pair = $~.captures
else
# Otherwise, attempt to detect the ref using a list of the repository_container's
# branches and tags
# Append a trailing slash if we only get a ref and no file path
unless id.ends_with?('/')
id = [id, '/'].join
end
valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
if valid_refs.empty?
# No exact ref match, so just try our best
pair = id.match(%r{([^/]+)(.*)}).captures
else
# There is a distinct possibility that multiple refs prefix the ID.
# Use the longest match to maximize the chance that we have the
# right ref.
best_match = valid_refs.max_by(&:length)
# Partition the string into the ref and the path, ignoring the empty first value
pair = id.partition(best_match)[1..-1]
end
end
pair[0] = pair[0].strip
# Remove ending slashes from path
pair[1].gsub!(%r{^/|/$}, '')
pair
end
# Assigns common instance variables for views working with Git tree-ish objects
#
# Assignments are:
#
# - @id - A string representing the joined ref and path
# - @ref - A string representing the ref (e.g., the branch, tag, or commit SHA)
# - @path - A string representing the filesystem path
# - @commit - A Commit representing the commit from the given ref
#
# If the :id parameter appears to be requesting a specific response format,
# that will be handled as well.
#
# rubocop:disable Gitlab/ModuleWithInstanceVariables
def assign_ref_vars
@id = get_id
@ref, @path = extract_ref(@id)
@repo = repository_container.repository
raise InvalidPathError if @ref.match?(/\s/)
@commit = @repo.commit(@ref)
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
def tree
@tree ||= @repo.tree(@commit.id, @path) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
private
# overridden in subclasses, do not remove
def get_id
id = [params[:id] || params[:ref]]
id << "/" + params[:path] unless params[:path].blank?
id.join
end
def ref_names
return [] unless repository_container
@ref_names ||= repository_container.repository.ref_names # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def repository_container
raise NotImplementedError
end
end
......@@ -7,17 +7,15 @@ describe ExtractsPath do
include RepoHelpers
include Gitlab::Routing
let(:project) { double('project') }
let_it_be(:owner) { create(:user) }
let_it_be(:container) { create(:project, :repository, creator: owner) }
let(:request) { double('request') }
before do
@project = project
@project = container
ref_names = ['master', 'foo/bar/baz', 'v1.0.0', 'v2.0.0', 'release/app', 'release/app/v1.0.0']
repo = double(ref_names: ['master', 'foo/bar/baz', 'v1.0.0', 'v2.0.0',
'release/app', 'release/app/v1.0.0'])
allow(project).to receive(:repository).and_return(repo)
allow(project).to receive(:full_path)
.and_return('gitlab/gitlab-ci')
allow(container.repository).to receive(:ref_names).and_return(ref_names)
allow(request).to receive(:format=)
end
......@@ -25,45 +23,12 @@ describe ExtractsPath do
let(:ref) { sample_commit[:id] }
let(:params) { { path: sample_commit[:line_code_path], ref: ref } }
before do
@project = create(:project, :repository)
end
it_behaves_like 'assigns ref vars'
it "log tree path has no escape sequences" do
it 'log tree path has no escape sequences' do
assign_ref_vars
expect(@logs_path).to eq("/#{@project.full_path}/-/refs/#{ref}/logs_tree/files/ruby/popen.rb")
end
context 'ref contains %20' do
let(:ref) { 'foo%20bar' }
it 'is not converted to a space in @id' do
@project.repository.add_branch(@project.owner, 'foo%20bar', 'master')
assign_ref_vars
expect(@id).to start_with('foo%20bar/')
end
end
context 'ref contains trailing space' do
let(:ref) { 'master ' }
it 'strips surrounding space' do
assign_ref_vars
expect(@ref).to eq('master')
end
end
context 'ref contains leading space' do
let(:ref) { ' master ' }
it 'strips surrounding space' do
assign_ref_vars
expect(@ref).to eq('master')
end
expect(@logs_path).to eq("/#{@project.full_path}/-/refs/#{ref}/logs_tree/files/ruby/popen.rb")
end
context 'ref contains space in the middle' do
......@@ -76,28 +41,6 @@ describe ExtractsPath do
end
end
context 'path contains space' do
let(:params) { { path: 'with space', ref: '38008cb17ce1466d8fec2dfa6f6ab8dcfe5cf49e' } }
it 'is not converted to %20 in @path' do
assign_ref_vars
expect(@path).to eq(params[:path])
end
end
context 'subclass overrides get_id' do
it 'uses ref returned by get_id' do
allow_next_instance_of(self.class) do |instance|
allow(instance).to receive(:get_id) { '38008cb17ce1466d8fec2dfa6f6ab8dcfe5cf49e' }
end
assign_ref_vars
expect(@id).to eq(get_id)
end
end
context 'ref only exists without .atom suffix' do
context 'with a path' do
let(:params) { { ref: 'v1.0.0.atom', path: 'README.md' } }
......@@ -171,58 +114,7 @@ describe ExtractsPath do
end
end
describe '#extract_ref' do
it "returns an empty pair when no @project is set" do
@project = nil
expect(extract_ref('master/CHANGELOG')).to eq(['', ''])
end
context "without a path" do
it "extracts a valid branch" do
expect(extract_ref('master')).to eq(['master', ''])
end
it "extracts a valid tag" do
expect(extract_ref('v2.0.0')).to eq(['v2.0.0', ''])
end
it "extracts a valid commit ref without a path" do
expect(extract_ref('f4b14494ef6abf3d144c28e4af0c20143383e062')).to eq(
['f4b14494ef6abf3d144c28e4af0c20143383e062', '']
)
end
it "falls back to a primitive split for an invalid ref" do
expect(extract_ref('stable')).to eq(['stable', ''])
end
it "extracts the longest matching ref" do
expect(extract_ref('release/app/v1.0.0/README.md')).to eq(
['release/app/v1.0.0', 'README.md'])
end
end
context "with a path" do
it "extracts a valid branch" do
expect(extract_ref('foo/bar/baz/CHANGELOG')).to eq(
['foo/bar/baz', 'CHANGELOG'])
end
it "extracts a valid tag" do
expect(extract_ref('v2.0.0/CHANGELOG')).to eq(['v2.0.0', 'CHANGELOG'])
end
it "extracts a valid commit SHA" do
expect(extract_ref('f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG')).to eq(
%w(f4b14494ef6abf3d144c28e4af0c20143383e062 CHANGELOG)
)
end
it "falls back to a primitive split for an invalid ref" do
expect(extract_ref('stable/CHANGELOG')).to eq(%w(stable CHANGELOG))
end
end
end
it_behaves_like 'extracts refs'
describe '#extract_ref_without_atom' do
it 'ignores any matching refs suffixed with atom' do
......
# frozen_string_literal: true
require 'spec_helper'
describe ExtractsRef do
include described_class
include RepoHelpers
let_it_be(:owner) { create(:user) }
let_it_be(:container) { create(:snippet, :repository, author: owner) }
let(:ref) { sample_commit[:id] }
let(:params) { { path: sample_commit[:line_code_path], ref: ref } }
before do
ref_names = ['master', 'foo/bar/baz', 'v1.0.0', 'v2.0.0', 'release/app', 'release/app/v1.0.0']
allow(container.repository).to receive(:ref_names).and_return(ref_names)
allow_any_instance_of(described_class).to receive(:repository_container).and_return(container)
end
it_behaves_like 'assigns ref vars'
it_behaves_like 'extracts refs'
end
# frozen_string_literal: true
RSpec.shared_examples 'assigns ref vars' do
it 'assigns the repository var' do
assign_ref_vars
expect(@repo).to eq container.repository
end
context 'ref contains %20' do
let(:ref) { 'foo%20bar' }
it 'is not converted to a space in @id' do
container.repository.add_branch(owner, 'foo%20bar', 'master')
assign_ref_vars
expect(@id).to start_with('foo%20bar/')
end
end
context 'ref contains trailing space' do
let(:ref) { 'master ' }
it 'strips surrounding space' do
assign_ref_vars
expect(@ref).to eq('master')
end
end
context 'ref contains leading space' do
let(:ref) { ' master ' }
it 'strips surrounding space' do
assign_ref_vars
expect(@ref).to eq('master')
end
end
context 'path contains space' do
let(:params) { { path: 'with space', ref: '38008cb17ce1466d8fec2dfa6f6ab8dcfe5cf49e' } }
it 'is not converted to %20 in @path' do
assign_ref_vars
expect(@path).to eq(params[:path])
end
end
context 'subclass overrides get_id' do
it 'uses ref returned by get_id' do
allow_next_instance_of(self.class) do |instance|
allow(instance).to receive(:get_id) { '38008cb17ce1466d8fec2dfa6f6ab8dcfe5cf49e' }
end
assign_ref_vars
expect(@id).to eq(get_id)
end
end
end
RSpec.shared_examples 'extracts refs' do
describe '#extract_ref' do
it 'returns an empty pair when no repository_container is set' do
allow_any_instance_of(described_class).to receive(:repository_container).and_return(nil)
expect(extract_ref('master/CHANGELOG')).to eq(['', ''])
end
context 'without a path' do
it 'extracts a valid branch' do
expect(extract_ref('master')).to eq(['master', ''])
end
it 'extracts a valid tag' do
expect(extract_ref('v2.0.0')).to eq(['v2.0.0', ''])
end
it 'extracts a valid commit ref without a path' do
expect(extract_ref('f4b14494ef6abf3d144c28e4af0c20143383e062')).to eq(
['f4b14494ef6abf3d144c28e4af0c20143383e062', '']
)
end
it 'falls back to a primitive split for an invalid ref' do
expect(extract_ref('stable')).to eq(['stable', ''])
end
it 'extracts the longest matching ref' do
expect(extract_ref('release/app/v1.0.0/README.md')).to eq(
['release/app/v1.0.0', 'README.md'])
end
end
context 'with a path' do
it 'extracts a valid branch' do
expect(extract_ref('foo/bar/baz/CHANGELOG')).to eq(
['foo/bar/baz', 'CHANGELOG'])
end
it 'extracts a valid tag' do
expect(extract_ref('v2.0.0/CHANGELOG')).to eq(['v2.0.0', 'CHANGELOG'])
end
it 'extracts a valid commit SHA' do
expect(extract_ref('f4b14494ef6abf3d144c28e4af0c20143383e062/CHANGELOG')).to eq(
%w(f4b14494ef6abf3d144c28e4af0c20143383e062 CHANGELOG)
)
end
it 'falls back to a primitive split for an invalid ref' do
expect(extract_ref('stable/CHANGELOG')).to eq(%w(stable CHANGELOG))
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment