Commit 8fa55459 authored by Nick Thomas's avatar Nick Thomas

Merge branch 'split-up-relativelinkfilter' into 'master'

Split up RelativeLinkFilter into UploadLinkFilter and RepositoryLinkFilter

See merge request gitlab-org/gitlab!22631
parents 61f40f90 7cfbf326
......@@ -281,7 +281,7 @@ module MarkupHelper
context.reverse_merge!(
current_user: (current_user if defined?(current_user)),
# RelativeLinkFilter
# RepositoryLinkFilter and UploadLinkFilter
commit: @commit,
project_wiki: @project_wiki,
ref: @ref,
......
---
title: Avoid making Gitaly calls when some Markdown text links to an uploaded file
merge_request: 22631
author:
type: performance
# frozen_string_literal: true
require 'uri'
module Banzai
module Filter
class BaseRelativeLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::StrongMemoize
protected
def linkable_attributes
strong_memoize(:linkable_attributes) do
attrs = []
attrs += doc.search('a:not(.gfm)').map do |el|
el.attribute('href')
end
attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el|
[el.attribute('src'), el.attribute('data-src')]
end
attrs.reject do |attr|
attr.blank? || attr.value.start_with?('//')
end
end
end
def relative_url_root
Gitlab.config.gitlab.relative_url_root.presence || '/'
end
def project
context[:project]
end
private
def unescape_and_scrub_uri(uri)
Addressable::URI.unescape(uri).scrub
end
end
end
end
......@@ -4,19 +4,17 @@ require 'uri'
module Banzai
module Filter
# HTML filter that "fixes" relative links to uploads or files in a repository.
# HTML filter that "fixes" relative links to files in a repository.
#
# Context options:
# :commit
# :group
# :current_user
# :project
# :project_wiki
# :ref
# :requested_path
class RelativeLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::StrongMemoize
# :system_note
class RepositoryLinkFilter < BaseRelativeLinkFilter
def call
return doc if context[:system_note]
......@@ -26,7 +24,9 @@ module Banzai
load_uri_types
linkable_attributes.each do |attr|
process_link_attr(attr)
if linkable_files? && repo_visible_to_user?
process_link_to_repository_attr(attr)
end
end
doc
......@@ -35,8 +35,8 @@ module Banzai
protected
def load_uri_types
return unless linkable_files?
return unless linkable_attributes.present?
return unless linkable_files?
return {} unless repository
@uri_types = request_path.present? ? get_uri_types([request_path]) : {}
......@@ -57,24 +57,6 @@ module Banzai
end
end
def linkable_attributes
strong_memoize(:linkable_attributes) do
attrs = []
attrs += doc.search('a:not(.gfm)').map do |el|
el.attribute('href')
end
attrs += doc.search('img, video, audio').flat_map do |el|
[el.attribute('src'), el.attribute('data-src')]
end
attrs.reject do |attr|
attr.blank? || attr.value.start_with?('//')
end
end
end
def get_uri_types(paths)
return {} if paths.empty?
......@@ -107,39 +89,6 @@ module Banzai
rescue URI::Error, Addressable::URI::InvalidURIError
end
def process_link_attr(html_attr)
if html_attr.value.start_with?('/uploads/')
process_link_to_upload_attr(html_attr)
elsif linkable_files? && repo_visible_to_user?
process_link_to_repository_attr(html_attr)
end
end
def process_link_to_upload_attr(html_attr)
path_parts = [unescape_and_scrub_uri(html_attr.value)]
if project
path_parts.unshift(relative_url_root, project.full_path)
elsif group
path_parts.unshift(relative_url_root, 'groups', group.full_path, '-')
else
path_parts.unshift(relative_url_root)
end
begin
path = Addressable::URI.escape(File.join(*path_parts))
rescue Addressable::URI::InvalidURIError
return
end
html_attr.value =
if context[:only_path]
path
else
Addressable::URI.join(Gitlab.config.gitlab.base_url, path).to_s
end
end
def process_link_to_repository_attr(html_attr)
uri = URI(html_attr.value)
......@@ -239,10 +188,6 @@ module Banzai
@current_commit ||= context[:commit] || repository.commit(ref)
end
def relative_url_root
Gitlab.config.gitlab.relative_url_root.presence || '/'
end
def repo_visible_to_user?
project && Ability.allowed?(current_user, :download_code, project)
end
......@@ -251,14 +196,6 @@ module Banzai
context[:ref] || project.default_branch
end
def group
context[:group]
end
def project
context[:project]
end
def current_user
context[:current_user]
end
......@@ -266,12 +203,6 @@ module Banzai
def repository
@repository ||= project&.repository
end
private
def unescape_and_scrub_uri(uri)
Addressable::URI.unescape(uri).scrub
end
end
end
end
# frozen_string_literal: true
require 'uri'
module Banzai
module Filter
# HTML filter that "fixes" links to uploads.
#
# Context options:
# :group
# :only_path
# :project
# :system_note
class UploadLinkFilter < BaseRelativeLinkFilter
def call
return doc if context[:system_note]
linkable_attributes.each do |attr|
process_link_to_upload_attr(attr)
end
doc
end
protected
def process_link_to_upload_attr(html_attr)
return unless html_attr.value.start_with?('/uploads/')
path_parts = [unescape_and_scrub_uri(html_attr.value)]
if project
path_parts.unshift(relative_url_root, project.full_path)
elsif group
path_parts.unshift(relative_url_root, 'groups', group.full_path, '-')
else
path_parts.unshift(relative_url_root)
end
begin
path = Addressable::URI.escape(File.join(*path_parts))
rescue Addressable::URI::InvalidURIError
return
end
html_attr.value =
if context[:only_path]
path
else
Addressable::URI.join(Gitlab.config.gitlab.base_url, path).to_s
end
html_attr.parent.add_class('gfm')
end
def group
context[:group]
end
end
end
end
......@@ -16,7 +16,10 @@ module Banzai
[
Filter::ReferenceRedactorFilter,
Filter::InlineMetricsRedactorFilter,
Filter::RelativeLinkFilter,
# UploadLinkFilter must come before RepositoryLinkFilter to
# prevent unnecessary Gitaly calls from being made.
Filter::UploadLinkFilter,
Filter::RepositoryLinkFilter,
Filter::IssuableStateFilter,
Filter::SuggestionFilter
]
......
# frozen_string_literal: true
module Banzai
module Pipeline
class RelativeLinkPipeline < BasePipeline
def self.filters
FilterArray[
Filter::RelativeLinkFilter
]
end
end
end
end
......@@ -208,6 +208,8 @@ describe 'GitLab Markdown', :aggregate_failures do
@group = @feat.group
end
let(:project) { @feat.project } # Shadow this so matchers can use it
context 'default pipeline' do
before do
@html = markdown(@feat.raw_markdown)
......@@ -216,8 +218,12 @@ describe 'GitLab Markdown', :aggregate_failures do
it_behaves_like 'all pipelines'
it 'includes custom filters' do
aggregate_failures 'RelativeLinkFilter' do
expect(doc).to parse_relative_links
aggregate_failures 'UploadLinkFilter' do
expect(doc).to parse_upload_links
end
aggregate_failures 'RepositoryLinkFilter' do
expect(doc).to parse_repository_links
end
aggregate_failures 'EmojiFilter' do
......@@ -277,8 +283,12 @@ describe 'GitLab Markdown', :aggregate_failures do
it_behaves_like 'all pipelines'
it 'includes custom filters' do
aggregate_failures 'RelativeLinkFilter' do
expect(doc).not_to parse_relative_links
aggregate_failures 'UploadLinkFilter' do
expect(doc).to parse_upload_links
end
aggregate_failures 'RepositoryLinkFilter' do
expect(doc).not_to parse_repository_links
end
aggregate_failures 'EmojiFilter' do
......
......@@ -111,7 +111,13 @@ Markdown should be usable inside a link. Let's try!
- [**text**](#link-strong)
- [`text`](#link-code)
### RelativeLinkFilter
### UploadLinkFilter
Linking to an upload in this project should work:
[Relative Upload Link](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
![Relative Upload Image](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
### RepositoryLinkFilter
Linking to a file relative to this project's repository should work.
......
......@@ -2,7 +2,7 @@
require 'spec_helper'
describe Banzai::Filter::RelativeLinkFilter do
describe Banzai::Filter::RepositoryLinkFilter do
include GitHelpers
include RepoHelpers
......@@ -128,11 +128,6 @@ describe Banzai::Filter::RelativeLinkFilter do
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception on URIs containing invalid utf-8 byte sequences in uploads' do
act = link("/uploads/%FF")
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception on URIs containing invalid utf-8 byte sequences in context requested path' do
expect { filter(link("files/test.md"), requested_path: '%FF') }.not_to raise_error
end
......@@ -147,11 +142,6 @@ describe Banzai::Filter::RelativeLinkFilter do
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception with a space in the path' do
act = link("/uploads/d18213acd3732630991986120e167e3d/Landscape_8.jpg \nBut here's some more unexpected text :smile:)")
expect { filter(act) }.not_to raise_error
end
it 'ignores ref if commit is passed' do
doc = filter(link('non/existent.file'), commit: project.commit('empty-branch') )
expect(doc.at_css('a')['href'])
......@@ -350,166 +340,4 @@ describe Banzai::Filter::RelativeLinkFilter do
include_examples :valid_repository
end
context 'with a /upload/ URL' do
# not needed
let(:commit) { nil }
let(:ref) { nil }
let(:requested_path) { nil }
let(:upload_path) { '/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg' }
let(:relative_path) { "/#{project.full_path}#{upload_path}" }
context 'to a project upload' do
shared_examples 'rewrite project uploads' do
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rebuilds relative URL for a link' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
doc = filter(nested(link(upload_path)))
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'rebuilds relative URL for an image' do
doc = filter(image(upload_path))
expect(doc.at_css('img')['src']).to eq(relative_path)
doc = filter(nested(image(upload_path)))
expect(doc.at_css('img')['src']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
it 'supports unescaped Unicode filenames' do
path = '/uploads/한글.png'
doc = filter(link(path))
expect(doc.at_css('a')['href']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
end
it 'supports escaped Unicode filenames' do
path = '/uploads/한글.png'
escaped = Addressable::URI.escape(path)
doc = filter(image(escaped))
expect(doc.at_css('img')['src']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
end
end
context 'without project repository access' do
let(:project) { create(:project, :repository, repository_access_level: ProjectFeature::PRIVATE) }
it_behaves_like 'rewrite project uploads'
end
context 'with project repository access' do
it_behaves_like 'rewrite project uploads'
end
end
context 'to a group upload' do
let(:upload_link) { link('/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg') }
let(:group) { create(:group) }
let(:project) { nil }
let(:relative_path) { "/groups/#{group.full_path}/-/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg" }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'rewrites the link correctly for subgroup' do
group.update!(parent: create(:group))
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
end
context 'to a personal snippet' do
let(:group) { nil }
let(:project) { nil }
let(:relative_path) { '/uploads/-/system/personal_snippet/6/674e4f07fbf0a7736c3439212896e51a/example.tar.gz' }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
context 'with a relative URL root' do
let(:gitlab_root) { '/gitlab' }
let(:absolute_path) { Gitlab.config.gitlab.url + gitlab_root + relative_path }
before do
stub_config_setting(relative_url_root: gitlab_root)
end
context 'with an absolute URL' do
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(gitlab_root + relative_path)
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Banzai::Filter::UploadLinkFilter do
def filter(doc, contexts = {})
contexts.reverse_merge!(
project: project,
group: group,
only_path: only_path
)
described_class.call(doc, contexts)
end
def image(path)
%(<img src="#{path}" />)
end
def video(path)
%(<video src="#{path}"></video>)
end
def audio(path)
%(<audio src="#{path}"></audio>)
end
def link(path)
%(<a href="#{path}">#{path}</a>)
end
def nested(element)
%(<div>#{element}</div>)
end
let_it_be(:project) { create(:project, :public) }
let_it_be(:user) { create(:user) }
let(:group) { nil }
let(:project_path) { project.full_path }
let(:only_path) { true }
let(:upload_path) { '/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg' }
let(:relative_path) { "/#{project.full_path}#{upload_path}" }
context 'to a project upload' do
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rebuilds relative URL for a link' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
doc = filter(nested(link(upload_path)))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'rebuilds relative URL for an image' do
doc = filter(image(upload_path))
expect(doc.at_css('img')['src']).to eq(relative_path)
expect(doc.at_css('img').classes).to include('gfm')
doc = filter(nested(image(upload_path)))
expect(doc.at_css('img')['src']).to eq(relative_path)
expect(doc.at_css('img').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
it 'supports unescaped Unicode filenames' do
path = '/uploads/한글.png'
doc = filter(link(path))
expect(doc.at_css('a')['href']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
expect(doc.at_css('a').classes).to include('gfm')
end
it 'supports escaped Unicode filenames' do
path = '/uploads/한글.png'
escaped = Addressable::URI.escape(path)
doc = filter(image(escaped))
expect(doc.at_css('img')['src']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
expect(doc.at_css('img').classes).to include('gfm')
end
end
context 'to a group upload' do
let(:upload_link) { link('/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg') }
let_it_be(:group) { create(:group) }
let(:project) { nil }
let(:relative_path) { "/groups/#{group.full_path}/-/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg" }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'rewrites the link correctly for subgroup' do
group.update!(parent: create(:group))
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
end
context 'to a personal snippet' do
let(:group) { nil }
let(:project) { nil }
let(:relative_path) { '/uploads/-/system/personal_snippet/6/674e4f07fbf0a7736c3439212896e51a/example.tar.gz' }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
context 'with a relative URL root' do
let(:gitlab_root) { '/gitlab' }
let(:absolute_path) { Gitlab.config.gitlab.url + gitlab_root + relative_path }
before do
stub_config_setting(relative_url_root: gitlab_root)
end
context 'with an absolute URL' do
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(gitlab_root + relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
end
context 'invalid input' do
using RSpec::Parameterized::TableSyntax
where(:name, :href) do
'invalid URI' | '://foo'
'invalid UTF-8 byte sequences' | '%FF'
'garbled path' | 'open(/var/tmp/):%20/location%0Afrom:%20/test'
'whitespace' | "d18213acd3732630991986120e167e3d/Landscape_8.jpg\nand more"
end
with_them do
it { expect { filter(link("/uploads/#{href}")) }.not_to raise_error }
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Banzai::Pipeline::PostProcessPipeline do
context 'when a document only has upload links' do
it 'does not make any Gitaly calls', :request_store do
markdown = <<-MARKDOWN.strip_heredoc
[Relative Upload Link](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
![Relative Upload Image](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
MARKDOWN
context = {
project: create(:project, :public, :repository),
ref: 'master'
}
Gitlab::GitalyClient.reset_counts
described_class.call(markdown, context)
expect(Gitlab::GitalyClient.get_request_count).to eq(0)
end
end
end
......@@ -10,8 +10,21 @@ module MarkdownMatchers
extend RSpec::Matchers::DSL
include Capybara::Node::Matchers
# RelativeLinkFilter
matcher :parse_relative_links do
# UploadLinkFilter
matcher :parse_upload_links do
set_default_markdown_messages
match do |actual|
link = actual.at_css('a:contains("Relative Upload Link")')
image = actual.at_css('img[alt="Relative Upload Image"]')
expect(link['href']).to eq("/#{project.full_path}/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg")
expect(image['data-src']).to eq("/#{project.full_path}/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg")
end
end
# RepositoryLinkFilter
matcher :parse_repository_links do
set_default_markdown_messages
match do |actual|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment