Commit 74d83dec authored by Michael Kozono's avatar Michael Kozono

Merge branch 'preserve-original-url-in-markdown-rendering' into 'master'

Preserve source URL in Markdown render output

See merge request gitlab-org/gitlab!64032
parents 5390d2ab d480e725
......@@ -45,13 +45,15 @@ module Banzai
return
end
html_attr.value =
path =
if context[:only_path]
path
else
Addressable::URI.join(Gitlab.config.gitlab.base_url, path).to_s
end
replace_html_attr_value(html_attr, path)
if html_attr.name == 'href'
html_attr.parent.set_attribute('data-link', 'true')
end
......@@ -59,6 +61,21 @@ module Banzai
html_attr.parent.add_class('gfm')
end
def replace_html_attr_value(html_attr, path)
if path != html_attr.value
preserve_original_link(html_attr, html_attr.parent)
end
html_attr.value = path
end
def preserve_original_link(html_attr, node)
return if html_attr.blank?
return if node.value?('data-canonical-src')
node.set_attribute('data-canonical-src', html_attr.value)
end
def group
context[:group]
end
......
......@@ -36,7 +36,7 @@ module Banzai
protected
def process_link(link_attr, node)
process_link_attr(link_attr)
process_link_attr(link_attr, node)
remove_unsafe_links({ node: node }, remove_invalid_links: false)
end
......@@ -44,14 +44,27 @@ module Banzai
!context[:wiki].nil?
end
def process_link_attr(html_attr)
def process_link_attr(html_attr, node)
return if html_attr.blank?
html_attr.value = apply_rewrite_rules(html_attr.value)
rewritten_value = apply_rewrite_rules(html_attr.value)
if html_attr.value != rewritten_value
preserve_original_link(html_attr, node)
end
html_attr.value = rewritten_value
rescue URI::Error, Addressable::URI::InvalidURIError
# noop
end
def preserve_original_link(html_attr, node)
return if html_attr.blank?
return if node.value?('data-canonical-src')
node.set_attribute('data-canonical-src', html_attr.value)
end
def apply_rewrite_rules(link_string)
Rewriter.new(link_string, wiki: context[:wiki], slug: context[:page_slug]).apply_rules
end
......
......@@ -6,7 +6,7 @@ module Gitlab
module Utils
module SanitizeNodeLink
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
ATTRS_TO_SANITIZE = %w(href src data-src).freeze
ATTRS_TO_SANITIZE = %w(href src data-src data-canonical-src).freeze
def remove_unsafe_links(env, remove_invalid_links: true)
node = env[:node]
......
......@@ -42,6 +42,12 @@ RSpec.describe Banzai::Filter::UploadLinkFilter do
let(:upload_path) { '/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg' }
let(:relative_path) { "/#{project.full_path}#{upload_path}" }
it 'preserves original url in data-canonical-src attribute' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['data-canonical-src']).to eq(upload_path)
end
context 'to a project upload' do
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
......
......@@ -22,6 +22,24 @@ RSpec.describe Banzai::Filter::WikiLinkFilter do
expect(filtered_link.attribute('href').value).to eq('/uploads/a.test')
end
describe 'when links are rewritable' do
it "stores original url in the data-canonical-src attribute" do
original_path = "#{repository_upload_folder}/a.jpg"
filtered_elements = filter("<a href='#{original_path}'><img src='#{original_path}'>example</img></a>", wiki: wiki)
expect(filtered_elements.search('img').first.attribute('data-canonical-src').value).to eq(original_path)
expect(filtered_elements.search('a').first.attribute('data-canonical-src').value).to eq(original_path)
end
end
describe 'when links are not rewritable' do
it "does not store original url in the data-canonical-src attribute" do
filtered_link = filter("<a href='/uploads/a.test'>Link</a>", wiki: wiki).children[0]
expect(filtered_link.value?('data-canonical-src')).to eq(false)
end
end
describe 'when links point to the relative wiki path' do
it 'does not rewrite links' do
path = "#{wiki.wiki_base_path}/#{repository_upload_folder}/a.jpg"
......
......@@ -56,7 +56,7 @@ RSpec.describe Emails::Releases do
let(:release) { create(:release, project: project, description: "Attachment: [Test file](#{upload_path})") }
it 'renders absolute links' do
is_expected.to have_body_text(%Q(<a href="#{project.web_url}#{upload_path}" data-link="true" class="gfm">Test file</a>))
is_expected.to have_body_text(%Q(<a href="#{project.web_url}#{upload_path}" data-canonical-src="#{upload_path}" data-link="true" class="gfm">Test file</a>))
end
end
end
......
......@@ -199,7 +199,7 @@ RSpec.describe Emails::ServiceDesk do
let_it_be(:note) { create(:note_on_issue, noteable: issue, project: project, note: "a new comment with [file](#{upload_path})") }
let(:template_content) { 'some text %{ NOTE_TEXT }' }
let(:expected_body) { %Q(some text a new comment with <a href="#{project.web_url}#{upload_path}" data-link="true" class="gfm">file</a>) }
let(:expected_body) { %Q(some text a new comment with <a href="#{project.web_url}#{upload_path}" data-canonical-src="#{upload_path}" data-link="true" class="gfm">file</a>) }
it_behaves_like 'handle template content', 'new_note'
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment