Commit 401ab708 authored by Robert Speicher's avatar Robert Speicher Committed by Lin Jen-Shin

Merge branch 'bvl-validate-urls-in-markdown-using-uri' into 'security'

Add correct `rel` attributes to external links when rendering markdown

See merge request !2086
parent 2e93947c
---
title: Validate URLs in markdown using URI to detect the host correctly
merge_request:
author:
...@@ -2,16 +2,17 @@ module Banzai ...@@ -2,16 +2,17 @@ module Banzai
module Filter module Filter
# HTML Filter to modify the attributes of external links # HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter class ExternalLinkFilter < HTML::Pipeline::Filter
SCHEMES = ['http', 'https', nil].freeze
def call def call
links.each do |node| links.each do |node|
href = href_to_lowercase_scheme(node["href"].to_s) uri = uri(node['href'].to_s)
next unless uri
unless node["href"].to_s == href node.set_attribute('href', uri.to_s)
node.set_attribute('href', href)
end
if href =~ %r{\A(https?:)?//[^/]} && external_url?(href) if SCHEMES.include?(uri.scheme) && external_url?(uri)
node.set_attribute('rel', 'nofollow noreferrer') node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank') node.set_attribute('target', '_blank')
end end
end end
...@@ -21,27 +22,26 @@ module Banzai ...@@ -21,27 +22,26 @@ module Banzai
private private
def uri(href)
URI.parse(href)
rescue URI::InvalidURIError
nil
end
def links def links
query = 'descendant-or-self::a[@href and not(@href = "")]' query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query) doc.xpath(query)
end end
def href_to_lowercase_scheme(href) def external_url?(uri)
scheme_match = href.match(/\A(\w+):\/\//) # Relative URLs miss a hostname
return false unless uri.hostname
if scheme_match
scheme_match.to_s.downcase + scheme_match.post_match
else
href
end
end
def external_url?(url) uri.hostname != internal_url.hostname
!url.start_with?(internal_url)
end end
def internal_url def internal_url
@internal_url ||= Gitlab.config.gitlab.url @internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end end
end end
end end
......
require 'spec_helper' require 'spec_helper'
shared_examples 'an external link with rel attribute' do
it 'adds rel="nofollow" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
end
it 'adds rel="noopener" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noopener'
end
end
describe Banzai::Filter::ExternalLinkFilter, lib: true do describe Banzai::Filter::ExternalLinkFilter, lib: true do
include FilterSpecHelper include FilterSpecHelper
...@@ -22,49 +39,51 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do ...@@ -22,49 +39,51 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for root links on document' do context 'for root links on document' do
let(:doc) { filter %q(<a href="https://google.com/">Google</a>) } let(:doc) { filter %q(<a href="https://google.com/">Google</a>) }
it 'adds rel="nofollow" to external links' do it_behaves_like 'an external link with rel attribute'
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
end
end end
context 'for nested links on document' do context 'for nested links on document' do
let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) } let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) }
it 'adds rel="nofollow" to external links' do it_behaves_like 'an external link with rel attribute'
expect(doc.at_css('a')).to have_attribute('rel') end
expect(doc.at_css('a')['rel']).to include 'nofollow'
context 'for invalid urls' do
it 'skips broken hrefs' do
doc = filter %q(<p><a href="don't crash on broken urls">Google</a></p>)
expected = %q(<p><a href="don't%20crash%20on%20broken%20urls">Google</a></p>)
expect(doc.to_html).to eq(expected)
end end
end
context 'for links with a username' do
context 'with a valid username' do
let(:doc) { filter %q(<a href="https://user@google.com/">Google</a>) }
it 'adds rel="noreferrer" to external links' do it_behaves_like 'an external link with rel attribute'
expect(doc.at_css('a')).to have_attribute('rel') end
expect(doc.at_css('a')['rel']).to include 'noreferrer'
context 'with an impersonated username' do
let(:internal) { Gitlab.config.gitlab.url }
let(:doc) { filter %Q(<a href="https://#{internal}@example.com" target="_blank">Reverse Tabnabbing</a>) }
it_behaves_like 'an external link with rel attribute'
end end
end end
context 'for non-lowercase scheme links' do context 'for non-lowercase scheme links' do
let(:doc_with_http) { filter %q(<p><a href="httP://google.com/">Google</a></p>) } context 'with http' do
let(:doc_with_https) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) } let(:doc) { filter %q(<p><a href="httP://google.com/">Google</a></p>) }
it 'adds rel="nofollow" to external links' do
expect(doc_with_http.at_css('a')).to have_attribute('rel')
expect(doc_with_https.at_css('a')).to have_attribute('rel')
expect(doc_with_http.at_css('a')['rel']).to include 'nofollow' it_behaves_like 'an external link with rel attribute'
expect(doc_with_https.at_css('a')['rel']).to include 'nofollow'
end end
it 'adds rel="noreferrer" to external links' do context 'with https' do
expect(doc_with_http.at_css('a')).to have_attribute('rel') let(:doc) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) }
expect(doc_with_https.at_css('a')).to have_attribute('rel')
expect(doc_with_http.at_css('a')['rel']).to include 'noreferrer' it_behaves_like 'an external link with rel attribute'
expect(doc_with_https.at_css('a')['rel']).to include 'noreferrer'
end end
it 'skips internal links' do it 'skips internal links' do
...@@ -84,14 +103,6 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do ...@@ -84,14 +103,6 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for protocol-relative links' do context 'for protocol-relative links' do
let(:doc) { filter %q(<p><a href="//google.com/">Google</a></p>) } let(:doc) { filter %q(<p><a href="//google.com/">Google</a></p>) }
it 'adds rel="nofollow" to external links' do it_behaves_like 'an external link with rel attribute'
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment