Commit 401ab708 authored by Robert Speicher's avatar Robert Speicher Committed by Lin Jen-Shin

Merge branch 'bvl-validate-urls-in-markdown-using-uri' into 'security'

Add correct `rel` attributes to external links when rendering markdown

See merge request !2086
parent 2e93947c
---
title: Validate URLs in markdown using URI to detect the host correctly
merge_request:
author:
......@@ -2,16 +2,17 @@ module Banzai
module Filter
# HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter
SCHEMES = ['http', 'https', nil].freeze
def call
links.each do |node|
href = href_to_lowercase_scheme(node["href"].to_s)
uri = uri(node['href'].to_s)
next unless uri
unless node["href"].to_s == href
node.set_attribute('href', href)
end
node.set_attribute('href', uri.to_s)
if href =~ %r{\A(https?:)?//[^/]} && external_url?(href)
node.set_attribute('rel', 'nofollow noreferrer')
if SCHEMES.include?(uri.scheme) && external_url?(uri)
node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank')
end
end
......@@ -21,27 +22,26 @@ module Banzai
private
def uri(href)
URI.parse(href)
rescue URI::InvalidURIError
nil
end
def links
query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query)
end
def href_to_lowercase_scheme(href)
scheme_match = href.match(/\A(\w+):\/\//)
if scheme_match
scheme_match.to_s.downcase + scheme_match.post_match
else
href
end
end
def external_url?(uri)
# Relative URLs miss a hostname
return false unless uri.hostname
def external_url?(url)
!url.start_with?(internal_url)
uri.hostname != internal_url.hostname
end
def internal_url
@internal_url ||= Gitlab.config.gitlab.url
@internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end
end
end
......
require 'spec_helper'
shared_examples 'an external link with rel attribute' do
it 'adds rel="nofollow" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
end
it 'adds rel="noopener" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noopener'
end
end
describe Banzai::Filter::ExternalLinkFilter, lib: true do
include FilterSpecHelper
......@@ -22,49 +39,51 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for root links on document' do
let(:doc) { filter %q(<a href="https://google.com/">Google</a>) }
it 'adds rel="nofollow" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
it_behaves_like 'an external link with rel attribute'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
context 'for nested links on document' do
let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) }
it_behaves_like 'an external link with rel attribute'
end
context 'for invalid urls' do
it 'skips broken hrefs' do
doc = filter %q(<p><a href="don't crash on broken urls">Google</a></p>)
expected = %q(<p><a href="don't%20crash%20on%20broken%20urls">Google</a></p>)
expect(doc.to_html).to eq(expected)
end
end
context 'for nested links on document' do
let(:doc) { filter %q(<p><a href="https://google.com/">Google</a></p>) }
context 'for links with a username' do
context 'with a valid username' do
let(:doc) { filter %q(<a href="https://user@google.com/">Google</a>) }
it 'adds rel="nofollow" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
it_behaves_like 'an external link with rel attribute'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
context 'with an impersonated username' do
let(:internal) { Gitlab.config.gitlab.url }
let(:doc) { filter %Q(<a href="https://#{internal}@example.com" target="_blank">Reverse Tabnabbing</a>) }
it_behaves_like 'an external link with rel attribute'
end
end
context 'for non-lowercase scheme links' do
let(:doc_with_http) { filter %q(<p><a href="httP://google.com/">Google</a></p>) }
let(:doc_with_https) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) }
context 'with http' do
let(:doc) { filter %q(<p><a href="httP://google.com/">Google</a></p>) }
it 'adds rel="nofollow" to external links' do
expect(doc_with_http.at_css('a')).to have_attribute('rel')
expect(doc_with_https.at_css('a')).to have_attribute('rel')
expect(doc_with_http.at_css('a')['rel']).to include 'nofollow'
expect(doc_with_https.at_css('a')['rel']).to include 'nofollow'
it_behaves_like 'an external link with rel attribute'
end
it 'adds rel="noreferrer" to external links' do
expect(doc_with_http.at_css('a')).to have_attribute('rel')
expect(doc_with_https.at_css('a')).to have_attribute('rel')
context 'with https' do
let(:doc) { filter %q(<p><a href="hTTpS://google.com/">Google</a></p>) }
expect(doc_with_http.at_css('a')['rel']).to include 'noreferrer'
expect(doc_with_https.at_css('a')['rel']).to include 'noreferrer'
it_behaves_like 'an external link with rel attribute'
end
it 'skips internal links' do
......@@ -84,14 +103,6 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do
context 'for protocol-relative links' do
let(:doc) { filter %q(<p><a href="//google.com/">Google</a></p>) }
it 'adds rel="nofollow" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'nofollow'
end
it 'adds rel="noreferrer" to external links' do
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to include 'noreferrer'
end
it_behaves_like 'an external link with rel attribute'
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment