Commit 5fd6497a authored by Douwe Maan's avatar Douwe Maan Committed by Marin Jankovski

Merge branch 'rs-dont-follow-me' into 'master'

Add ExternalLinkFilter to Markdown pipeline

Forces a `rel="nofollow"` attribute on all external links.

Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2314#note_46525

See merge request !727
parent 4a66b5fc
...@@ -11,6 +11,7 @@ module Gitlab ...@@ -11,6 +11,7 @@ module Gitlab
autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter' autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter'
autoload :EmojiFilter, 'gitlab/markdown/emoji_filter' autoload :EmojiFilter, 'gitlab/markdown/emoji_filter'
autoload :ExternalIssueReferenceFilter, 'gitlab/markdown/external_issue_reference_filter' autoload :ExternalIssueReferenceFilter, 'gitlab/markdown/external_issue_reference_filter'
autoload :ExternalLinkFilter, 'gitlab/markdown/external_link_filter'
autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter' autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter'
autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter' autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter'
autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter' autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter'
...@@ -103,6 +104,7 @@ module Gitlab ...@@ -103,6 +104,7 @@ module Gitlab
Gitlab::Markdown::EmojiFilter, Gitlab::Markdown::EmojiFilter,
Gitlab::Markdown::TableOfContentsFilter, Gitlab::Markdown::TableOfContentsFilter,
Gitlab::Markdown::AutolinkFilter, Gitlab::Markdown::AutolinkFilter,
Gitlab::Markdown::ExternalLinkFilter,
Gitlab::Markdown::UserReferenceFilter, Gitlab::Markdown::UserReferenceFilter,
Gitlab::Markdown::IssueReferenceFilter, Gitlab::Markdown::IssueReferenceFilter,
......
require 'html/pipeline/filter'
module Gitlab
module Markdown
# HTML Filter to add a `rel="nofollow"` attribute to external links
#
class ExternalLinkFilter < HTML::Pipeline::Filter
def call
doc.search('a').each do |node|
next unless node.has_attribute?('href')
link = node.attribute('href').value
# Skip non-HTTP(S) links
next unless link.start_with?('http')
# Skip internal links
next if link.start_with?(internal_url)
node.set_attribute('rel', 'nofollow')
end
doc
end
private
def internal_url
@internal_url ||= Gitlab.config.gitlab.url
end
end
end
end
...@@ -145,7 +145,7 @@ describe 'GitLab Markdown' do ...@@ -145,7 +145,7 @@ describe 'GitLab Markdown' do
it 'removes `rel` attribute from links' do it 'removes `rel` attribute from links' do
body = get_section('sanitizationfilter') body = get_section('sanitizationfilter')
expect(body).not_to have_selector('a[rel]') expect(body).not_to have_selector('a[rel="bookmark"]')
end end
it "removes `href` from `a` elements if it's fishy" do it "removes `href` from `a` elements if it's fishy" do
...@@ -233,6 +233,18 @@ describe 'GitLab Markdown' do ...@@ -233,6 +233,18 @@ describe 'GitLab Markdown' do
end end
end end
describe 'ExternalLinkFilter' do
let(:links) { get_section('externallinkfilter').next_element }
it 'adds nofollow to external link' do
expect(links.css('a').first.to_html).to match 'nofollow'
end
it 'ignores internal link' do
expect(links.css('a').last.to_html).not_to match 'nofollow'
end
end
describe 'ReferenceFilter' do describe 'ReferenceFilter' do
it 'handles references in headers' do it 'handles references in headers' do
header = @doc.at_css('#reference-filters-eg-1').parent header = @doc.at_css('#reference-filters-eg-1').parent
......
...@@ -79,7 +79,7 @@ As permissive as it is, we've allowed even more stuff: ...@@ -79,7 +79,7 @@ As permissive as it is, we've allowed even more stuff:
<span>span tag</span> <span>span tag</span>
<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a> <a href="#" rel="bookmark">This is a link with a defined rel attribute, which should be removed</a>
<a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a> <a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
...@@ -127,6 +127,13 @@ But it shouldn't autolink text inside certain tags: ...@@ -127,6 +127,13 @@ But it shouldn't autolink text inside certain tags:
- <a>http://about.gitlab.com/</a> - <a>http://about.gitlab.com/</a>
- <kbd>http://about.gitlab.com/</kbd> - <kbd>http://about.gitlab.com/</kbd>
### ExternalLinkFilter
External links get a `rel="nofollow"` attribute:
- [Google](https://google.com/)
- [GitLab Root](<%= Gitlab.config.gitlab.url %>)
### Reference Filters (e.g., #<%= issue.iid %>) ### Reference Filters (e.g., #<%= issue.iid %>)
References should be parseable even inside _!<%= merge_request.iid %>_ emphasis. References should be parseable even inside _!<%= merge_request.iid %>_ emphasis.
......
require 'spec_helper'
module Gitlab::Markdown
describe ExternalLinkFilter do
def filter(html, options = {})
described_class.call(html, options)
end
it 'ignores elements without an href attribute' do
exp = act = %q(<a id="ignored">Ignore Me</a>)
expect(filter(act).to_html).to eq exp
end
it 'ignores non-HTTP(S) links' do
exp = act = %q(<a href="irc://irc.freenode.net/gitlab">IRC</a>)
expect(filter(act).to_html).to eq exp
end
it 'skips internal links' do
internal = Gitlab.config.gitlab.url
exp = act = %Q(<a href="#{internal}/sign_in">Login</a>)
expect(filter(act).to_html).to eq exp
end
it 'adds rel="nofollow" to external links' do
act = %q(<a href="https://google.com/">Google</a>)
doc = filter(act)
expect(doc.at_css('a')).to have_attribute('rel')
expect(doc.at_css('a')['rel']).to eq 'nofollow'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment