Commit aa2cc670 authored by Robert Speicher

Add Gitlab::Markdown::AutolinkFilter

parent 2d9edcad
......@@ -34,9 +34,8 @@ module GitlabMarkdownHelper
# see https://github.com/vmg/redcarpet#darling-i-packed-you-a-couple-renderers-for-lunch
rend = Redcarpet::Render::GitlabHTML.new(self, user_color_scheme_class, {
safe_links_only: true,
# Handled further down the line by HTML::Pipeline::SanitizationFilter
escape_html: false
escape_html: false
}.merge(options))
# see https://github.com/vmg/redcarpet#and-its-like-really-simple-to-use
......@@ -44,7 +43,6 @@ module GitlabMarkdownHelper
no_intra_emphasis: true,
tables: true,
fenced_code_blocks: true,
autolink: true,
strikethrough: true,
lax_spacing: true,
space_after_headers: true,
......
......@@ -30,6 +30,7 @@ module Gitlab
# => "<img alt=\":trollface:\" class=\"emoji\" src=\"/images/trollface.png\" title=\":trollface:\" />"
module Markdown
# Provide autoload paths for filters to prevent a circular dependency error
autoload :AutolinkFilter, 'gitlab/markdown/autolink_filter'
autoload :CommitRangeReferenceFilter, 'gitlab/markdown/commit_range_reference_filter'
autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter'
autoload :EmojiFilter, 'gitlab/markdown/emoji_filter'
......@@ -122,6 +123,7 @@ module Gitlab
Gitlab::Markdown::EmojiFilter,
Gitlab::Markdown::TableOfContentsFilter,
Gitlab::Markdown::AutolinkFilter,
Gitlab::Markdown::UserReferenceFilter,
Gitlab::Markdown::IssueReferenceFilter,
......@@ -130,7 +132,7 @@ module Gitlab
Gitlab::Markdown::SnippetReferenceFilter,
Gitlab::Markdown::CommitRangeReferenceFilter,
Gitlab::Markdown::CommitReferenceFilter,
Gitlab::Markdown::LabelReferenceFilter,
Gitlab::Markdown::LabelReferenceFilter
]
end
......
require 'html/pipeline/filter'
require 'uri'
module Gitlab
  module Markdown
    # HTML::Pipeline filter that turns bare URLs in a document into anchors.
    #
    # Modelled on HTML::Pipeline::AutolinkFilter. Linking happens in two
    # passes: Rinku handles the URLs it recognises, then a regex pass picks up
    # any remaining `scheme://...` text.
    #
    # Context options:
    #   :autolink  - when exactly `false`, the document is returned unchanged
    #   :link_attr - Hash of HTML attributes applied to every generated link
    #
    class AutolinkFilter < HTML::Pipeline::Filter
      include ActionView::Helpers::TagHelper

      # Matches `scheme://rest` where the scheme starts with a lowercase
      # letter followed by one or more lowercase letters, digits, underscore,
      # plus, period or hyphen, and `rest` runs up to the next whitespace.
      #
      # Schemes are technically case-insensitive; only lowercase is accepted
      # here on purpose.
      #
      # See http://en.wikipedia.org/wiki/URI_scheme
      #
      # The trailing negative lookbehind keeps a final comma or period out of
      # the match, so "see foo://bar." links only "foo://bar".
      SCHEME_PATTERN = %r{([a-z][a-z0-9_\+\.\-]+:\/\/[^\s]+)(?<!,|\.)}

      # Elements whose descendant text is never autolinked
      IGNORE_PARENTS = %w(a code kbd pre script style).to_set

      # Entry point invoked by the pipeline; returns the processed document.
      def call
        if context[:autolink] == false
          doc
        else
          rinku_parse
          text_parse
        end
      end

      private

      # First pass: let Rinku link the URLs it knows about (http(s) and ftp).
      #
      # Only `:urls` mode is used -- email autolinking would wrongly match
      # external Commit and CommitRange references.
      #
      # Rinku works on Strings, so its output is parsed back into `@doc` for
      # the second pass.
      def rinku_parse
        # Rinku expects the link attributes as a pre-rendered String
        attributes = tag_options(link_options)
        linked = Rinku.auto_link(html, :urls, attributes, IGNORE_PARENTS.to_a)

        @doc = parse_html(linked)
      end

      # Second pass: link any SCHEME_PATTERN text that Rinku left alone.
      #
      # Returns the (possibly modified) document.
      def text_parse
        search_text_nodes(doc).each do |node|
          next if has_ancestor?(node, IGNORE_PARENTS)

          source = node.to_html
          next unless source.match(SCHEME_PATTERN)

          # Text starting with a Rinku-handled scheme that is still unlinked
          # was presumably skipped by Rinku for a reason, so leave it alone.
          next if source.start_with?('http', 'https', 'ftp')

          linked = autolink_filter(source)
          node.replace(linked) unless linked == source
        end

        doc
      end

      # Wraps every SCHEME_PATTERN match in `text` with an <a> element whose
      # href and body are the matched URL, plus the configured attributes.
      #
      # NOTE(review): `text` comes from `node.to_html` and the match is then
      # passed through `content_tag` -- confirm URLs containing `&` are not
      # escaped twice.
      def autolink_filter(text)
        text.gsub(SCHEME_PATTERN) do |url|
          content_tag(:a, url, link_options.merge(href: url))
        end
      end

      # Memoized attributes for generated links (empty Hash when the context
      # provides no :link_attr).
      def link_options
        @link_options ||= (context[:link_attr] || {})
      end
    end
  end
end
......@@ -8,13 +8,8 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
@color_scheme = color_scheme
@project = @template.instance_variable_get("@project")
@options = options.dup
super options
end
def preprocess(full_document)
# Redcarpet doesn't allow SMB links when `safe_links_only` is enabled.
# FTP links are allowed, so we trick Redcarpet.
full_document.gsub("smb://", "ftp://smb:")
super(options)
end
# If project has issue number 39, apostrophe will be linked in
......@@ -25,6 +20,7 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
# This only influences regular text, code blocks are untouched.
# Replaces each straight apostrophe with the `&rsquo;` entity; text that is
# not `present?` is handed back untouched.
def normal_text(text)
  if text.present?
    text.gsub("'", "&rsquo;")
  else
    text
  end
end
......@@ -37,7 +33,7 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
# so we assume you're not using leading spaces that aren't tabs,
# and just replace them here.
if lexer.tag == 'make'
code.gsub! /^ /, "\t"
code.gsub!(/^ /, "\t")
end
formatter = Rugments::Formatters::HTML.new(
......@@ -46,17 +42,13 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
formatter.format(lexer.lex(code))
end
# Renders a Markdown link by passing the link text (`content`), its target
# (`link`) and `title` to `h.link_to_gfm`, returning whatever that produces.
# NOTE(review): `h` is defined outside this excerpt -- presumably the view
# helper proxy; confirm.
def link(link, title, content)
h.link_to_gfm(content, link, title: title)
end
# Final pass over the rendered document String.
#
# Reverses placeholder substitutions (mutating `full_document` in place via
# gsub!), optionally rewrites relative links, and returns the result of
# `h.gfm_with_options`.
def postprocess(full_document)
# Turn the "ftp://smb:" placeholder back into "smb://".
# NOTE(review): this is the inverse of an smb:// -> ftp://smb: rewrite done
# before rendering; confirm that rewrite is still performed.
full_document.gsub!("ftp://smb:", "smb://")
# Turn every &rsquo; back into a straight apostrophe (normal_text emits
# that entity).
full_document.gsub!("&rsquo;", "'")
# Relative links are rewritten only when the template has no truthy
# @project_wiki and a project is set.
unless @template.instance_variable_get("@project_wiki") || @project.nil?
full_document = h.create_relative_links(full_document)
end
# Hand over to the GFM pipeline with the options stored on this renderer.
h.gfm_with_options(full_document, @options)
end
end
require 'spec_helper'
module Gitlab::Markdown
  describe AutolinkFilter do
    let(:link) { 'http://about.gitlab.com/' }

    # Runs the filter under test over `html` with the given context options
    # and returns the resulting document.
    def filter(html, options = {})
      described_class.call(html, options)
    end

    it 'does nothing when :autolink is false' do
      result = filter(link, autolink: false)

      expect(result.to_html).to eq link
    end

    it 'does nothing with non-link text' do
      text = 'This text contains no links to autolink'

      expect(filter(text).to_html).to eq text
    end

    # Schemes linked by the Rinku pass
    context 'Rinku schemes' do
      it 'autolinks http' do
        anchor = filter("See #{link}").at_css('a')

        expect(anchor.text).to eq link
        expect(anchor['href']).to eq link
      end

      it 'autolinks https' do
        url = 'https://google.com/'
        anchor = filter("See #{url}").at_css('a')

        expect(anchor.text).to eq url
        expect(anchor['href']).to eq url
      end

      it 'autolinks ftp' do
        url = 'ftp://ftp.us.debian.org/debian/'
        anchor = filter("See #{url}").at_css('a')

        expect(anchor.text).to eq url
        expect(anchor['href']).to eq url
      end

      it 'accepts link_attr options' do
        anchor = filter("See #{link}", link_attr: {class: 'custom'}).at_css('a')

        expect(anchor['class']).to eq 'custom'
      end

      described_class::IGNORE_PARENTS.each do |tag|
        it "ignores valid links contained inside '#{tag}' element" do
          markup = "<#{tag}>See #{link}</#{tag}>"

          expect(filter(markup).to_html).to eq markup
        end
      end
    end

    # Schemes only caught by the SCHEME_PATTERN pass
    context 'other schemes' do
      let(:link) { 'foo://bar.baz/' }

      it 'autolinks smb' do
        url = 'smb:///Volumes/shared/foo.pdf'
        anchor = filter("See #{url}").at_css('a')

        expect(anchor.text).to eq url
        expect(anchor['href']).to eq url
      end

      it 'autolinks irc' do
        url = 'irc://irc.freenode.net/git'
        anchor = filter("See #{url}").at_css('a')

        expect(anchor.text).to eq url
        expect(anchor['href']).to eq url
      end

      it 'does not include trailing punctuation' do
        with_period = filter("See #{link}.")
        expect(with_period.at_css('a').text).to eq link

        with_comma = filter("See #{link}, ok?")
        expect(with_comma.at_css('a').text).to eq link
      end

      it 'accepts link_attr options' do
        anchor = filter("See #{link}", link_attr: {class: 'custom'}).at_css('a')

        expect(anchor['class']).to eq 'custom'
      end

      described_class::IGNORE_PARENTS.each do |tag|
        it "ignores valid links contained inside '#{tag}' element" do
          markup = "<#{tag}>See #{link}</#{tag}>"

          expect(filter(markup).to_html).to eq markup
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment