Commit 382a0aa6 authored by Robert Speicher's avatar Robert Speicher

Add Gitlab::Markdown::TableOfContentsFilter

Removes header and table of contents processing from Redcarpet renderer.
parent 286c9e68
...@@ -34,7 +34,6 @@ module GitlabMarkdownHelper ...@@ -34,7 +34,6 @@ module GitlabMarkdownHelper
# see https://github.com/vmg/redcarpet#darling-i-packed-you-a-couple-renderers-for-lunch # see https://github.com/vmg/redcarpet#darling-i-packed-you-a-couple-renderers-for-lunch
rend = Redcarpet::Render::GitlabHTML.new(self, user_color_scheme_class, { rend = Redcarpet::Render::GitlabHTML.new(self, user_color_scheme_class, {
with_toc_data: true,
safe_links_only: true, safe_links_only: true,
# Handled further down the line by HTML::Pipeline::SanitizationFilter # Handled further down the line by HTML::Pipeline::SanitizationFilter
escape_html: false escape_html: false
......
...@@ -38,6 +38,7 @@ module Gitlab ...@@ -38,6 +38,7 @@ module Gitlab
autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter' autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter'
autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter' autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter'
autoload :SnippetReferenceFilter, 'gitlab/markdown/snippet_reference_filter' autoload :SnippetReferenceFilter, 'gitlab/markdown/snippet_reference_filter'
autoload :TableOfContentsFilter, 'gitlab/markdown/table_of_contents_filter'
autoload :UserReferenceFilter, 'gitlab/markdown/user_reference_filter' autoload :UserReferenceFilter, 'gitlab/markdown/user_reference_filter'
# Public: Parse the provided text with GitLab-Flavored Markdown # Public: Parse the provided text with GitLab-Flavored Markdown
...@@ -81,6 +82,9 @@ module Gitlab ...@@ -81,6 +82,9 @@ module Gitlab
asset_root: Gitlab.config.gitlab.url, asset_root: Gitlab.config.gitlab.url,
asset_host: Gitlab::Application.config.asset_host, asset_host: Gitlab::Application.config.asset_host,
# TableOfContentsFilter
no_header_anchors: options[:no_header_anchors],
# ReferenceFilter # ReferenceFilter
current_user: current_user, current_user: current_user,
only_path: options[:reference_only_path], only_path: options[:reference_only_path],
...@@ -117,6 +121,7 @@ module Gitlab ...@@ -117,6 +121,7 @@ module Gitlab
HTML::Pipeline::SanitizationFilter, HTML::Pipeline::SanitizationFilter,
Gitlab::Markdown::EmojiFilter, Gitlab::Markdown::EmojiFilter,
Gitlab::Markdown::TableOfContentsFilter,
Gitlab::Markdown::UserReferenceFilter, Gitlab::Markdown::UserReferenceFilter,
Gitlab::Markdown::IssueReferenceFilter, Gitlab::Markdown::IssueReferenceFilter,
......
require 'html/pipeline/filter'
module Gitlab
module Markdown
# HTML filter that adds an anchor child element to all Headers in a
# document, so that they can be linked to.
#
# Generates the Table of Contents with links to each header. See Results.
#
# Based on HTML::Pipeline::TableOfContentsFilter.
#
# Context options:
# :no_header_anchors - Skips all processing done by this filter.
#
# Results:
# :toc - String containing Table of Contents data as a `ul` element with
# `li` child elements.
class TableOfContentsFilter < HTML::Pipeline::Filter
PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u
def call
return doc if context[:no_header_anchors]
result[:toc] = ""
headers = Hash.new(0)
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
text = node.text
id = text.downcase
id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
id.gsub!(' ', '-') # replace spaces with dash
id.squeeze!(' -') # replace multiple spaces or dashes with one
uniq = (headers[id] > 0) ? "-#{headers[id]}" : ''
headers[id] += 1
if header_content = node.children.first
href = "#{id}#{uniq}"
push_toc(href, text)
header_content.add_previous_sibling(anchor_tag(href))
end
end
result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty?
doc
end
private
def anchor_tag(href)
%Q{<a id="#{href}" class="anchor" href="##{href}" aria-hidden="true"></a>}
end
def push_toc(href, text)
result[:toc] << %Q{<li><a href="##{href}">#{text}</a></li>\n}
end
end
end
end
...@@ -50,16 +50,6 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML ...@@ -50,16 +50,6 @@ class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
h.link_to_gfm(content, link, title: title) h.link_to_gfm(content, link, title: title)
end end
def header(text, level)
if @options[:no_header_anchors]
"<h#{level}>#{text}</h#{level}>"
else
id = ActionController::Base.helpers.strip_tags(h.gfm(text)).downcase() \
.gsub(/[^a-z0-9_-]/, '-').gsub(/-+/, '-').gsub(/^-/, '').gsub(/-$/, '')
"<h#{level} id=\"#{id}\">#{text}<a href=\"\##{id}\"></a></h#{level}>"
end
end
def postprocess(full_document) def postprocess(full_document)
full_document.gsub!("ftp://smb:", "smb://") full_document.gsub!("ftp://smb:", "smb://")
......
...@@ -207,23 +207,6 @@ describe GitlabMarkdownHelper do ...@@ -207,23 +207,6 @@ describe GitlabMarkdownHelper do
end end
describe "#markdown" do describe "#markdown" do
# TODO (rspeicher) - This block tests multiple different contexts. Break this up!
it "should add ids and links to headers" do
# Test every rule except nested tags.
text = '..Ab_c-d. e..'
id = 'ab_c-d-e'
expect(markdown("# #{text}")).
to match(%r{<h1 id="#{id}">#{text}<a href="[^"]*##{id}"></a></h1>})
expect(markdown("# #{text}", {no_header_anchors:true})).
to eq("<h1>#{text}</h1>")
id = 'link-text'
expect(markdown("# [link text](url) ![img alt](url)")).to match(
%r{<h1 id="#{id}"><a href="[^"]*url">link text</a> <img[^>]*><a href="[^"]*##{id}"></a></h1>}
)
end
# REFERENCES (PART TWO: THE REVENGE) --------------------------------------- # REFERENCES (PART TWO: THE REVENGE) ---------------------------------------
it "should handle references in headers" do it "should handle references in headers" do
......
# encoding: UTF-8
require 'spec_helper'
module Gitlab::Markdown
describe TableOfContentsFilter do
def filter(html, options = {})
described_class.call(html, options)
end
def header(level, text)
"<h#{level}>#{text}</h#{level}>\n"
end
it 'does nothing when :no_header_anchors is truthy' do
exp = act = header(1, 'Header')
expect(filter(act, no_header_anchors: 1).to_html).to eq exp
end
it 'does nothing with empty headers' do
exp = act = header(1, nil)
expect(filter(act).to_html).to eq exp
end
1.upto(6) do |i|
it "processes h#{i} elements" do
html = header(i, "Header #{i}")
doc = filter(html)
expect(doc.css("h#{i} a").first.attr('id')).to eq "header-#{i}"
end
end
describe 'anchor tag' do
it 'has an `anchor` class' do
doc = filter(header(1, 'Header'))
expect(doc.css('h1 a').first.attr('class')).to eq 'anchor'
end
it 'links to the id' do
doc = filter(header(1, 'Header'))
expect(doc.css('h1 a').first.attr('href')).to eq '#header'
end
describe 'generated IDs' do
it 'translates spaces to dashes' do
doc = filter(header(1, 'This header has spaces in it'))
expect(doc.css('h1 a').first.attr('id')).to eq 'this-header-has-spaces-in-it'
end
it 'squeezes multiple spaces and dashes' do
doc = filter(header(1, 'This---header is poorly-formatted'))
expect(doc.css('h1 a').first.attr('id')).to eq 'this-header-is-poorly-formatted'
end
it 'removes punctuation' do
doc = filter(header(1, "This, header! is, filled. with @ punctuation?"))
expect(doc.css('h1 a').first.attr('id')).to eq 'this-header-is-filled-with-punctuation'
end
it 'appends a unique number to duplicates' do
doc = filter(header(1, 'One') + header(2, 'One'))
expect(doc.css('h1 a').first.attr('id')).to eq 'one'
expect(doc.css('h2 a').first.attr('id')).to eq 'one-1'
end
it 'supports Unicode' do
doc = filter(header(1, '한글'))
expect(doc.css('h1 a').first.attr('id')).to eq '한글'
expect(doc.css('h1 a').first.attr('href')).to eq '#한글'
end
end
end
describe 'result' do
def result(html)
HTML::Pipeline.new([described_class]).call(html)
end
let(:results) { result(header(1, 'Header 1') + header(2, 'Header 2')) }
let(:doc) { Nokogiri::XML::DocumentFragment.parse(results[:toc]) }
it 'is contained within a `ul` element' do
expect(doc.children.first.name).to eq 'ul'
expect(doc.children.first.attr('class')).to eq 'section-nav'
end
it 'contains an `li` element for each header' do
expect(doc.css('li').length).to eq 2
links = doc.css('li a')
expect(links.first.attr('href')).to eq '#header-1'
expect(links.first.text).to eq 'Header 1'
expect(links.last.attr('href')).to eq '#header-2'
expect(links.last.text).to eq 'Header 2'
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment