Commit 34ab6dfa authored by Brett Walker's avatar Brett Walker

Properly process footnotes in markdown

All the ids and classes were stripped.  Add them back in
and make ids unique
parent 1b3affaf
---
title: Footnotes now work render properly in markdown
merge_request: 24168
author:
type: fixed
# frozen_string_literal: true
module Banzai
module Filter
# HTML Filter for footnotes
#
# Footnotes are supported in CommonMark. However we were stripping
# the ids during sanitization. Those are now allowed.
#
# Footnotes are numbered the same - the first one has `id=fn1`, the
# second is `id=fn2`, etc. In order to allow footnotes when rendering
# multiple markdown blocks on a page, we need to make each footnote
# reference unique.
#
# This filter adds a random number to each footnote (the same number
# can be used for a single render). So you get `id=fn1-4335` and `id=fn2-4335`.
#
class FootnoteFilter < HTML::Pipeline::Filter
INTEGER_PATTERN = /\A\d+\Z/.freeze
def call
return doc unless first_footnote = doc.at_css('ol > li[id=fn1]')
# Sanitization stripped off the section wrapper - add it back in
first_footnote.parent.wrap('<section class="footnotes">')
doc.css('sup > a[id]').each do |link_node|
ref_num = link_node[:id].delete_prefix('fnref')
footnote_node = doc.at_css("li[id=fn#{ref_num}]")
backref_node = doc.at_css("li[id=fn#{ref_num}] a[href=\"#fnref#{ref_num}\"]")
if ref_num =~ INTEGER_PATTERN && footnote_node && backref_node
rand_ref_num = "#{ref_num}-#{random_number}"
link_node[:href] = "#fn#{rand_ref_num}"
link_node[:id] = "fnref#{rand_ref_num}"
footnote_node[:id] = "fn#{rand_ref_num}"
backref_node[:href] = "#fnref#{rand_ref_num}"
# Sanitization stripped off class - add it back in
link_node.parent.append_class('footnote-ref')
backref_node.append_class('footnote-backref')
end
end
doc
end
private
def random_number
@random_number ||= rand(10000)
end
end
end
end
......@@ -9,7 +9,9 @@ module Banzai
include Gitlab::Utils::StrongMemoize
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/
TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze
FOOTNOTE_LINK_REFERENCE_PATTERN = /\Afnref\d\z/.freeze
FOOTNOTE_LI_REFERENCE_PATTERN = /\Afn\d\z/.freeze
def whitelist
strong_memoize(:whitelist) do
......@@ -57,6 +59,13 @@ module Banzai
# Remove any `style` properties not required for table alignment
whitelist[:transformers].push(self.class.remove_unsafe_table_style)
# Allow `id` in a and li elements for footnotes
whitelist[:attributes]['a'].push('id')
whitelist[:attributes]['li'] = %w(id)
# ...but remove any `id` properties not matching for footnotes
whitelist[:transformers].push(self.class.remove_non_footnote_ids)
whitelist
end
......@@ -112,6 +121,20 @@ module Banzai
end
end
end
def remove_non_footnote_ids
lambda do |env|
node = env[:node]
return unless node.name == 'a' || node.name == 'li'
return unless node.has_attribute?('id')
return if node.name == 'a' && node['id'] =~ FOOTNOTE_LINK_REFERENCE_PATTERN
return if node.name == 'li' && node['id'] =~ FOOTNOTE_LI_REFERENCE_PATTERN
node.remove_attribute('id')
end
end
end
end
end
......
......@@ -30,6 +30,7 @@ module Banzai
Filter::AutolinkFilter,
Filter::ExternalLinkFilter,
Filter::SuggestionFilter,
Filter::FootnoteFilter,
*reference_filters,
......
# frozen_string_literal: true
require 'spec_helper'
describe Banzai::Filter::FootnoteFilter do
include FilterSpecHelper
# first[^1] and second[^second]
# [^1]: one
# [^second]: two
let(:footnote) do
<<-EOF.strip_heredoc
<p>first<sup><a href="#fn1" id="fnref1">1</a></sup> and second<sup><a href="#fn2" id="fnref2">2</a></sup></p>
<ol>
<li id="fn1">
<p>one <a href="#fnref1">↩</a></p>
</li>
<li id="fn2">
<p>two <a href="#fnref2">↩</a></p>
</li>
</ol>
EOF
end
context 'when footnotes exist' do
let(:doc) { filter(footnote) }
let(:link_node) { doc.css('sup > a').first }
let(:identifier) { link_node[:id].delete_prefix('fnref1-') }
it 'adds identifier to footnotes' do
expect(link_node[:id]).to eq "fnref1-#{identifier}"
expect(link_node[:href]).to eq "#fn1-#{identifier}"
expect(doc.css("li[id=fn1-#{identifier}]")).not_to be_empty
expect(doc.css("li[id=fn1-#{identifier}] a[href=\"#fnref1-#{identifier}\"]")).not_to be_empty
end
it 'uses the same identifier for all footnotes' do
expect(doc.css("li[id=fn2-#{identifier}]")).not_to be_empty
expect(doc.css("li[id=fn2-#{identifier}] a[href=\"#fnref2-#{identifier}\"]")).not_to be_empty
end
it 'adds section and classes' do
expect(doc.css("section[class=footnotes]")).not_to be_empty
expect(doc.css("sup[class=footnote-ref]").count).to eq 2
expect(doc.css("a[class=footnote-backref]").count).to eq 2
end
end
end
......@@ -246,7 +246,7 @@ describe Banzai::Filter::SanitizationFilter do
'protocol-based JS injection: spaces and entities' => {
input: '<a href=" &#14; javascript:alert(\'XSS\');">foo</a>',
output: '<a href>foo</a>'
output: '<a href="">foo</a>'
},
'protocol whitespace' => {
......@@ -300,5 +300,41 @@ describe Banzai::Filter::SanitizationFilter do
expect(act.to_html).to eq exp
end
describe 'footnotes' do
it 'allows id property on links' do
exp = %q{<a href="#fn1" id="fnref1">foo/bar.md</a>}
act = filter(exp)
expect(act.to_html).to eq exp
end
it 'allows id property on li element' do
exp = %q{<ol><li id="fn1">footnote</li></ol>}
act = filter(exp)
expect(act.to_html).to eq exp
end
it 'only allows valid footnote formats for links' do
exp = %q{<a href="#fn1">link</a>}
%w[fnrefx test xfnref1].each do |id|
act = filter(%Q{<a href="#fn1" id="#{id}">link</a>})
expect(act.to_html).to eq exp
end
end
it 'only allows valid footnote formats for li' do
exp = %q{<ol><li>footnote</li></ol>}
%w[fnx test xfn1].each do |id|
act = filter(%Q{<ol><li id="#{id}">footnote</li></ol>})
expect(act.to_html).to eq exp
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment