Commit 40c8295a authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'rs-disallow-id-class' into 'master'

Remove class and id attributes from SanitizationFilter whitelist

Closes internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2302

See merge request !631
parents 15bbb4ee 70bbf093
...@@ -423,7 +423,7 @@ Quote break. ...@@ -423,7 +423,7 @@ Quote break.
You can also use raw HTML in your Markdown, and it'll mostly work pretty well. You can also use raw HTML in your Markdown, and it'll mostly work pretty well.
See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements. See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements.
```no-highlight ```no-highlight
<dl> <dl>
......
...@@ -10,8 +10,9 @@ module Gitlab ...@@ -10,8 +10,9 @@ module Gitlab
def whitelist def whitelist
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
# Allow `class` and `id` on all elements # Allow code highlighting
whitelist[:attributes][:all].push('class', 'id') whitelist[:attributes]['pre'] = %w(class)
whitelist[:attributes]['span'] = %w(class)
# Allow table alignment # Allow table alignment
whitelist[:attributes]['th'] = %w(style) whitelist[:attributes]['th'] = %w(style)
...@@ -23,6 +24,9 @@ module Gitlab ...@@ -23,6 +24,9 @@ module Gitlab
# Remove `rel` attribute from `a` elements # Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel) whitelist[:transformers].push(remove_rel)
# Remove `class` attribute from non-highlight spans
whitelist[:transformers].push(clean_spans)
whitelist whitelist
end end
...@@ -33,6 +37,17 @@ module Gitlab ...@@ -33,6 +37,17 @@ module Gitlab
end end
end end
end end
def clean_spans
lambda do |env|
return unless env[:node_name] == 'span'
return unless env[:node].has_attribute?('class')
unless has_ancestor?(env[:node], 'pre')
env[:node].remove_attribute('class')
end
end
end
end end
end end
end end
...@@ -60,8 +60,8 @@ describe 'GitLab Markdown' do ...@@ -60,8 +60,8 @@ describe 'GitLab Markdown' do
@feat.teardown @feat.teardown
end end
# Given a header ID, goes to that element's parent (the header), then to its # Given a header ID, goes to that element's parent (the header itself), then
# second sibling (the body). # its next sibling element (the body).
def get_section(id) def get_section(id)
@doc.at_css("##{id}").parent.next_element @doc.at_css("##{id}").parent.next_element
end end
...@@ -119,18 +119,18 @@ describe 'GitLab Markdown' do ...@@ -119,18 +119,18 @@ describe 'GitLab Markdown' do
describe 'HTML::Pipeline' do describe 'HTML::Pipeline' do
describe 'SanitizationFilter' do describe 'SanitizationFilter' do
it 'uses a permissive whitelist' do it 'uses a permissive whitelist' do
expect(@doc).to have_selector('b#manual-b') expect(@doc).to have_selector('b:contains("b tag")')
expect(@doc).to have_selector('em#manual-em') expect(@doc).to have_selector('em:contains("em tag")')
expect(@doc).to have_selector("code#manual-code") expect(@doc).to have_selector('code:contains("code tag")')
expect(@doc).to have_selector('kbd:contains("s")') expect(@doc).to have_selector('kbd:contains("s")')
expect(@doc).to have_selector('strike:contains(Emoji)') expect(@doc).to have_selector('strike:contains(Emoji)')
expect(@doc).to have_selector('img#manual-img') expect(@doc).to have_selector('img[src*="smile.png"]')
expect(@doc).to have_selector('br#manual-br') expect(@doc).to have_selector('br')
expect(@doc).to have_selector('hr#manual-hr') expect(@doc).to have_selector('hr')
end end
it 'permits span elements' do it 'permits span elements' do
expect(@doc).to have_selector('span#span-class-light.light') expect(@doc).to have_selector('span:contains("span tag")')
end end
it 'permits table alignment' do it 'permits table alignment' do
...@@ -144,13 +144,12 @@ describe 'GitLab Markdown' do ...@@ -144,13 +144,12 @@ describe 'GitLab Markdown' do
end end
it 'removes `rel` attribute from links' do it 'removes `rel` attribute from links' do
expect(@doc).to have_selector('a#a-rel-nofollow') body = get_section('sanitizationfilter')
expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]') expect(body).not_to have_selector('a[rel]')
end end
it "removes `href` from `a` elements if it's fishy" do it "removes `href` from `a` elements if it's fishy" do
expect(@doc).to have_selector('a#a-href-javascript') expect(@doc).not_to have_selector('a[href*="javascript"]')
expect(@doc).not_to have_selector('a#a-href-javascript[href]')
end end
end end
...@@ -228,7 +227,8 @@ describe 'GitLab Markdown' do ...@@ -228,7 +227,8 @@ describe 'GitLab Markdown' do
%w(code a kbd).each do |elem| %w(code a kbd).each do |elem|
it "ignores links inside '#{elem}' element" do it "ignores links inside '#{elem}' element" do
expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text body = get_section('autolinkfilter')
expect(body).not_to have_selector("#{elem} a")
end end
end end
end end
......
...@@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through... ...@@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through...
### SanitizationFilter ### SanitizationFilter
GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a> GitLab uses <a href="http://git.io/vfW8a">HTML::Pipeline::SanitizationFilter</a>
to sanitize the generated HTML, stripping dangerous or unwanted tags. to sanitize the generated HTML, stripping dangerous or unwanted tags.
Its default whitelist is pretty permissive. Check it: Its default whitelist is pretty permissive. Check it:
<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>. <b>b tag</b> and <em>em tag</em>.
<code id="manual-code">echo "Hello, world!"</code> <code>code tag</code>
Press <kbd>s</kbd> to search. Press <kbd>s</kbd> to search.
<strike>Emoji</strike> Plain old images! <img <strike>Emoji</strike> Plain old images! <img src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20" height="20" />
src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20"
height="20" id="manual-img" />
Here comes a line break: Here comes a line break:
<br id="manual-br" /> <br />
And a horizontal rule: And a horizontal rule:
<hr id="manual-hr" /> <hr />
As permissive as it is, we've allowed even more stuff: As permissive as it is, we've allowed even more stuff:
<span class="light" id="span-class-light">Span elements</span> <span>span tag</span>
<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a> <a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a> <a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
### Escaping ### Escaping
...@@ -125,9 +123,9 @@ These are all plain text that should get turned into links: ...@@ -125,9 +123,9 @@ These are all plain text that should get turned into links:
But it shouldn't autolink text inside certain tags: But it shouldn't autolink text inside certain tags:
- <code id="autolink-code">http://about.gitlab.com/</code> - <code>http://about.gitlab.com/</code>
- <a id="autolink-a">http://about.gitlab.com/</a> - <a>http://about.gitlab.com/</a>
- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd> - <kbd>http://about.gitlab.com/</kbd>
### Reference Filters (e.g., #<%= issue.iid %>) ### Reference Filters (e.g., #<%= issue.iid %>)
......
...@@ -29,17 +29,27 @@ module Gitlab::Markdown ...@@ -29,17 +29,27 @@ module Gitlab::Markdown
exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>" exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq exp
end end
it 'sanitizes `class` attribute on any element' do
act = %q{<strong class="foo">Strong</strong>}
expect(filter(act).to_html).to eq %q{<strong>Strong</strong>}
end
it 'sanitizes `id` attribute on any element' do
act = %q{<em id="foo">Emphasis</em>}
expect(filter(act).to_html).to eq %q{<em>Emphasis</em>}
end
end end
describe 'custom whitelist' do describe 'custom whitelist' do
it 'allows `class` attribute on any element' do it 'allows syntax highlighting' do
exp = act = %q{<strong class="foo">Strong</strong>} exp = act = %q{<pre class="code highlight white c"><code><span class="k">def</span></code></pre>}
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq exp
end end
it 'allows `id` attribute on any element' do it 'sanitizes `class` attribute from non-highlight spans' do
exp = act = %q{<em id="foo">Emphasis</em>} act = %q{<span class="k">def</span>}
expect(filter(act).to_html).to eq exp expect(filter(act).to_html).to eq %q{<span>def</span>}
end end
it 'allows `style` attribute on table elements' do it 'allows `style` attribute on table elements' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment