Commit 286c9e68 authored by Robert Speicher's avatar Robert Speicher

Add a feature spec for our entire Markdown parsing stack

parent 2f4426b7
require 'spec_helper'
require 'erb'
# This feature spec is intended to be a comprehensive exercising of all of
# GitLab's non-standard Markdown parsing and the integration thereof.
#
# These tests should be very high-level. Anything low-level belongs in the specs
# for the corresponding HTML::Pipeline filter or helper method.
#
# The idea is to pass a Markdown document through our entire processing stack.
#
# The process looks like this:
#
# Raw Markdown
# -> `markdown` helper
# -> Redcarpet::Render::GitlabHTML converts Markdown to HTML
# -> Post-process HTML
# -> `gfm_with_options` helper
# -> HTML::Pipeline
# -> Sanitize
# -> Emoji
# -> Table of Contents
# -> Autolinks
# -> Rinku (http, https, ftp)
# -> Other schemes
# -> References
# -> `html_safe`
# -> Template
#
# See the MarkdownFeature class for setup details.
describe 'GitLab Markdown' do
include ActionView::Helpers::TagHelper
include ActionView::Helpers::UrlHelper
include Capybara::Node::Matchers
include GitlabMarkdownHelper
# `markdown` calls these two methods
def current_user
@feat.user
end
def user_color_scheme_class
:white
end
# Let's only parse this thing once
before(:all) do
@feat = MarkdownFeature.new
# `markdown` expects a `@project` variable
@project = @feat.project
@md = markdown(@feat.raw_markdown)
@doc = Nokogiri::HTML::DocumentFragment.parse(@md)
end
after(:all) do
@feat.teardown
end
# Given a header ID, goes to that element's parent (the header), then to its
# second sibling (the body).
def get_section(id)
@doc.at_css("##{id}").parent.next.next
end
# it 'writes to a file' do
# File.open(Rails.root.join('tmp/capybara/markdown_spec.html'), 'w') do |file|
# file.puts @md
# end
# end
describe 'Markdown' do
describe 'No Intra Emphasis' do
it 'does not parse emphasis inside of words' do
body = get_section('no-intra-emphasis')
expect(body.to_html).not_to match('foo<em>bar</em>baz')
end
end
describe 'Tables' do
it 'parses table Markdown' do
body = get_section('tables')
expect(body).to have_selector('th:contains("Header")')
expect(body).to have_selector('th:contains("Row")')
expect(body).to have_selector('th:contains("Example")')
end
it 'allows Markdown in tables' do
expect(@doc.at_css('td:contains("Baz")').children.to_html).
to eq '<strong>Baz</strong>'
end
end
describe 'Fenced Code Blocks' do
it 'parses fenced code blocks' do
expect(@doc).to have_selector('pre.code.highlight.white.c')
expect(@doc).to have_selector('pre.code.highlight.white.python')
end
end
describe 'Strikethrough' do
it 'parses strikethroughs' do
expect(@doc).to have_selector(%{del:contains("and this text doesn't")})
end
end
describe 'Superscript' do
it 'parses superscript' do
body = get_section('superscript')
expect(body.to_html).to match('1<sup>st</sup>')
expect(body.to_html).to match('2<sup>nd</sup>')
end
end
end
describe 'HTML::Pipeline' do
describe 'SanitizationFilter' do
it 'uses a permissive whitelist' do
expect(@doc).to have_selector('b#manual-b')
expect(@doc).to have_selector('em#manual-em')
expect(@doc).to have_selector("code#manual-code")
expect(@doc).to have_selector('kbd:contains("s")')
expect(@doc).to have_selector('strike:contains(Emoji)')
expect(@doc).to have_selector('img#manual-img')
expect(@doc).to have_selector('br#manual-br')
expect(@doc).to have_selector('hr#manual-hr')
end
it 'permits span elements' do
expect(@doc).to have_selector('span#span-class-light.light')
end
it 'permits table alignment' do
expect(@doc.at_css('th:contains("Header")')['style']).to eq 'text-align: center'
expect(@doc.at_css('th:contains("Row")')['style']).to eq 'text-align: right'
expect(@doc.at_css('th:contains("Example")')['style']).to eq 'text-align: left'
expect(@doc.at_css('td:contains("Foo")')['style']).to eq 'text-align: center'
expect(@doc.at_css('td:contains("Bar")')['style']).to eq 'text-align: right'
expect(@doc.at_css('td:contains("Baz")')['style']).to eq 'text-align: left'
end
it 'removes `rel` attribute from links' do
expect(@doc).to have_selector('a#a-rel-nofollow')
expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]')
end
it "removes `href` from `a` elements if it's fishy" do
expect(@doc).to have_selector('a#a-href-javascript')
expect(@doc).not_to have_selector('a#a-href-javascript[href]')
end
end
describe 'Escaping' do
let(:table) { @doc.css('table').last.at_css('tbody') }
it 'escapes non-tag angle brackets' do
expect(table.at_xpath('.//tr[1]/td[3]').inner_html).to eq '1 &lt; 3 &amp; 5'
end
end
describe 'EmojiFilter' do
it 'parses Emoji' do
expect(@doc).to have_selector('img.emoji', count: 10)
end
end
describe 'TableOfContentsFilter' do
it 'creates anchors inside header elements' do
expect(@doc).to have_selector('h1 a#gitlab-markdown')
expect(@doc).to have_selector('h2 a#markdown')
expect(@doc).to have_selector('h3 a#autolinkfilter')
end
end
describe 'AutolinkFilter' do
let(:list) { get_section('autolinkfilter').parent.search('ul') }
def item(index)
list.at_css("li:nth-child(#{index})")
end
it 'autolinks http://' do
expect(item(1).children.first.name).to eq 'a'
expect(item(1).children.first['href']).to eq 'http://about.gitlab.com/'
end
it 'autolinks https://' do
expect(item(2).children.first.name).to eq 'a'
expect(item(2).children.first['href']).to eq 'https://google.com/'
end
it 'autolinks ftp://' do
expect(item(3).children.first.name).to eq 'a'
expect(item(3).children.first['href']).to eq 'ftp://ftp.us.debian.org/debian/'
end
it 'autolinks smb://' do
expect(item(4).children.first.name).to eq 'a'
expect(item(4).children.first['href']).to eq 'smb://foo/bar/baz'
end
it 'autolinks irc://' do
expect(item(5).children.first.name).to eq 'a'
expect(item(5).children.first['href']).to eq 'irc://irc.freenode.net/git'
end
# TODO (rspeicher): Do we really want this?
it 'autolinks short, invalid URLs' do
skip 'rspeicher: Pending decision'
expect(item(6).children.first.name).to eq 'a'
expect(item(6).children.first['href']).to eq 'http://foo'
end
%w(code a kbd).each do |elem|
it "ignores links inside '#{elem}' element" do
expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text
end
end
end
describe 'ReferenceFilter' do
it 'handles references in headers' do
header = @doc.at_css('#reference-filters-eg-1').parent
expect(header.css('a').size).to eq 2
end
it "handles references in Markdown" do
body = get_section('reference-filters-eg-1')
expect(body).to have_selector('em a.gfm-merge_request', count: 1)
end
it 'parses user references' do
body = get_section('userreferencefilter')
expect(body).to have_selector('a.gfm.gfm-project_member', count: 3)
end
it 'parses issue references' do
body = get_section('issuereferencefilter')
expect(body).to have_selector('a.gfm.gfm-issue', count: 2)
end
it 'parses merge request references' do
body = get_section('mergerequestreferencefilter')
expect(body).to have_selector('a.gfm.gfm-merge_request', count: 2)
end
it 'parses snippet references' do
body = get_section('snippetreferencefilter')
expect(body).to have_selector('a.gfm.gfm-snippet', count: 2)
end
it 'parses commit range references' do
body = get_section('commitrangereferencefilter')
expect(body).to have_selector('a.gfm.gfm-commit_range', count: 2)
end
it 'parses commit references' do
body = get_section('commitreferencefilter')
expect(body).to have_selector('a.gfm.gfm-commit', count: 2)
end
it 'parses label references' do
body = get_section('labelreferencefilter')
expect(body).to have_selector('a.gfm.gfm-label', count: 3)
end
end
end
end
# This is a helper class used by the GitLab Markdown feature spec
#
# Because the feature spec only cares about the output of the Markdown, and the
# test setup and teardown and parsing is fairly expensive, we only want to do it
# once. Unfortunately RSpec will not let you access `let`s in a `before(:all)`
# block, so we fake it by encapsulating all the shared setup in this class.
#
# The class contains the raw Markup used in the test, dynamically substituting
# real objects, created from factories and setup on-demand, when referenced in
# the Markdown.
class MarkdownFeature
include FactoryGirl::Syntax::Methods
def initialize
DatabaseCleaner.start
end
def teardown
DatabaseCleaner.clean
end
def user
@user ||= create(:user)
end
def group
unless @group
@group = create(:group)
@group.add_user(user, Gitlab::Access::DEVELOPER)
end
@group
end
# Direct references ----------------------------------------------------------
def project
@project ||= create(:project)
end
def issue
@issue ||= create(:issue, project: project)
end
def merge_request
@merge_request ||= create(:merge_request, :simple, source_project: project)
end
def snippet
@snippet ||= create(:project_snippet, project: project)
end
def commit
@commit ||= project.repository.commit
end
def commit_range
unless @commit_range
commit2 = project.repository.commit('HEAD~3')
@commit_range = CommitRange.new("#{commit.id}...#{commit2.id}")
end
@commit_range
end
def simple_label
@simple_label ||= create(:label, name: 'gfm', project: project)
end
def label
@label ||= create(:label, name: 'awaiting feedback', project: project)
end
# Cross-references -----------------------------------------------------------
def xproject
unless @xproject
namespace = create(:namespace, name: 'cross-reference')
@xproject = create(:project, namespace: namespace)
@xproject.team << [user, :developer]
end
@xproject
end
# Shortcut to "cross-reference/project"
def xref
xproject.path_with_namespace
end
def xissue
@xissue ||= create(:issue, project: xproject)
end
def xmerge_request
@xmerge_request ||= create(:merge_request, :simple, source_project: xproject)
end
def xsnippet
@xsnippet ||= create(:project_snippet, project: xproject)
end
def xcommit
@xcommit ||= xproject.repository.commit
end
def xcommit_range
unless @xcommit_range
xcommit2 = xproject.repository.commit('HEAD~2')
@xcommit_range = CommitRange.new("#{xcommit.id}...#{xcommit2.id}")
end
@xcommit_range
end
def raw_markdown
fixture = Rails.root.join('spec/fixtures/markdown.md.erb')
ERB.new(File.read(fixture)).result(binding)
end
end
# GitLab Markdown
This document is intended to be a comprehensive example of custom GitLab
Markdown usage. It will be parsed and then tested for accuracy. Let's get
started.
## Markdown
GitLab uses [Redcarpet](http://git.io/ld_NVQ) to parse all Markdown into
HTML.
It has some special features. Let's try 'em out!
### No Intra Emphasis
This string should have no emphasis: foo_bar_baz
### Tables
| Header | Row | Example |
| :------: | ---: | :------ |
| Foo | Bar | **Baz** |
### Fenced Code Blocks
```c
#include<stdio.h>
main()
{
printf("Hello World");
}
```
```python
print "Hello, World!"
```
### Strikethrough
This text says this, ~~and this text doesn't~~.
### Superscript
This is my 1^(st) time using superscript in Markdown. Now this is my
2^(nd).
### Next step
After the Markdown has been turned into HTML, it gets passed through...
## HTML::Pipeline
### SanitizationFilter
GitLab uses <a href="http://git.io/vfW8a" class="sanitize" id="sanitize-link">HTML::Pipeline::SanitizationFilter</a>
to sanitize the generated HTML, stripping dangerous or unwanted tags.
Its default whitelist is pretty permissive. Check it:
<b id="manual-b">This text is bold</b> and <em id="manual-em">this text is emphasized</em>.
<code id="manual-code">echo "Hello, world!"</code>
Press <kbd>s</kbd> to search.
<strike>Emoji</strike> Plain old images! <img
src="http://www.emoji-cheat-sheet.com/graphics/emojis/smile.png" width="20"
height="20" id="manual-img" />
Here comes a line break:
<br id="manual-br" />
And a horizontal rule:
<hr id="manual-hr" />
As permissive as it is, we've allowed even more stuff:
<span class="light" id="span-class-light">Span elements</span>
<a href="#" rel="nofollow" id="a-rel-nofollow">This is a link with a defined rel attribute, which should be removed</a>
<a href="javascript:alert('Hi')" id="a-href-javascript">This is a link trying to be sneaky. It gets its link removed entirely.</a>
### Escaping
The problem with SanitizationFilter is that it can be too aggressive.
| Input | Expected | Actual |
| ----------- | ---------------- | --------- |
| `1 < 3 & 5` | 1 &lt; 3 &amp; 5 | 1 < 3 & 5 |
| `<foo>` | &lt;foo&gt; | <foo> |
### EmojiFilter
Because life would be :zzz: without Emoji, right? :rocket:
Get ready for the Emoji :bomb:: :+1::-1::ok_hand::wave::v::raised_hand::muscle:
### TableOfContentsFilter
All headers in this document should be linkable. Try it.
### AutolinkFilter
These are all plain text that should get turned into links:
- http://about.gitlab.com/
- https://google.com/
- ftp://ftp.us.debian.org/debian/
- smb://foo/bar/baz
- irc://irc.freenode.net/git
- http://localhost:3000
But it shouldn't autolink text inside certain tags:
- <code id="autolink-code">http://about.gitlab.com/</code>
- <a id="autolink-a">http://about.gitlab.com/</a>
- <kbd id="autolink-kbd">http://about.gitlab.com/</kbd>
### Reference Filters (e.g., #<%= issue.iid %>)
References should be parseable even inside _!<%= merge_request.iid %>_ emphasis.
#### UserReferenceFilter
- All: @all
- User: @<%= user.username %>
- Group: @<%= group.name %>
- Ignores invalid: @fake_user
- Ignored in code: `@<%= user.username %>`
- Ignored in links: [Link to @<%= user.username %>](#user-link)
#### IssueReferenceFilter
- Issue: #<%= issue.iid %>
- Issue in another project: <%= xref %>#<%= xissue.iid %>
- Ignores HTML entities: TODO:&#39;
- Ignored in code: `#<%= issue.iid %>`
- Ignored in links: [Link to #<%= issue.iid %>](#issue-link)
#### MergeRequestReferenceFilter
- Merge request: !<%= merge_request.iid %>
- Merge request in another project: <%= xref %>!<%= xmerge_request.iid %>
- Ignored in code: `!<%= merge_request.iid %>`
- Ignored in links: [Link to !<%= merge_request.iid %>](#merge-request-link)
#### SnippetReferenceFilter
- Snippet: $<%= snippet.id %>
- Snippet in another project: <%= xref %>$<%= xsnippet.id %>
- Ignored in code: `$<%= snippet.id %>`
- Ignored in links: [Link to $<%= snippet.id %>](#snippet-link)
#### CommitRangeReferenceFilter
- Range: <%= commit_range %>
- Range in another project: <%= xref %>@<%= xcommit_range %>
- Ignored in code: `<%= commit_range %>`
- Ignored in links: [Link to <%= commit_range %>](#commit-range-link)
#### CommitReferenceFilter
- Commit: <%= commit.id %>
- Commit in another project: <%= xref %>@<%= xcommit.id %>
- Ignored in code: `<%= commit.id %>`
- Ignored in links: [Link to <%= commit.id %>](#commit-link)
#### LabelReferenceFilter
- Label by ID: ~<%= simple_label.id %>
- Label by name: ~<%= simple_label.name %>
- Label by name in quotes: ~"<%= label.name %>"
- Ignored in code: `~<%= simple_label.name %>`
- Ignored in links: [Link to ~<%= simple_label.id %>](#label-link)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment