Commit 786cc880 authored by Alex Kalderimis's avatar Alex Kalderimis

Add front-matter support to wiki pages

This adds library code for parsing wiki-page front-matter, behind a
feature flag. The parser encapsulates recognizing and extracting
front-matter, and explaining why extraction failed.

The WikiPage model is provided with a new attribute `front_matter`,
which is derived from the content. This attribute can be updated and
serialized to the wiki-page repository.

The feature flag is gated on a project-by-project basis.

Front-matter parsing is unified across concerns, by
re-using our existing front-matter parsing Regexp in the new
front-matter parser, to ensure consistency.

A new module, `Gitlab::WikiPages` is used by model and library code.

`parse_string` is made a separate method so that different languages can
be supported without modifying other code.
parent 02ed457d
......@@ -2,17 +2,19 @@
# rubocop:disable Rails/ActiveRecordAliases
class WikiPage
include Gitlab::Utils::StrongMemoize
PageChangedError = Class.new(StandardError)
PageRenameError = Class.new(StandardError)
MAX_TITLE_BYTES = 245
MAX_DIRECTORY_BYTES = 255
FrontMatterTooLong = Class.new(StandardError)
include ActiveModel::Validations
include ActiveModel::Conversion
include StaticModel
extend ActiveModel::Naming
delegate :content, :front_matter, to: :parsed_content
def self.primary_key
'slug'
end
......@@ -114,8 +116,7 @@ class WikiPage
@attributes[:title] = new_title
end
# The raw content of this page.
def content
def raw_content
@attributes[:content] ||= @page&.text_data
end
......@@ -238,7 +239,7 @@ class WikiPage
save do
wiki.update_page(
@page,
content: content,
content: raw_content,
format: format,
message: attrs[:message],
title: title
......@@ -281,8 +282,10 @@ class WikiPage
# Merges the permitted keys of `attrs` into the current @attributes hash.
#
# The title (when present) is normalised first, then any front-matter is
# folded into the content, and only then are unknown keys dropped. The
# memoized parse result is discarded whenever new content is supplied.
def update_attributes(attrs)
  raw_title = attrs[:title]
  attrs[:title] = process_title(raw_title) if raw_title.present?

  # Must run before slice!, which removes the :front_matter key.
  update_front_matter(attrs)
  attrs.slice!(:content, :format, :message, :title)

  clear_memoization(:parsed_content) if attrs.key?(:content)

  @attributes.merge!(attrs)
end
......@@ -293,6 +296,28 @@ class WikiPage
private
# Serializes a front-matter hash into a fenced YAML block.
#
# Keys are stringified before dumping. YAML.dump supplies the opening
# `---` line; the closing fence is appended here.
#
# @param hash [Hash, nil] the front-matter to serialize
# @return [String] the fenced YAML block, or '' when `hash` is blank
def serialize_front_matter(hash)
  return '' if hash.blank?

  stringified = hash.transform_keys(&:to_s)
  "#{YAML.dump(stringified)}---\n"
end
# Folds attrs[:front_matter] into attrs[:content] as a leading YAML block.
#
# Does nothing unless the front-matter feature is enabled for the project
# and the caller supplied a :front_matter key. When no new content is
# given, the current (already stripped) content is re-used.
#
# @param attrs [Hash] update parameters; attrs[:content] is overwritten
# @raise [FrontMatterTooLong] when the serialized block exceeds the limit
def update_front_matter(attrs)
  parser = Gitlab::WikiPages::FrontMatterParser
  return unless parser.enabled?(project) && attrs.has_key?(:front_matter)

  yaml_block = serialize_front_matter(attrs[:front_matter])
  raise FrontMatterTooLong if yaml_block.size > parser::MAX_FRONT_MATTER_LENGTH

  body = attrs[:content].presence || content
  attrs[:content] = yaml_block + body
end
# Memoized result of running the front-matter parser over the raw content.
# Both `content` and `front_matter` are delegated to this object.
def parsed_content
  strong_memoize(:parsed_content) do
    parser = Gitlab::WikiPages::FrontMatterParser.new(raw_content, project)
    parser.parse
  end
end
# Process and format the title based on the user input.
def process_title(title)
return if title.blank?
......@@ -339,14 +364,16 @@ class WikiPage
def validate_path_limits
*dirnames, title = @attributes[:title].split('/')
if title && title.bytesize > MAX_TITLE_BYTES
errors.add(:title, _("exceeds the limit of %{bytes} bytes") % { bytes: MAX_TITLE_BYTES })
if title && title.bytesize > Gitlab::WikiPages::MAX_TITLE_BYTES
errors.add(:title, _("exceeds the limit of %{bytes} bytes") % {
bytes: Gitlab::WikiPages::MAX_TITLE_BYTES
})
end
invalid_dirnames = dirnames.select { |d| d.bytesize > MAX_DIRECTORY_BYTES }
invalid_dirnames = dirnames.select { |d| d.bytesize > Gitlab::WikiPages::MAX_DIRECTORY_BYTES }
invalid_dirnames.each do |dirname|
errors.add(:title, _('exceeds the limit of %{bytes} bytes for directory name "%{dirname}"') % {
bytes: MAX_DIRECTORY_BYTES,
bytes: Gitlab::WikiPages::MAX_DIRECTORY_BYTES,
dirname: dirname
})
end
......
......@@ -3,28 +3,11 @@
module Banzai
module Filter
class FrontMatterFilter < HTML::Pipeline::Filter
DELIM_LANG = {
'---' => 'yaml',
'+++' => 'toml',
';;;' => 'json'
}.freeze
DELIM = Regexp.union(DELIM_LANG.keys)
PATTERN = %r{
\A(?:[^\r\n]*coding:[^\r\n]*)? # optional encoding line
\s*
^(?<delim>#{DELIM})[ \t]*(?<lang>\S*) # opening front matter marker (optional language specifier)
\s*
^(?<front_matter>.*?) # front matter (not greedy)
\s*
^\k<delim> # closing front matter marker
\s*
}mx.freeze
def call
html.sub(PATTERN) do |_match|
lang = $~[:lang].presence || DELIM_LANG[$~[:delim]]
lang_mapping = Gitlab::FrontMatter::DELIM_LANG
html.sub(Gitlab::FrontMatter::PATTERN) do |_match|
lang = $~[:lang].presence || lang_mapping[$~[:delim]]
["```#{lang}", $~[:front_matter], "```", "\n"].join("\n")
end
......
# frozen_string_literal: true
# Shared definitions for recognising a front-matter block at the top of a
# document.
module Gitlab
module FrontMatter
# Maps each supported fence delimiter to the language name used when the
# opening fence carries no explicit language specifier.
DELIM_LANG = {
'---' => 'yaml',
'+++' => 'toml',
';;;' => 'json'
}.freeze
# Matches any one of the supported fence delimiters literally.
DELIM = Regexp.union(DELIM_LANG.keys)
# Matches a front-matter block anchored at the start of the string (\A),
# optionally preceded by a single "coding:" line.
#
# Named captures:
#   delim        - the opening fence delimiter
#   lang         - the optional language specifier following the delimiter
#                  (empty string when absent)
#   front_matter - the text between the fences, matched lazily
#
# The closing fence is either the opening delimiter again or three dots.
# Trailing whitespace after the closing fence is consumed by the match.
PATTERN = %r{
\A(?:[^\r\n]*coding:[^\r\n]*)? # optional encoding line
\s*
^(?<delim>#{DELIM})[ \t]*(?<lang>\S*) # opening front matter marker (optional language specifier)
\s*
^(?<front_matter>.*?) # front matter (not greedy)
\s*
^(\k<delim> | \.{3}) # closing front matter marker
\s*
}mx.freeze
end
end
# frozen_string_literal: true
module Gitlab
# Size limits for wiki page paths, referenced by the wiki page model and by
# the front-matter parser when computing its own length limit.
module WikiPages
# Many common file systems have a limit of 255 bytes for file and
# directory names, and while Git and GitLab both support paths exceeding
# those limits, the presence of them makes it impossible for users on
# those file systems to checkout a wiki repository locally.
# To avoid this situation, we enforce these limits when editing pages
# through the GitLab web interface and API:
MAX_TITLE_BYTES = 245 # reserving 10 bytes for the file extension
MAX_DIRECTORY_BYTES = 255
end
end
# frozen_string_literal: true
module Gitlab
  module WikiPages
    # Recognises, extracts and de-serializes YAML front-matter found at the
    # start of a wiki page's raw content.
    #
    # Expected failures (feature disabled, no front-matter, wrong language,
    # unparsable or unsafe YAML, non-mapping document, over-long block) do not
    # raise: `#parse` reports them through `ParseResult#reason` and returns
    # the content unchanged.
    class FrontMatterParser
      # A ParseResult contains the de-serialized front-matter, the stripped
      # content, and maybe an error, explaining why there is no front-matter.
      #
      # `reason` is one of :feature_flag_disabled, :no_content,
      # :no_pattern_match, :too_long, :not_yaml, :parse_error or :not_mapping;
      # `error` is the underlying exception (if any) that caused the failure.
      ParseResult = Struct.new(:front_matter, :content, :reason, :error, keyword_init: true)

      # Internal signal that no usable front-matter is available. It carries
      # a symbolic reason and never escapes `#parse`.
      class NoFrontMatter < StandardError
        attr_reader :reason

        # @param reason [Symbol] why front-matter could not be extracted
        def initialize(reason)
          super
          @reason = reason
        end
      end

      FEATURE_FLAG = :wiki_front_matter

      # We limit the maximum length of text we are prepared to parse as YAML, to
      # avoid exploitations and attempts to consume memory and CPU. We allow for:
      #  - a title line
      #  - a "slugs:" line
      #  - and up to 50 slugs
      #
      # This limit does not take comments into account.
      MAX_SLUGS = 50
      SLUG_LINE_LENGTH = (4 + Gitlab::WikiPages::MAX_DIRECTORY_BYTES + 1 + Gitlab::WikiPages::MAX_TITLE_BYTES)
      MAX_FRONT_MATTER_LENGTH = (8 + Gitlab::WikiPages::MAX_TITLE_BYTES) + 7 + (SLUG_LINE_LENGTH * MAX_SLUGS)

      # @param [String] wiki_content
      # @param [FeatureGate] feature_gate The scope for feature availability
      #                                   (usually a project)
      def initialize(wiki_content, feature_gate)
        @wiki_content = wiki_content
        @feature_gate = feature_gate
      end

      # @param gate [Object, nil] the scope to check the feature flag against
      # @return [Boolean] whether front-matter parsing is enabled for `gate`
      def self.enabled?(gate = nil)
        Feature.enabled?(FEATURE_FLAG, gate)
      end

      # Parses the wiki content.
      #
      # @return [ParseResult] on success, the front-matter mapping and the
      #   content with the front-matter block removed; otherwise an empty
      #   mapping, the original content, and the reason for the failure
      def parse
        ParseResult.new(front_matter: extract_front_matter, content: strip_front_matter)
      rescue NoFrontMatter => e
        ParseResult.new(front_matter: {}, content: wiki_content, reason: e.reason, error: e.cause)
      end

      private

      attr_reader :wiki_content, :feature_gate

      # Runs the full pipeline: feature check, extraction, de-serialization
      # and shape validation. Raises NoFrontMatter at the first failing step.
      def extract_front_matter
        ensure_enabled!
        front_matter, lang = extract
        front_matter = parse_string(front_matter, lang)
        validate(front_matter)
        front_matter
      end

      # De-serializes the front-matter source. Kept as a separate method so
      # that further languages can be supported without touching other code.
      #
      # Psych::BadAlias is rescued alongside the other Psych errors because
      # safe_load is called without enabling aliases: front-matter that
      # contains a YAML alias would otherwise raise out of #parse instead of
      # being reported as :parse_error.
      def parse_string(source, lang)
        raise NoFrontMatter, :not_yaml unless lang == 'yaml'

        YAML.safe_load(source, symbolize_names: true)
      rescue Psych::DisallowedClass, Psych::SyntaxError, Psych::BadAlias
        raise NoFrontMatter, :parse_error
      end

      # Only a mapping (Hash) is accepted as front-matter; scalars, lists and
      # empty documents are rejected.
      def validate(parsed)
        raise NoFrontMatter, :not_mapping unless Hash === parsed
      end

      # Locates the front-matter block and determines its language.
      #
      # @return [Array(String, String)] the raw front-matter text and the
      #   language, taken from the explicit specifier if given, else implied
      #   by the delimiter
      def extract
        raise NoFrontMatter, :no_content unless wiki_content.present?

        match = Gitlab::FrontMatter::PATTERN.match(wiki_content)
        raise NoFrontMatter, :no_pattern_match unless match
        raise NoFrontMatter, :too_long if match[:front_matter].size > MAX_FRONT_MATTER_LENGTH

        lang = match[:lang].downcase.presence || Gitlab::FrontMatter::DELIM_LANG[match[:delim]]
        [match[:front_matter], lang]
      end

      def ensure_enabled!
        raise NoFrontMatter, :feature_flag_disabled unless self.class.enabled?(feature_gate)
      end

      # The pattern is anchored at the start of the string, so it can match
      # at most once; a single substitution is sufficient.
      def strip_front_matter
        wiki_content.sub(Gitlab::FrontMatter::PATTERN, '')
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::WikiPages::FrontMatterParser do
subject(:parser) { described_class.new(raw_content, gate) }
let(:content) { 'This is the content' }
let(:end_divider) { '---' }
let(:gate) { double('Gate') }
let(:with_front_matter) do
<<~MD
---
a: 1
b: 2
c:
- foo
- bar
date: I am safe. Not actually a date
#{end_divider}
#{content}
MD
end
# Matcher for the keys `a`, `b` and `c` defined by the `with_front_matter`
# fixture; the `date` key is deliberately not checked here.
def have_correct_front_matter
  expected = { a: 1, b: 2, c: %w[foo bar] }
  include(expected)
end
describe '#parse' do
subject { parser.parse }
context 'there is front matter' do
let(:raw_content) { with_front_matter }
it do
is_expected.to have_attributes(
front_matter: have_correct_front_matter,
content: content + "\n",
error: be_nil
)
end
end
context 'there is no content' do
let(:raw_content) { '' }
it { is_expected.to have_attributes(reason: :no_content) }
end
context 'there is no front_matter' do
let(:raw_content) { content }
it { is_expected.to have_attributes(front_matter: be_empty, content: raw_content) }
it { is_expected.to have_attributes(reason: :no_pattern_match) }
end
context 'the feature flag is disabled' do
let(:raw_content) { with_front_matter }
before do
stub_feature_flags(Gitlab::WikiPages::FrontMatterParser::FEATURE_FLAG => false)
end
it { is_expected.to have_attributes(front_matter: be_empty, content: raw_content) }
end
context 'the feature flag is enabled for the gated object' do
let(:raw_content) { with_front_matter }
before do
stub_feature_flags(Gitlab::WikiPages::FrontMatterParser::FEATURE_FLAG => false)
stub_feature_flags(Gitlab::WikiPages::FrontMatterParser::FEATURE_FLAG => {
enabled: true,
thing: gate
})
end
it do
is_expected.to have_attributes(
front_matter: have_correct_front_matter,
content: content + "\n",
reason: be_nil
)
end
end
context 'the end divider is ...' do
let(:end_divider) { '...' }
let(:raw_content) { with_front_matter }
it { is_expected.to have_attributes(front_matter: have_correct_front_matter) }
end
context 'the front-matter is not a mapping' do
let(:raw_content) do
<<~MD
---
- thing one
- thing two
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :not_mapping) }
end
context 'there is nothing in the front-matter block' do
let(:raw_content) do
<<~MD
---
---
My content here
MD
end
it { is_expected.to have_attributes(reason: :not_mapping) }
end
context 'there is a string in the YAML block' do
let(:raw_content) do
<<~MD
---
This is a string
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :not_mapping) }
end
context 'there is dangerous YAML in the block' do
let(:raw_content) do
<<~MD
---
date: 2010-02-11 11:02:57
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :parse_error, error: be_present) }
end
context 'there is acceptably long YAML in the front-matter block' do
let(:raw_content) do
key = 'title: '
length = described_class::MAX_FRONT_MATTER_LENGTH - key.size
<<~MD
---
title: #{FFaker::Lorem.characters(length)}
---
#{content}
MD
end
it { is_expected.to have_attributes(front_matter: include(title: be_present)) }
end
context 'there is suspiciously long YAML in the front-matter block' do
let(:raw_content) do
<<~MD
---
title: #{FFaker::Lorem.characters(described_class::MAX_FRONT_MATTER_LENGTH)}
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :too_long) }
end
context 'TOML front matter' do
let(:raw_content) do
<<~MD
+++
title = "My title"
+++
#{content}
MD
end
it { is_expected.to have_attributes(reason: :not_yaml) }
end
context 'TOML style fences, advertised as YAML' do
let(:raw_content) do
<<~MD
+++ yaml
title: "My title"
+++
#{content}
MD
end
it { is_expected.to have_attributes(front_matter: include(title: 'My title')) }
end
context 'YAML, advertised as something else' do
let(:raw_content) do
<<~MD
--- toml
title: My title
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :not_yaml) }
end
context 'there is text content in the YAML block, in comments' do
let(:raw_content) do
<<~MD
---
# This is YAML
#
# It has comments though. Explaining things
foo: 1
## It has headings
headings:
- heading one
- heading two
# And lists
lists:
- and lists
- with things in them
---
#{content}
MD
end
it { is_expected.to have_attributes(front_matter: include(foo: 1)) }
end
context 'there is text content in the YAML block' do
let(:raw_content) do
<<~MD
---
# This is not YAML
In fact is looks like markdown
## It has headings
Paragraphs
- and lists
- with things in them
---
#{content}
MD
end
it { is_expected.to have_attributes(reason: :not_mapping) }
end
end
describe '#strip_front_matter' do
let(:raw_content) { with_front_matter }
it 'removes the front-matter from the content' do
expect(subject.send(:strip_front_matter)).to eq(content + "\n")
end
end
end
......@@ -20,6 +20,17 @@ describe WikiPage do
subject { new_page }
# Stubs the wiki front-matter feature flag as disabled.
def disable_front_matter
  flag = Gitlab::WikiPages::FrontMatterParser::FEATURE_FLAG
  stub_feature_flags(flag => false)
end
# Stubs the wiki front-matter feature flag as enabled for `project` only.
def enable_front_matter_for_project
  flag = Gitlab::WikiPages::FrontMatterParser::FEATURE_FLAG
  gate = { thing: project, enabled: true }
  stub_feature_flags(flag => gate)
end
describe '.group_by_directory' do
context 'when there are no pages' do
it 'returns an empty array' do
......@@ -101,6 +112,119 @@ describe WikiPage do
end
end
describe '#front_matter' do
let_it_be(:project) { create(:project) }
let(:wiki_page) { create(:wiki_page, project: project, content: content) }
shared_examples 'a page without front-matter' do
it { expect(wiki_page).to have_attributes(front_matter: {}, content: content) }
end
shared_examples 'a page with front-matter' do
let(:front_matter) { { title: 'Foo', slugs: %w[slug_a slug_b] } }
it { expect(wiki_page.front_matter).to eq(front_matter) }
end
context 'the wiki page has front matter' do
let(:content) do
<<~MD
---
title: Foo
slugs:
- slug_a
- slug_b
---
My actual content
MD
end
it_behaves_like 'a page with front-matter'
it 'strips the front matter from the content' do
expect(wiki_page.content.strip).to eq('My actual content')
end
context 'the feature flag is off' do
before do
disable_front_matter
end
it_behaves_like 'a page without front-matter'
context 'but enabled for the project' do
before do
enable_front_matter_for_project
end
it_behaves_like 'a page with front-matter'
end
end
end
context 'the wiki page does not have front matter' do
let(:content) { 'My actual content' }
it_behaves_like 'a page without front-matter'
end
context 'the wiki page has fenced blocks, but nothing in them' do
let(:content) do
<<~MD
---
---
My actual content
MD
end
it_behaves_like 'a page without front-matter'
end
context 'the wiki page has invalid YAML type in fenced blocks' do
let(:content) do
<<~MD
---
this isn't YAML
---
My actual content
MD
end
it_behaves_like 'a page without front-matter'
end
context 'the wiki page has a disallowed class in fenced block' do
let(:content) do
<<~MD
---
date: 2010-02-11 11:02:57
---
My actual content
MD
end
it_behaves_like 'a page without front-matter'
end
context 'the wiki page has invalid YAML in fenced block' do
let(:content) do
<<~MD
---
invalid-use-of-reserved-indicator: @text
---
My actual content
MD
end
it_behaves_like 'a page without front-matter'
end
end
describe '.unhyphenize' do
it 'removes hyphens from a name' do
name = 'a-name--with-hyphens'
......@@ -155,8 +279,8 @@ describe WikiPage do
end
describe '#validate_path_limits' do
let(:max_title) { described_class::MAX_TITLE_BYTES }
let(:max_directory) { described_class::MAX_DIRECTORY_BYTES }
let(:max_title) { Gitlab::WikiPages::MAX_TITLE_BYTES }
let(:max_directory) { Gitlab::WikiPages::MAX_DIRECTORY_BYTES }
where(:character) do
['a', 'ä', '🙈']
......@@ -296,7 +420,7 @@ describe WikiPage do
subject.update(content: "new content")
page = wiki.find_page(title)
expect(page.content).to eq('new content')
expect([subject.content, page.content]).to all(eq('new content'))
end
it "returns true" do
......@@ -333,7 +457,7 @@ describe WikiPage do
subject.update(content: new_content)
page = wiki.find_page('test page')
expect(page.content).to eq("new content")
expect([subject.content, page.content]).to all(eq("new content"))
end
it "updates the title of the page" do
......@@ -342,7 +466,75 @@ describe WikiPage do
subject.update(title: new_title)
page = wiki.find_page(new_title)
expect(page.title).to eq(new_title)
expect([subject.title, page.title]).to all(eq(new_title))
end
describe 'updating front_matter' do
shared_examples 'able to update front-matter' do
it 'updates the wiki-page front-matter' do
title = subject.title
content = subject.content
subject.update(front_matter: { slugs: ['x'] })
page = wiki.find_page(title)
expect([subject, page]).to all(
have_attributes(
front_matter: include(slugs: include('x')),
content: content
))
end
end
it_behaves_like 'able to update front-matter'
context 'the front matter is too long' do
let(:new_front_matter) do
{
title: generate(:wiki_page_title),
slugs: Array.new(51).map { FFaker::Lorem.characters(512) }
}
end
it 'raises an error' do
expect { subject.update(front_matter: new_front_matter) }.to raise_error(described_class::FrontMatterTooLong)
end
end
context 'the front-matter feature flag is not enabled' do
before do
disable_front_matter
end
it 'does not update the front-matter' do
content = subject.content
subject.update(front_matter: { slugs: ['x'] })
page = wiki.find_page(subject.title)
expect([subject, page]).to all(have_attributes(front_matter: be_empty, content: content))
end
context 'but it is enabled for the project' do
before do
enable_front_matter_for_project
end
it_behaves_like 'able to update front-matter'
end
end
it 'updates the wiki-page front-matter and content together' do
title = subject.title
content = 'totally new content'
subject.update(content: content, front_matter: { slugs: ['x'] })
page = wiki.find_page(title)
expect([subject, page]).to all(
have_attributes(
front_matter: include(slugs: include('x')),
content: content
))
end
end
it "returns true" do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment