Commit a98b89e9 authored by Jan Provaznik's avatar Jan Provaznik

Re-escape whole HTML content instead of only match

When we un-escape HTML text to find references in it, we should then
re-escape the whole text again, not only found matches.

Because we replace matches with milestone/label links (which contain
HTML tags we don't want to escape again), we re-escape HTML text
with placeholders instead of these links and then replace placeholders
in the escaped text.
parent 842b4d4a
---
title: Make sure HTML text is always escaped when replacing label/milestone references.
merge_request:
author:
type: security
......@@ -7,6 +7,14 @@ module Banzai
class AbstractReferenceFilter < ReferenceFilter
include CrossProjectReference
# REFERENCE_PLACEHOLDER is used for re-escaping HTML text except found
# reference (which we replace with placeholder during re-scaping). The
# random number helps ensure it's pretty close to unique. Since it's a
# transitory value (it never gets saved) we can initialize once, and it
# doesn't matter if it changes on a restart.
REFERENCE_PLACEHOLDER = "_reference_#{SecureRandom.hex(16)}_"
REFERENCE_PLACEHOLDER_PATTERN = %r{#{REFERENCE_PLACEHOLDER}(\d+)}.freeze
def self.object_class
# Implement in child class
# Example: MergeRequest
......@@ -389,6 +397,14 @@ module Banzai
def escape_html_entities(text)
CGI.escapeHTML(text.to_s)
end
def escape_with_placeholders(text, placeholder_data)
escaped = escape_html_entities(text)
escaped.gsub(REFERENCE_PLACEHOLDER_PATTERN) do |match|
placeholder_data[$1.to_i]
end
end
end
end
end
......@@ -14,24 +14,24 @@ module Banzai
find_labels(parent_object).find(id)
end
def self.references_in(text, pattern = Label.reference_pattern)
unescape_html_entities(text).gsub(pattern) do |match|
yield match, $~[:label_id].to_i, $~[:label_name], $~[:project], $~[:namespace], $~
end
end
def references_in(text, pattern = Label.reference_pattern)
unescape_html_entities(text).gsub(pattern) do |match|
labels = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
namespace, project = $~[:namespace], $~[:project]
project_path = full_project_path(namespace, project)
label = find_label(project_path, $~[:label_id], $~[:label_name])
if label
yield match, label.id, project, namespace, $~
labels[label.id] = yield match, label.id, project, namespace, $~
"#{REFERENCE_PLACEHOLDER}#{label.id}"
else
escape_html_entities(match)
match
end
end
return text if labels.empty?
escape_with_placeholders(unescaped_html, labels)
end
def find_label(parent_ref, label_id, label_name)
......
......@@ -51,15 +51,21 @@ module Banzai
# default implementation.
return super(text, pattern) if pattern != Milestone.reference_pattern
unescape_html_entities(text).gsub(pattern) do |match|
milestones = {}
unescaped_html = unescape_html_entities(text).gsub(pattern) do |match|
milestone = find_milestone($~[:project], $~[:namespace], $~[:milestone_iid], $~[:milestone_name])
if milestone
yield match, milestone.id, $~[:project], $~[:namespace], $~
milestones[milestone.id] = yield match, milestone.id, $~[:project], $~[:namespace], $~
"#{REFERENCE_PLACEHOLDER}#{milestone.id}"
else
escape_html_entities(match)
match
end
end
return text if milestones.empty?
escape_with_placeholders(unescaped_html, milestones)
end
def find_milestone(project_ref, namespace_ref, milestone_id, milestone_name)
......
......@@ -3,8 +3,8 @@
module Gitlab
module MarkdownCache
# Increment this number every time the renderer changes its output
CACHE_COMMONMARK_VERSION = 17
CACHE_COMMONMARK_VERSION_START = 10
CACHE_COMMONMARK_VERSION = 16
BaseError = Class.new(StandardError)
UnsupportedClassError = Class.new(BaseError)
......
......@@ -10,6 +10,11 @@ describe Banzai::Filter::LabelReferenceFilter do
let(:label) { create(:label, project: project) }
let(:reference) { label.to_reference }
it_behaves_like 'HTML text with references' do
let(:resource) { label }
let(:resource_text) { resource.title }
end
it 'requires project context' do
expect { described_class.call('') }.to raise_error(ArgumentError, /:project/)
end
......
......@@ -329,6 +329,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do
it_behaves_like 'cross-project / same-namespace complete reference'
it_behaves_like 'cross project shorthand reference'
it_behaves_like 'references with HTML entities'
it_behaves_like 'HTML text with references' do
let(:resource) { milestone }
let(:resource_text) { "#{resource.class.reference_prefix}#{resource.title}" }
end
end
shared_context 'group milestones' do
......@@ -340,6 +344,10 @@ describe Banzai::Filter::MilestoneReferenceFilter do
it_behaves_like 'String-based multi-word references in quotes'
it_behaves_like 'referencing a milestone in a link href'
it_behaves_like 'references with HTML entities'
it_behaves_like 'HTML text with references' do
let(:resource) { milestone }
let(:resource_text) { "#{resource.class.reference_prefix}#{resource.title}" }
end
it 'does not support references by IID' do
doc = reference_filter("See #{Milestone.reference_prefix}#{milestone.iid}")
......
# frozen_string_literal: true
RSpec.shared_examples 'HTML text with references' do
let(:markdown_prepend) { "&lt;img src=\"\" onerror=alert(`bug`)&gt;" }
it 'preserves escaped HTML text and adds valid references' do
reference = resource.to_reference(format: :name)
doc = reference_filter("#{markdown_prepend}#{reference}")
expect(doc.to_html).to start_with(markdown_prepend)
expect(doc.text).to eq %(<img src="" onerror=alert(`bug`)>#{resource_text})
end
it 'preserves escaped HTML text if there are no valid references' do
reference = "#{resource.class.reference_prefix}invalid"
text = "#{markdown_prepend}#{reference}"
doc = reference_filter(text)
expect(doc.to_html).to eq text
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment