Merge branch 'security-2769-idn-homograph-attack' into 'master'

[master] GitLab vulnerable to IDN homograph attacks and RTLO attacks See merge request gitlab/gitlabhq!2770

Merge branch 'security-2769-idn-homograph-attack' into 'master'
[master] GitLab vulnerable to IDN homograph attacks and RTLO attacks See merge request gitlab/gitlabhq!2770
08cc2ab8 · Yorick Peterse · e055bbca · ac2d8af5 · 08cc2ab8 · 08cc2ab8
Commit 08cc2ab8 authored Jan 24, 2019 by Yorick Peterse
9 changed files
--- a/app/models/concerns/cache_markdown_field.rb
+++ b/app/models/concerns/cache_markdown_field.rb
@@ -15,7 +15,7 @@ module CacheMarkdownField
  # Increment this number every time the renderer changes its output
  CACHE_REDCARPET_VERSION         = 3
  CACHE_COMMONMARK_VERSION_START  = 10
-  CACHE_COMMONMARK_VERSION        = 13
+  CACHE_COMMONMARK_VERSION        = 14

  # changes to these attributes cause the cache to be invalidates
  INVALIDATED_BY = %w[author project].freeze

--- a/changelogs/unreleased/security-2769-idn-homograph-attack.yml
+++ b/changelogs/unreleased/security-2769-idn-homograph-attack.yml
+---
+title: Make potentially malicious links more visible in the UI and scrub RTLO chars from links
+merge_request: 2770
+author:
+type: security
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -8,6 +8,10 @@ module Banzai
    #
    # Based on HTML::Pipeline::AutolinkFilter
    #
+    # Note that our CommonMark parser, `commonmarker` (using the autolink extension)
+    # handles standard autolinking, like http/https. We detect additional
+    # schemes (smb, rdar, etc).
+    #
    # Context options:
    #   :autolink  - Boolean, skips all processing done by this filter when false
    #   :link_attr - Hash of attributes for the generated links
@@ -107,10 +111,13 @@ module Banzai
          end
        end

-        # match has come from node.to_html above, so we know it's encoded
-        # correctly.
+        # Since this came from a Text node, make sure the new href is encoded.
+        # `commonmarker` percent encodes the domains of links it handles, so
+        # do the same (instead of using `normalized_encode`).
+        href_safe = Addressable::URI.encode(match).html_safe
+
        html_safe_match = match.html_safe
-        options = link_options.merge(href: html_safe_match)
+        options         = link_options.merge(href: href_safe)

        content_tag(:a, html_safe_match, options) + dropped
      end

--- a/lib/banzai/filter/external_link_filter.rb
+++ b/lib/banzai/filter/external_link_filter.rb
@@ -4,17 +4,29 @@ module Banzai
  module Filter
    # HTML Filter to modify the attributes of external links
    class ExternalLinkFilter < HTML::Pipeline::Filter
-      SCHEMES = ['http', 'https', nil].freeze
+      SCHEMES      = ['http', 'https', nil].freeze
+      RTLO         = "\u202E".freeze
+      ENCODED_RTLO = '%E2%80%AE'.freeze

      def call
        links.each do |node|
-          uri = uri(node['href'].to_s)
-
-          node.set_attribute('href', uri.to_s) if uri
+          # URI.parse does stricter checking on the url than Addressable,
+          # such as on `mailto:` links. Since we've been using it, do an
+          # initial parse for validity and then use Addressable
+          # for IDN support, etc
+          uri = uri_strict(node['href'].to_s)
+          if uri
+            node.set_attribute('href', uri.to_s)
+            addressable_uri = addressable_uri(node['href'])
+          else
+            addressable_uri = nil
+          end

-          if SCHEMES.include?(uri&.scheme) && !internal_url?(uri)
-            node.set_attribute('rel', 'nofollow noreferrer noopener')
-            node.set_attribute('target', '_blank')
+          unless internal_url?(addressable_uri)
+            punycode_autolink_node!(addressable_uri, node)
+            sanitize_link_text!(node)
+            add_malicious_tooltip!(addressable_uri, node)
+            add_nofollow!(addressable_uri, node)
          end
        end

@@ -23,12 +35,18 @@ module Banzai

      private

-      def uri(href)
+      def uri_strict(href)
        URI.parse(href)
      rescue URI::Error
        nil
      end

+      def addressable_uri(href)
+        Addressable::URI.parse(href)
+      rescue Addressable::URI::InvalidURIError
+        nil
+      end
+
      def links
        query = 'descendant-or-self::a[@href and not(@href = "")]'
        doc.xpath(query)
@@ -45,6 +63,57 @@ module Banzai
      def internal_url
        @internal_url ||= URI.parse(Gitlab.config.gitlab.url)
      end
+
+      # Only replace an autolink with an IDN with it's punycode
+      # version if we need emailable links.  Otherwise let it
+      # be shown normally and the tooltips will show the
+      # punycode version.
+      def punycode_autolink_node!(uri, node)
+        return unless uri
+        return unless context[:emailable_links]
+
+        unencoded_uri_str = Addressable::URI.unencode(node['href'])
+
+        if unencoded_uri_str == node.content && idn?(uri)
+          node.content = uri.normalize
+        end
+      end
+
+      # escape any right-to-left (RTLO) characters in link text
+      def sanitize_link_text!(node)
+        node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO)
+      end
+
+      # If the domain is an international domain name (IDN),
+      # let's expose with a tooltip in case it's intended
+      # to be malicious. This is particularly useful for links
+      # where the link text is not the same as the actual link.
+      # We will continue to show the unicode version of the domain
+      # in autolinked link text, which could contain emojis, etc.
+      #
+      # Also show the tooltip if the url contains the RTLO character,
+      # as this is an indicator of a malicious link
+      def add_malicious_tooltip!(uri, node)
+        if idn?(uri) || has_encoded_rtlo?(uri)
+          node.add_class('has-tooltip')
+          node.set_attribute('title', uri.normalize)
+        end
+      end
+
+      def add_nofollow!(uri, node)
+        if SCHEMES.include?(uri&.scheme)
+          node.set_attribute('rel', 'nofollow noreferrer noopener')
+          node.set_attribute('target', '_blank')
+        end
+      end
+
+      def idn?(uri)
+        uri&.normalized_host&.start_with?('xn--')
+      end
+
+      def has_encoded_rtlo?(uri)
+        uri&.to_s&.include?(ENCODED_RTLO)
+      end
    end
  end
 end
--- a/lib/banzai/pipeline/email_pipeline.rb
+++ b/lib/banzai/pipeline/email_pipeline.rb
@@ -11,7 +11,8 @@ module Banzai

      def self.transform_context(context)
        super(context).merge(
-          only_path: false
+          only_path: false,
+          emailable_links: true
        )
      end
    end

--- a/spec/lib/banzai/filter/autolink_filter_spec.rb
+++ b/spec/lib/banzai/filter/autolink_filter_spec.rb
@@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do
      expect(doc.at_css('a')['class']).to eq 'custom'
    end

+    it 'escapes RTLO and other characters' do
+      # rendered text looks like "http://example.com/evilexe.mp3"
+      evil_link = "#{link}evil\u202E3pm.exe"
+      doc = filter("#{evil_link}")
+
+      expect(doc.at_css('a')['href']).to eq "http://about.gitlab.com/evil%E2%80%AE3pm.exe"
+    end
+
+    it 'encodes international domains' do
+      link     = "http://one😄two.com"
+      expected = "http://one%F0%9F%98%84two.com"
+      doc      = filter(link)
+
+      expect(doc.at_css('a')['href']).to eq expected
+    end
+
    described_class::IGNORE_PARENTS.each do |elem|
      it "ignores valid links contained inside '#{elem}' element" do
        exp = act = "<#{elem}>See #{link}</#{elem}>"

--- a/spec/lib/banzai/filter/external_link_filter_spec.rb
+++ b/spec/lib/banzai/filter/external_link_filter_spec.rb
@@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do

      expect(doc.to_html).to eq(expected)
    end
+
+    it 'adds rel and target to improperly formatted autolinks' do
+      doc = filter %q(<p><a href="mailto://jblogs@example.com">mailto://jblogs@example.com</a></p>)
+      expected = %q(<p><a href="mailto://jblogs@example.com" rel="nofollow noreferrer noopener" target="_blank">mailto://jblogs@example.com</a></p>)
+
+      expect(doc.to_html).to eq(expected)
+    end
  end

  context 'for links with a username' do
@@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do

    it_behaves_like 'an external link with rel attribute'
  end
+
+  context 'links with RTLO character' do
+    # In rendered text this looks like "http://example.com/evilexe.mp3"
+    let(:doc) { filter %Q(<a href="http://example.com/evil%E2%80%AE3pm.exe">http://example.com/evil\u202E3pm.exe</a>) }
+
+    it_behaves_like 'an external link with rel attribute'
+
+    it 'escapes RTLO in link text' do
+      expected = %q(http://example.com/evil%E2%80%AE3pm.exe</a>)
+
+      expect(doc.to_html).to include(expected)
+    end
+
+    it 'does not mangle the link text' do
+      doc = filter %Q(<a href="http://example.com">One<span>and</span>\u202Eexe.mp3</a>)
+
+      expect(doc.to_html).to include('One<span>and</span>%E2%80%AEexe.mp3</a>')
+    end
+  end
+
+  context 'for generated autolinks' do
+    context 'with an IDN character' do
+      let(:doc)       { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>)) }
+      let(:doc_email) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>), emailable_links: true) }
+
+      it_behaves_like 'an external link with rel attribute'
+
+      it 'does not change the link text' do
+        expect(doc.to_html).to include('http://exa😄mple.com</a>')
+      end
+
+      it 'uses punycode for emails' do
+        expect(doc_email.to_html).to include('http://xn--example-6p25f.com/</a>')
+      end
+    end
+  end
+
+  context 'for links that look malicious' do
+    context 'with an IDN character' do
+      let(:doc) { filter %q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>) }
+
+      it 'adds a toolip with punycode' do
+        expect(doc.to_html).to include('http://exa😄mple.com</a>')
+        expect(doc.to_html).to include('class="has-tooltip"')
+        expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"')
+      end
+    end
+
+    context 'with RTLO character' do
+      let(:doc) { filter %q(<a href="http://example.com/evil%E2%80%AE3pm.exe">Evil Test</a>) }
+
+      it 'adds a toolip with punycode' do
+        expect(doc.to_html).to include('Evil Test</a>')
+        expect(doc.to_html).to include('class="has-tooltip"')
+        expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"')
+      end
+    end
+  end
 end
--- a/spec/lib/banzai/pipeline/email_pipeline_spec.rb
+++ b/spec/lib/banzai/pipeline/email_pipeline_spec.rb
@@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do
      expect(described_class.filters).not_to be_empty
      expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter)
    end
+
+    it 'shows punycode for autolinks' do
+      examples = %W[
+        http://one😄two.com
+        http://\u0261itlab.com
+      ]
+
+      examples.each do |markdown|
+        result = described_class.call(markdown, project: nil)[:output]
+        link   = result.css('a').first
+
+        expect(link.content).to include('http://xn--')
+      end
+    end
  end
 end
--- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb
+++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb
@@ -57,4 +57,42 @@ describe Banzai::Pipeline::FullPipeline do
      expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote
    end
  end
+
+  describe 'links are detected as malicious' do
+    it 'has tooltips for malicious links' do
+      examples = %W[
+        http://example.com/evil\u202E3pm.exe
+        [evilexe.mp3](http://example.com/evil\u202E3pm.exe)
+        rdar://localhost.com/\u202E3pm.exe
+        http://one😄two.com
+        [Evil-Test](http://one😄two.com)
+        http://\u0261itlab.com
+        [Evil-GitLab-link](http://\u0261itlab.com)
+        ![Evil-GitLab-link](http://\u0261itlab.com.png)
+      ]
+
+      examples.each do |markdown|
+        result = described_class.call(markdown, project: nil)[:output]
+        link   = result.css('a').first
+
+        expect(link[:class]).to include('has-tooltip')
+      end
+    end
+
+    it 'has no tooltips for safe links' do
+      examples = %w[
+        http://example.com
+        [Safe-Test](http://example.com)
+        https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg
+        [Wikipedia-link](https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg)
+      ]
+
+      examples.each do |markdown|
+        result = described_class.call(markdown, project: nil)[:output]
+        link   = result.css('a').first
+
+        expect(link[:class]).to be_nil
+      end
+    end
+  end
 end