From 8906cabae7a6be44cafcedcaf27352614fcc462b Mon Sep 17 00:00:00 2001 From: Douwe Maan <douwe@gitlab.com> Date: Tue, 18 Aug 2015 17:02:26 -0700 Subject: [PATCH] Changes and stuff. --- Gemfile | 2 + Gemfile.lock | 2 + app/mailers/notify.rb | 10 +- app/models/sent_notification.rb | 8 +- app/workers/email_receiver_worker.rb | 2 +- config/initializers/1_settings.rb | 2 +- lib/gitlab/email_html_cleaner.rb | 133 +++++++++++++++++++++++++ lib/gitlab/email_receiver.rb | 144 +++++++++++++++++++++++---- lib/gitlab/reply_by_email.rb | 47 +++++++++ 9 files changed, 320 insertions(+), 30 deletions(-) create mode 100644 lib/gitlab/email_html_cleaner.rb create mode 100644 lib/gitlab/reply_by_email.rb diff --git a/Gemfile b/Gemfile index e3f76671f5..9879141f5c 100644 --- a/Gemfile +++ b/Gemfile @@ -274,3 +274,5 @@ gem "newrelic_rpm" gem 'octokit', '3.7.0' gem "mail_room", github: "DouweM/mail_room", branch: "sidekiq" + +gem 'email_reply_parser' diff --git a/Gemfile.lock b/Gemfile.lock index 597eb4cde0..629d128b42 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -163,6 +163,7 @@ GEM dotenv (0.9.0) dropzonejs-rails (0.7.1) rails (> 3.1) + email_reply_parser (0.5.8) email_spec (1.6.0) launchy (~> 2.1) mail (~> 2.2) @@ -780,6 +781,7 @@ DEPENDENCIES diffy (~> 3.0.3) doorkeeper (= 2.1.3) dropzonejs-rails + email_reply_parser email_spec (~> 1.6.0) enumerize factory_girl_rails diff --git a/app/mailers/notify.rb b/app/mailers/notify.rb index 44df3d6407..c2ea99d968 100644 --- a/app/mailers/notify.rb +++ b/app/mailers/notify.rb @@ -146,7 +146,7 @@ class Notify < ActionMailer::Base if reply_key headers['X-GitLab-Reply-Key'] = reply_key - headers['Reply-To'] = Gitlab.config.reply_by_email.address.gsub('%{reply_key}', reply_key) + headers['Reply-To'] = Gitlab::ReplyByEmail.reply_address(reply_key) end mail(headers) @@ -165,6 +165,10 @@ class Notify < ActionMailer::Base headers['In-Reply-To'] = message_id(model) headers['References'] = message_id(model) + if headers[:subject] + headers[:subject].prepend('Re: ') + end + mail_new_thread(model, headers) end @@ -173,8 +177,6 @@ class Notify < ActionMailer::Base end def reply_key - return nil unless Gitlab.config.reply_by_email.enabled - - @reply_key ||= SecureRandom.hex(16) + @reply_key ||= Gitlab::ReplyByEmail.reply_key end end diff --git a/app/models/sent_notification.rb b/app/models/sent_notification.rb index a3d24669b5..23a1b19ea7 100644 --- a/app/models/sent_notification.rb +++ b/app/models/sent_notification.rb @@ -6,8 +6,8 @@ class SentNotification < ActiveRecord::Base validate :project, :recipient, :reply_key, presence: true validate :reply_key, uniqueness: true - validates :noteable_id, presence: true, if: ->(n) { n.noteable_type.present? && n.noteable_type != 'Commit' } - validates :commit_id, presence: true, if: ->(n) { n.noteable_type == 'Commit' } + validates :noteable_id, presence: true, unless: :for_commit? + validates :commit_id, presence: true, if: :for_commit? def self.for(reply_key) find_by(reply_key: reply_key) @@ -19,11 +19,9 @@ class SentNotification < ActiveRecord::Base def noteable if for_commit? - project.commit(commit_id) + project.commit(commit_id) rescue nil else super end - rescue - nil end end diff --git a/app/workers/email_receiver_worker.rb b/app/workers/email_receiver_worker.rb index e44a430f6b..94e346b5a5 100644 --- a/app/workers/email_receiver_worker.rb +++ b/app/workers/email_receiver_worker.rb @@ -4,7 +4,7 @@ class EmailReceiverWorker sidekiq_options queue: :incoming_email def perform(raw) - return unless Gitlab.config.reply_by_email.enabled + return unless Gitlab::ReplyByEmail.enabled? # begin Gitlab::EmailReceiver.new(raw).process diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 9e83660e10..654de6238d 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -153,7 +153,7 @@ Settings.gitlab['restricted_signup_domains'] ||= [] # Reply by email # Settings['reply_by_email'] ||= Settingslogic.new({}) -Settings.reply_by_email['enabled'] = false if Settings.gravatar['enabled'].nil? +Settings.reply_by_email['enabled'] = false if Settings.reply_by_email['enabled'].nil? # # Gravatar diff --git a/lib/gitlab/email_html_cleaner.rb b/lib/gitlab/email_html_cleaner.rb new file mode 100644 index 0000000000..6d7a17fe87 --- /dev/null +++ b/lib/gitlab/email_html_cleaner.rb @@ -0,0 +1,133 @@ +# Taken mostly from Discourse's Email::HtmlCleaner +module Gitlab + # HtmlCleaner cleans up the extremely dirty HTML that many email clients + # generate by stripping out any excess divs or spans, removing styling in + # the process (which also makes the html more suitable to be parsed as + # Markdown). + class EmailHtmlCleaner + # Elements to hoist all children out of + HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td) + # Node types to always delete + HTML_DELETE_ELEMENT_TYPES = [ + Nokogiri::XML::Node::DTD_NODE, + Nokogiri::XML::Node::COMMENT_NODE, + ] + + # Private variables: + # @doc - nokogiri document + # @out - same as @doc, but only if trimming has occured + def initialize(html) + if html.is_a?(String) + @doc = Nokogiri::HTML(html) + else + @doc = html + end + end + + class << self + # EmailHtmlCleaner.trim(inp, opts={}) + # + # Arguments: + # inp - Either a HTML string or a Nokogiri document. + # Options: + # :return => :doc, :string + # Specify the desired return type. + # Defaults to the type of the input. + # A value of :string is equivalent to calling get_document_text() + # on the returned document. + def trim(inp, opts={}) + cleaner = EmailHtmlCleaner.new(inp) + + opts[:return] ||= (inp.is_a?(String) ? :string : :doc) + + if opts[:return] == :string + cleaner.output_html + else + cleaner.output_document + end + end + + # EmailHtmlCleaner.get_document_text(doc) + # + # Get the body portion of the document, including html, as a string. + def get_document_text(doc) + body = doc.xpath('//body') + if body + body.inner_html + else + doc.inner_html + end + end + end + + def output_document + @out ||= begin + doc = @doc + trim_process_node doc + add_newlines doc + doc + end + end + + def output_html + EmailHtmlCleaner.get_document_text(output_document) + end + + private + + def add_newlines(doc) + # Replace <br> tags with a markdown \n + doc.xpath('//br').each do |br| + br.replace(new_linebreak_node doc, 2) + end + # Surround <p> tags with newlines, to help with line-wise postprocessing + # and ensure markdown paragraphs + doc.xpath('//p').each do |p| + p.before(new_linebreak_node doc) + p.after(new_linebreak_node doc, 2) + end + end + + def new_linebreak_node(doc, count=1) + Nokogiri::XML::Text.new("\n" * count, doc) + end + + def trim_process_node(node) + if should_hoist?(node) + hoisted = trim_hoist_element node + hoisted.each { |child| trim_process_node child } + elsif should_delete?(node) + node.remove + else + if children = node.children + children.each { |child| trim_process_node child } + end + end + + node + end + + def trim_hoist_element(element) + hoisted = [] + element.children.each do |child| + element.before(child) + hoisted << child + end + element.remove + hoisted + end + + def should_hoist?(node) + return false unless node.element? + HTML_HOIST_ELEMENTS.include? node.name + end + + def should_delete?(node) + return true if HTML_DELETE_ELEMENT_TYPES.include? node.type + return true if node.element? && node.name == 'head' + return true if node.text? && node.text.strip.blank? + + false + end + end +end diff --git a/lib/gitlab/email_receiver.rb b/lib/gitlab/email_receiver.rb index a9f67bb5da..18a06d2ee8 100644 --- a/lib/gitlab/email_receiver.rb +++ b/lib/gitlab/email_receiver.rb @@ -1,44 +1,69 @@ +# Inspired in great part by Discourse's Email::Receiver module Gitlab class EmailReceiver + class ProcessingError < StandardError; end + class EmailUnparsableError < ProcessingError; end + class EmptyEmailError < ProcessingError; end + class UserNotFoundError < ProcessingError; end + class UserNotAuthorizedLevelError < ProcessingError; end + class NoteableNotFoundError < ProcessingError; end + class AutoGeneratedEmailError < ProcessingError; end + class SentNotificationNotFound < ProcessingError; end + class InvalidNote < ProcessingError; end + def initialize(raw) @raw = raw end def message @message ||= Mail::Message.new(@raw) + rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e + raise EmailUnparsableError, e end def process - return unless message && sent_notification + raise EmptyEmailError if @raw.blank? + + raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/ + + raise SentNotificationNotFound unless sent_notification + + author = sent_notification.recipient + + raise UserNotFoundError unless author + + project = sent_notification.project + + raise UserNotAuthorizedLevelError unless author.can?(:create_note, project) + + raise NoteableNotFoundError unless sent_notification.noteable + + body = parse_body(message) - Notes::CreateService.new( - sent_notification.project, - sent_notification.recipient, - note: message.text_part.to_s, + note = Notes::CreateService.new( + project, + author, + note: body, noteable_type: sent_notification.noteable_type, noteable_id: sent_notification.noteable_id, commit_id: sent_notification.commit_id ).execute + + unless note.persisted? + raise InvalidNote, note.errors.full_messages.join("\n") + end end private def reply_key - address = Gitlab.config.reply_by_email.address - return nil unless address - - regex = Regexp.escape(address) - regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.*)") - regex = Regexp.new(regex) - - address = message.to.find { |address| address =~ regex } - return nil unless address + reply_key = nil + message.to.each do |address| + reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address) + break if reply_key + end - match = address.match(regex) - - return nil unless match && match[1].present? - - match[1] + reply_key end def sent_notification @@ -46,5 +71,86 @@ module Gitlab SentNotification.for(reply_key) end + + def parse_body(message) + body = select_body(message) + + encoding = body.encoding + raise EmptyEmailError if body.strip.blank? + + body = discourse_email_trimmer(body) + raise EmptyEmailError if body.strip.blank? + + body = EmailReplyParser.parse_reply(body) + raise EmptyEmailError if body.strip.blank? + + body.force_encoding(encoding).encode("UTF-8") + end + + def select_body(message) + html = nil + text = nil + + if message.multipart? + html = fix_charset(message.html_part) + text = fix_charset(message.text_part) + elsif message.content_type =~ /text\/html/ + html = fix_charset(message) + end + + # prefer plain text + return text if text + + if html + body = EmailHtmlCleaner.new(html).output_html + else + body = fix_charset(message) + end + + # Certain trigger phrases that means we didn't parse correctly + if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ + raise EmptyEmailError + end + + body + end + + # Force encoding to UTF-8 on a Mail::Message or Mail::Part + def fix_charset(object) + return nil if object.nil? + + if object.charset + object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s + else + object.body.to_s + end + rescue + nil + end + + REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date) + REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" }) + + def discourse_email_trimmer(body) + lines = body.scrub.lines.to_a + range_end = 0 + + lines.each_with_index do |l, idx| + break if l =~ /\A\s*\-{3,80}\s*\z/ || + # This one might be controversial but so many reply lines have years, times and end with a colon. + # Let's try it and see how well it works. + (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) || + (l =~ /On \w+ \d+,? \d+,?.*wrote:/) + + # Headers on subsequent lines + break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX } + # Headers on the same line + break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3 + + range_end = idx + end + + lines[0..range_end].join.strip + end end end diff --git a/lib/gitlab/reply_by_email.rb b/lib/gitlab/reply_by_email.rb new file mode 100644 index 0000000000..b6157de361 --- /dev/null +++ b/lib/gitlab/reply_by_email.rb @@ -0,0 +1,47 @@ +module Gitlab + module ReplyByEmail + class << self + def enabled? + config.enabled && + config.address && + config.address.include?("%{reply_key}") + end + + def reply_key + return nil unless enabled? + + SecureRandom.hex(16) + end + + def reply_address(reply_key) + config.address.gsub('%{reply_key}', reply_key) + end + + def reply_key_from_address(address) + return unless address_regex + + match = address.match(address_regex) + return unless match + + match[1] + end + + private + + def config + Gitlab.config.reply_by_email + end + + def address_regex + @address_regex ||= begin + wildcard_address = config.address + return nil unless wildcard_address + + regex = Regexp.escape(wildcard_address) + regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.+)") + Regexp.new(regex).freeze + end + end + end + end +end -- 2.30.9