Elasticsearch v6 - migrate from parent/child relationships to joins

8e03c123 · Mario de la Ossa · Nick Thomas · caf0a2e9 · 8e03c123 · 8e03c123
Commit 8e03c123 authored Oct 19, 2018 by Mario de la Ossa Committed by Nick Thomas Oct 19, 2018
41 changed files
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -96,13 +96,13 @@ stages:
  services:
    - postgres:9.2
    - redis:alpine
-    - docker.elastic.co/elasticsearch/elasticsearch:5.5.2
+    - docker.elastic.co/elasticsearch/elasticsearch:5.6.12

 .use-mysql-with-elasticsearch: &use-mysql-with-elasticsearch
  services:
    - mysql:5.7
    - redis:alpine
-    - docker.elastic.co/elasticsearch/elasticsearch:5.5.2
+    - docker.elastic.co/elasticsearch/elasticsearch:5.6.12

 # END EE-only service helpers
 .rails5-variables: &rails5-variables

--- a/app/models/issue_assignee.rb
+++ b/app/models/issue_assignee.rb
 # frozen_string_literal: true

 class IssueAssignee < ActiveRecord::Base
+  prepend EE::IssueAssignee
+
  belongs_to :issue
  belongs_to :assignee, class_name: "User", foreign_key: :user_id
-
-  # EE-specific
-  after_commit :update_elasticsearch_index, on: [:create, :destroy]
-  # EE-specific
-
-  def update_elasticsearch_index
-    if Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
-      ElasticIndexerWorker.perform_async(
-        :update,
-        'Issue',
-        issue.id,
-        changed_fields: ['assignee_ids']
-      )
-    end
-  end
 end
--- a/app/models/legacy_diff_note.rb
+++ b/app/models/legacy_diff_note.rb
@@ -8,7 +8,7 @@
 # A note of this type is never resolvable.
 class LegacyDiffNote < Note
  # Elastic search configuration (it does not support STI properly)
-  document_type 'note'
+  document_type 'doc'
  index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
  include Elastic::NotesSearch


--- a/app/models/personal_snippet.rb
+++ b/app/models/personal_snippet.rb
@@ -2,7 +2,7 @@

 class PersonalSnippet < Snippet
  # Elastic search configuration (it does not support STI)
-  document_type 'snippet'
+  document_type 'doc'
  index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
  include Elastic::SnippetsSearch
  include WithUploads

--- a/app/models/project_feature.rb
+++ b/app/models/project_feature.rb
@@ -59,7 +59,7 @@ class ProjectFeature < ActiveRecord::Base

  after_commit on: :update do
    if Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
-      ElasticIndexerWorker.perform_async(:update, 'Project', project_id)
+      ElasticIndexerWorker.perform_async(:update, 'Project', project_id, project.es_id)
    end
  end


--- a/app/models/project_snippet.rb
+++ b/app/models/project_snippet.rb
@@ -2,7 +2,7 @@

 class ProjectSnippet < Snippet
  # Elastic search configuration (it does not support STI)
-  document_type 'snippet'
+  document_type 'doc'
  index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
  include Elastic::SnippetsSearch


--- a/config/initializers/elastic_client_setup.rb
+++ b/config/initializers/elastic_client_setup.rb
@@ -2,6 +2,10 @@

 require 'gitlab/current_settings'

+Elasticsearch::Model::Response::Records.prepend GemExtensions::Elasticsearch::Model::Response::Records
+Elasticsearch::Model::Adapter::Multiple::Records.prepend GemExtensions::Elasticsearch::Model::Adapter::Multiple::Records
+Elasticsearch::Model::Indexing::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Indexing::InstanceMethods
+
 module Elasticsearch
  module Model
    module Client

--- a/doc/development/elasticsearch.md
+++ b/doc/development/elasticsearch.md
@@ -9,19 +9,19 @@ Information on how to enable ElasticSearch and perform the initial indexing is k
 It is recommended to use the Docker image. After installing docker you can immediately spin up an instance with

 ```
-docker run --name elastic55 -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:5.5.3
+docker run --name elastic56 -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:5.6.12
 ```

-and use `docker stop elastic55` and `docker start elastic55` to stop/start it.
+and use `docker stop elastic56` and `docker start elastic56` to stop/start it.

 ### Installing on the host

-We currently only support Elasticsearch [up to 5.5](https://docs.gitlab.com/ee/integration/elasticsearch.html#requirements), but `brew` only has elasticsearch 6, 5.6, and 2.4 available. While 2.4 would work you probably want to test things out in the latest one we support.
+We currently only support Elasticsearch [5.6 to 6.x](https://docs.gitlab.com/ee/integration/elasticsearch.html#requirements).

-In order to install 5.5.2, you would usually have to hunt down an old homebrew-core commit that contains the recipe for it. We've already done the work for you. Simply run:
+Version 5.6 is available on Homebrew and is the recommended version to use in order to test compatibility.

 ```
-brew install https://raw.githubusercontent.com/Homebrew/homebrew-core/f1a767645f61112762f05e68a610d89b161faa99/Formula/elasticsearch.rb
+brew install elasticsearch@5.6
 ```

 There is no need to install any plugins

--- a/doc/integration/elasticsearch.md
+++ b/doc/integration/elasticsearch.md
@@ -17,8 +17,7 @@ special searches:
 | -------------- | --------------------- |
 | GitLab Enterprise Edition 8.4 - 8.17  | Elasticsearch 2.4 with [Delete By Query Plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/2.4/plugins-delete-by-query.html) installed |
 | GitLab Enterprise Edition 9.0+        | Elasticsearch 5.1 - 5.5 |
-
-Elasticsearch 6.0+ is not supported currently. [We will support 6.0+ in the future.](https://gitlab.com/gitlab-org/gitlab-ee/issues/4218)
+| GitLab Enterprise Edition 11.4+        | Elasticsearch 5.6 - 6.x |

 ## Installing Elasticsearch


--- a/ee/app/helpers/ee/search_helper.rb
+++ b/ee/app/helpers/ee/search_helper.rb
+# frozen_string_literal: true
 module EE
  module SearchHelper
    extend ::Gitlab::Utils::Override
@@ -12,7 +13,7 @@ module EE
    override :find_project_for_result_blob
    # rubocop: disable CodeReuse/ActiveRecord
    def find_project_for_result_blob(result)
-      super || ::Project.find_by(id: result['_parent'])
+      super || ::Project.find_by(id: result.dig('_source', 'join_field', 'parent')&.split('_')&.last)
    end
    # rubocop: enable CodeReuse/ActiveRecord


--- a/ee/app/models/concerns/elastic/application_search.rb
+++ b/ee/app/models/concerns/elastic/application_search.rb
+# frozen_string_literal: true
 module Elastic
  module ApplicationSearch
    extend ActiveSupport::Concern
@@ -7,6 +8,9 @@ module Elastic

      index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')

+      # ES6 requires a single type per index
+      document_type 'doc'
+
      settings \
        index: {
          analysis: {
@@ -37,9 +41,159 @@ module Elastic
          }
        }

+      # Since we can't have multiple types in ES6, but want to be able to use JOINs, we must declare all our
+      # fields together instead of per model
+      mappings do
+        ### Shared fields
+        indexes :id, type: :integer
+        indexes :created_at, type: :date
+        indexes :updated_at, type: :date
+
+        # ES6-compatible way of having a parent; this join field is shared by all document types in the index.
+        # Please note that if we add a parent to `project` we'll have to use that "grand-parent" as the routing value
+        # for all children of project - therefore it is not advised.
+        indexes :join_field, type: :join,
+                             relations: {
+                               project: %i(
+                                 issue
+                                 merge_request
+                                 milestone
+                                 note
+                                 blob
+                                 wiki_blob
+                                 commit
+                               )
+                             }
+        # ES6 requires a single type per index, so we implement our own "type"
+        indexes :type, type: :keyword
+
+        indexes :iid, type: :integer
+
+        indexes :title, type: :text,
+                        index_options: 'offsets'
+        indexes :description, type: :text,
+                              index_options: 'offsets'
+        indexes :state, type: :text
+        indexes :project_id, type: :integer
+        indexes :author_id, type: :integer
+
+        ## Projects and Snippets
+        indexes :visibility_level, type: :integer
+
+        ### ISSUES
+        indexes :confidential, type: :boolean
+
+        # The field assignee_id no longer exists in the issues table.
+        # Nevertheless, we keep this field as is because we don't want users to have to rebuild the index.
+        # In addition, ES treats arrays transparently:
+        # any integer field accepts an array of integers without a mapping change.
+        # Moreover, such values can be queried just like a single integer value.
+        indexes :assignee_id, type: :integer
+
+        ### MERGE REQUESTS
+        indexes :target_branch, type: :text,
+                                index_options: 'offsets'
+        indexes :source_branch, type: :text,
+                                index_options: 'offsets'
+        indexes :merge_status, type: :text
+        indexes :source_project_id, type: :integer
+        indexes :target_project_id, type: :integer
+
+        ### NOTES
+        indexes :note, type: :text,
+                       index_options: 'offsets'
+
+        indexes :issue do
+          indexes :assignee_id, type: :integer
+          indexes :author_id, type: :integer
+          indexes :confidential, type: :boolean
+        end
+
+        # ES6 gets rid of "index: :not_analyzed" option, but a keyword type behaves the same
+        # as it is not analyzed and is only searchable by its exact value.
+        indexes :noteable_type, type: :keyword
+        indexes :noteable_id, type: :keyword
+
+        ### PROJECTS
+        indexes :name, type: :text,
+                       index_options: 'offsets'
+        indexes :path, type: :text,
+                       index_options: 'offsets'
+        indexes :name_with_namespace, type: :text,
+                                      index_options: 'offsets',
+                                      analyzer: :my_ngram_analyzer
+        indexes :path_with_namespace, type: :text,
+                                      index_options: 'offsets'
+        indexes :namespace_id, type: :integer
+        indexes :archived, type: :boolean
+
+        indexes :issues_access_level, type: :integer
+        indexes :merge_requests_access_level, type: :integer
+        indexes :snippets_access_level, type: :integer
+        indexes :wiki_access_level, type: :integer
+        indexes :repository_access_level, type: :integer
+
+        indexes :last_activity_at, type: :date
+        indexes :last_pushed_at, type: :date
+
+        ### SNIPPETS
+        indexes :file_name, type: :text,
+                            index_options: 'offsets'
+        indexes :content, type: :text,
+                          index_options: 'offsets'
+
+        ### REPOSITORIES
+        indexes :blob do
+          indexes :id, type: :text,
+                       index_options: 'offsets',
+                       analyzer: :sha_analyzer
+          indexes :rid, type: :keyword
+          indexes :oid, type: :text,
+                        index_options: 'offsets',
+                        analyzer: :sha_analyzer
+          indexes :commit_sha, type: :text,
+                               index_options: 'offsets',
+                               analyzer: :sha_analyzer
+          indexes :path, type: :text,
+                         analyzer: :path_analyzer
+          indexes :file_name, type: :text,
+                              analyzer: :code_analyzer,
+                              search_analyzer: :code_search_analyzer
+          indexes :content, type: :text,
+                            index_options: 'offsets',
+                            analyzer: :code_analyzer,
+                            search_analyzer: :code_search_analyzer
+          indexes :language, type: :keyword
+        end
+
+        indexes :commit do
+          indexes :id, type: :text,
+                       index_options: 'offsets',
+                       analyzer: :sha_analyzer
+          indexes :rid, type: :keyword
+          indexes :sha, type: :text,
+                        index_options: 'offsets',
+                        analyzer: :sha_analyzer
+
+          indexes :author do
+            indexes :name, type: :text, index_options: 'offsets'
+            indexes :email, type: :text, index_options: 'offsets'
+            indexes :time, type: :date, format: :basic_date_time_no_millis
+          end
+
+          indexes :commiter do
+            indexes :name, type: :text, index_options: 'offsets'
+            indexes :email, type: :text, index_options: 'offsets'
+            indexes :time, type: :date, format: :basic_date_time_no_millis
+          end
+
+          indexes :message, type: :text, index_options: 'offsets'
+        end
+      end
+
      after_commit on: :create do
        if Gitlab::CurrentSettings.elasticsearch_indexing? && self.searchable?
-          ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id)
+          ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id, self.es_id)
        end
      end

@@ -49,6 +203,7 @@ module Elastic
            :update,
            self.class.to_s,
            self.id,
+            self.es_id,
            changed_fields: self.previous_changes.keys
          )
        end
@@ -60,7 +215,8 @@ module Elastic
            :delete,
            self.class.to_s,
            self.id,
-            project_id: self.es_parent
+            self.es_id,
+            es_parent: self.es_parent
          )
        end
      end
@@ -70,8 +226,26 @@ module Elastic
        true
      end

+      def generic_attributes
+        {
+          'join_field' => {
+            'name' => es_type,
+            'parent' => es_parent
+          },
+          'type' => es_type
+        }
+      end
+
      def es_parent
-        project_id if respond_to?(:project_id)
+        "project_#{project_id}" unless is_a?(Project) || self&.project_id.nil?
+      end
+
+      def es_type
+        self.class.es_type
+      end
+
+      def es_id
+        "#{es_type}_#{id}"
      end

      # Some attributes are actually complicated methods. Bad data can cause
@@ -91,6 +265,10 @@ module Elastic
        false
      end

+      def es_type
+        name.underscore
+      end
+
      def highlight_options(fields)
        es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
          memo[field.to_sym] = {}
@@ -99,9 +277,11 @@ module Elastic
        { fields: es_fields }
      end

-      def import_with_parent(options = {})
+      def es_import(options = {})
        transform = lambda do |r|
-          { index: { _id: r.id, _parent: r.es_parent, data: r.__elasticsearch__.as_indexed_json } }
+          { index: { _id: r.es_id, data: r.__elasticsearch__.as_indexed_json } }.tap do |data|
+            data[:index][:routing] = r.es_parent if r.es_parent
+          end
        end

        options[:transform] = transform
@@ -120,6 +300,9 @@ module Elastic
                                 query: query,
                                 default_operator: :and
                               }
+                             }],
+                             filter: [{
+                               term: { type: self.es_type }
                             }]
                           }
                         }

--- a/ee/app/models/concerns/elastic/issues_search.rb
+++ b/ee/app/models/concerns/elastic/issues_search.rb
@@ -5,29 +5,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings _parent: { type: 'project' } do
-        indexes :id,          type: :integer
-        indexes :iid,         type: :integer
-        indexes :title,       type: :text,
-                              index_options: 'offsets'
-        indexes :description, type: :text,
-                              index_options: 'offsets'
-        indexes :created_at,  type: :date
-        indexes :updated_at,  type: :date
-        indexes :state,       type: :text
-        indexes :project_id,  type: :integer
-        indexes :author_id,   type: :integer
-
-        # The field assignee_id does not exist in issues table anymore.
-        # Nevertheless we'll keep this field as is because we don't want users to rebuild index
-        # + the ES treats arrays transparently so
-        # to any integer field you can write any array of integers and you don't have to change mapping.
-        # More over you can query those items just like a single integer value.
-        indexes :assignee_id, type: :integer
-
-        indexes :confidential, type: :boolean
-      end
-
      def as_indexed_json(options = {})
        data = {}

@@ -39,7 +16,7 @@ module Elastic

        data['assignee_id'] = safely_read_attribute_for_elasticsearch(:assignee_ids)

-        data
+        data.merge(generic_attributes)
      end

      def self.nested?

--- a/ee/app/models/concerns/elastic/merge_requests_search.rb
+++ b/ee/app/models/concerns/elastic/merge_requests_search.rb
@@ -5,26 +5,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings _parent: { type: 'project' } do
-        indexes :id,                type: :integer
-        indexes :iid,               type: :integer
-        indexes :target_branch,     type: :text,
-                                    index_options: 'offsets'
-        indexes :source_branch,     type: :text,
-                                    index_options: 'offsets'
-        indexes :title,             type: :text,
-                                    index_options: 'offsets'
-        indexes :description,       type: :text,
-                                    index_options: 'offsets'
-        indexes :created_at,        type: :date
-        indexes :updated_at,        type: :date
-        indexes :state,             type: :text
-        indexes :merge_status,      type: :text
-        indexes :source_project_id, type: :integer
-        indexes :target_project_id, type: :integer
-        indexes :author_id,         type: :integer
-      end
-
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributtes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
@@ -48,11 +28,11 @@ module Elastic
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

-        data
+        data.merge(generic_attributes)
      end

      def es_parent
-        target_project_id
+        "project_#{target_project_id}"
      end

      def self.nested?

--- a/ee/app/models/concerns/elastic/milestones_search.rb
+++ b/ee/app/models/concerns/elastic/milestones_search.rb
@@ -5,17 +5,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings _parent: { type: 'project' } do
-        indexes :id,          type: :integer
-        indexes :title,       type: :text,
-                              index_options: 'offsets'
-        indexes :description, type: :text,
-                              index_options: 'offsets'
-        indexes :project_id,  type: :integer
-        indexes :created_at,  type: :date
-        indexes :updated_at,  type: :date
-      end
-
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributtes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
@@ -25,7 +14,7 @@ module Elastic
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

-        data
+        data.merge(generic_attributes)
      end

      def self.nested?

--- a/ee/app/models/concerns/elastic/notes_search.rb
+++ b/ee/app/models/concerns/elastic/notes_search.rb
@@ -5,24 +5,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings _parent: { type: 'project' } do
-        indexes :id,          type: :integer
-        indexes :note,        type: :text,
-                              index_options: 'offsets'
-        indexes :project_id,  type: :integer
-        indexes :created_at,  type: :date
-        indexes :updated_at,  type: :date
-
-        indexes :issue do
-          indexes :assignee_id, type: :integer
-          indexes :author_id, type: :integer
-          indexes :confidential, type: :boolean
-        end
-
-        indexes :noteable_type,  type: :string, index: :not_analyzed
-        indexes :noteable_id,    type: :integer, index: :not_analyzed
-      end
-
      def self.inherited(subclass)
        super

@@ -31,6 +13,10 @@ module Elastic
        subclass.__elasticsearch__.instance_variable_set(:@mapping, self.mapping.dup)
      end

+      def es_type
+        'note'
+      end
+
      def as_indexed_json(options = {})
        data = {}

@@ -48,7 +34,7 @@ module Elastic
          }
        end

-        data
+        data.merge(generic_attributes)
      end

      def self.nested?

--- a/ee/app/models/concerns/elastic/projects_search.rb
+++ b/ee/app/models/concerns/elastic/projects_search.rb
@@ -13,35 +13,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings do
-        indexes :id,                  type: :integer
-        indexes :name,                type: :text,
-                                      index_options: 'offsets'
-        indexes :path,                type: :text,
-                                      index_options: 'offsets'
-        indexes :name_with_namespace, type: :text,
-                                      index_options: 'offsets',
-                                      analyzer: :my_ngram_analyzer
-        indexes :path_with_namespace, type: :text,
-                                      index_options: 'offsets'
-        indexes :description,         type: :text,
-                                      index_options: 'offsets'
-        indexes :namespace_id,        type: :integer
-        indexes :created_at,          type: :date
-        indexes :updated_at,          type: :date
-        indexes :archived,            type: :boolean
-
-        indexes :visibility_level,            type: :integer
-        indexes :issues_access_level,         type: :integer
-        indexes :merge_requests_access_level, type: :integer
-        indexes :snippets_access_level,       type: :integer
-        indexes :wiki_access_level,           type: :integer
-        indexes :repository_access_level,     type: :integer
-
-        indexes :last_activity_at,    type: :date
-        indexes :last_pushed_at,      type: :date
-      end
-
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributtes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
@@ -64,6 +35,12 @@ module Elastic
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

+        # Set it as a parent in our `project => child` JOIN field
+        data['join_field'] = es_type
+
+        # ES6 is now single-type per index, so we implement our own typing
+        data['type'] = 'project'
+
        TRACKED_FEATURE_SETTINGS.each do |feature|
          data[feature] = project_feature.public_send(feature) # rubocop:disable GitlabSecurity/PublicSend
        end

--- a/ee/app/models/concerns/elastic/repositories_search.rb
+++ b/ee/app/models/concerns/elastic/repositories_search.rb
@@ -11,6 +11,10 @@ module Elastic
        project.id
      end

+      def es_type
+        'blob'
+      end
+
      delegate :id, to: :project, prefix: true

      def client_for_indexing

--- a/ee/app/models/concerns/elastic/snippets_search.rb
+++ b/ee/app/models/concerns/elastic/snippets_search.rb
@@ -5,22 +5,6 @@ module Elastic
    included do
      include ApplicationSearch

-      mappings do
-        indexes :id,               type: :integer
-        indexes :title,            type: :text,
-                                   index_options: 'offsets'
-        indexes :file_name,        type: :text,
-                                   index_options: 'offsets'
-        indexes :content,          type: :text,
-                                   index_options: 'offsets'
-        indexes :created_at,       type: :date
-        indexes :updated_at,       type: :date
-        indexes :state,            type: :text
-        indexes :project_id,       type: :integer
-        indexes :author_id,        type: :integer
-        indexes :visibility_level, type: :integer
-      end
-
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributtes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
@@ -40,6 +24,9 @@ module Elastic
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end

+        # ES6 is now single-type per index, so we implement our own typing
+        data['type'] = es_type
+
        data
      end


--- a/ee/app/models/concerns/elastic/wiki_repositories_search.rb
+++ b/ee/app/models/concerns/elastic/wiki_repositories_search.rb
@@ -11,6 +11,10 @@ module Elastic
        "wiki_#{project.id}"
      end

+      def es_type
+        'wiki_blob'
+      end
+
      delegate :id, to: :project, prefix: true

      def client_for_indexing

--- a/ee/app/models/ee/issue_assignee.rb
+++ b/ee/app/models/ee/issue_assignee.rb
+# frozen_string_literal: true
+module EE
+  module IssueAssignee
+    extend ActiveSupport::Concern
+
+    prepended do
+      after_commit :update_elasticsearch_index, on: [:create, :destroy]
+    end
+
+    def update_elasticsearch_index
+      if ::Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
+        ::ElasticIndexerWorker.perform_async(
+          :update,
+          'Issue',
+          issue.id,
+          issue.es_id,
+          changed_fields: ['assignee_ids']
+        )
+      end
+    end
+  end
+end
--- a/ee/app/workers/elastic_indexer_worker.rb
+++ b/ee/app/workers/elastic_indexer_worker.rb
+# frozen_string_literal: true
 class ElasticIndexerWorker
  include ApplicationWorker
  include Elasticsearch::Model::Client::ClassMethods
@@ -6,7 +7,7 @@ class ElasticIndexerWorker

  ISSUE_TRACKED_FIELDS = %w(assignee_ids author_id confidential).freeze

-  def perform(operation, class_name, record_id, options = {})
+  def perform(operation, class_name, record_id, es_id, options = {})
    return true unless Gitlab::CurrentSettings.elasticsearch_indexing?

    klass = class_name.constantize
@@ -17,7 +18,7 @@ class ElasticIndexerWorker
      record.__elasticsearch__.client = client

      if klass.nested?
-        record.__elasticsearch__.__send__ "#{operation}_document", parent: record.es_parent # rubocop:disable GitlabSecurity/PublicSend
+        record.__elasticsearch__.__send__ "#{operation}_document", routing: record.es_parent # rubocop:disable GitlabSecurity/PublicSend
      else
        record.__elasticsearch__.__send__ "#{operation}_document" # rubocop:disable GitlabSecurity/PublicSend
      end
@@ -28,14 +29,13 @@ class ElasticIndexerWorker
        client.delete(
          index: klass.index_name,
          type: klass.document_type,
-          id: record_id,
-          parent: options["project_id"]
+          id: es_id,
+          routing: options["es_parent"]
        )
      else
-        client.delete index: klass.index_name, type: klass.document_type, id: record_id
+        clear_project_data(record_id, es_id) if klass == Project
+        client.delete index: klass.index_name, type: klass.document_type, id: es_id
      end
-
-      clear_project_data(record_id) if klass == Project
    end
  rescue Elasticsearch::Transport::Transport::Errors::NotFound, ActiveRecord::RecordNotFound
    # These errors can happen in several cases, including:
@@ -51,15 +51,12 @@ class ElasticIndexerWorker

  def update_issue_notes(record, changed_fields)
    if changed_fields && (changed_fields & ISSUE_TRACKED_FIELDS).any?
-      Note.import_with_parent query: -> { where(noteable: record) }
+      Note.es_import query: -> { where(noteable: record) }
    end
  end

-  def clear_project_data(record_id)
-    remove_children_documents(Repository.document_type, record_id)
-    remove_children_documents(ProjectWiki.document_type, record_id)
-    remove_children_documents(MergeRequest.document_type, record_id)
-    remove_documents_by_project_id(record_id)
+  def clear_project_data(record_id, es_id)
+    remove_children_documents('project', record_id, es_id)
  end

  def remove_documents_by_project_id(record_id)
@@ -73,14 +70,17 @@ class ElasticIndexerWorker
    })
  end

-  def remove_children_documents(document_type, parent_record_id)
+  def remove_children_documents(parent_type, parent_record_id, parent_es_id)
    client.delete_by_query({
      index: Project.__elasticsearch__.index_name,
+      routing: parent_es_id,
      body: {
        query: {
-          parent_id: {
-            type: document_type,
-            id: parent_record_id
+          has_parent: {
+            parent_type: parent_type,
+            query: {
+              term: { id: parent_record_id }
+            }
          }
        }
      }

--- a/ee/changelogs/unreleased/elasticsearch_v6.yml
+++ b/ee/changelogs/unreleased/elasticsearch_v6.yml
+---
+title: elasticsearch 6 support - migrate from parent/child relationships to join
+merge_request: 7618
+author:
+type: added
--- a/ee/lib/elasticsearch/git/model.rb
+++ b/ee/lib/elasticsearch/git/model.rb
@@ -16,6 +16,8 @@ module Elasticsearch

        index_name [self.name.downcase, 'index', env].compact.join('-')

+        document_type 'doc'
+
        settings \
          index: {
          analysis: {
@@ -56,7 +58,7 @@ module Elasticsearch
            filter: {
              code: {
                type: "pattern_capture",
-                preserve_original: 1,
+                preserve_original: true,
                patterns: [
                  "(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
                  "(\\d+)",

--- a/ee/lib/elasticsearch/git/repository.rb
+++ b/ee/lib/elasticsearch/git/repository.rb
@@ -12,53 +12,12 @@ module Elasticsearch
        include Elasticsearch::Git::Model
        include Elasticsearch::Git::EncoderHelper

-        mapping _parent: { type: 'project' } do
-          indexes :blob do
-            indexes :id,          type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :sha_analyzer
-            indexes :rid,         type: :keyword
-            indexes :oid,         type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :sha_analyzer
-            indexes :commit_sha,  type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :sha_analyzer
-            indexes :path,        type: :text,
-                                  analyzer: :path_analyzer
-            indexes :file_name,   type: :text,
-                                  analyzer: :code_analyzer,
-                                  search_analyzer: :code_search_analyzer
-            indexes :content,     type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :code_analyzer,
-                                  search_analyzer: :code_search_analyzer
-            indexes :language,    type: :keyword
-          end
-
-          indexes :commit do
-            indexes :id,          type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :sha_analyzer
-            indexes :rid,         type: :keyword
-            indexes :sha,         type: :text,
-                                  index_options: 'offsets',
-                                  analyzer: :sha_analyzer
-
-            indexes :author do
-              indexes :name,      type: :text, index_options: 'offsets'
-              indexes :email,     type: :text, index_options: 'offsets'
-              indexes :time,      type: :date, format: :basic_date_time_no_millis
-            end
-
-            indexes :commiter do
-              indexes :name,      type: :text, index_options: 'offsets'
-              indexes :email,     type: :text, index_options: 'offsets'
-              indexes :time,      type: :date, format: :basic_date_time_no_millis
-            end
+        def es_parent
+          "project_#{project_id}"
+        end

-            indexes :message,     type: :text, index_options: 'offsets'
-          end
+        def es_type
+          'blob'
        end

        # Indexing all text-like blobs in repository
@@ -118,9 +77,9 @@ module Elasticsearch
          {
            delete: {
              _index: "#{self.class.index_name}",
-              _type: self.class.name.underscore,
+              _type: 'doc',
              _id: "#{repository_id}_#{blob.path}",
-              _parent: project_id
+              routing: es_parent
            }
          }
        end
@@ -131,12 +90,11 @@ module Elasticsearch
          {
            index:  {
              _index: "#{self.class.index_name}",
-              _type: self.class.name.underscore,
+              _type: 'doc',
              _id: "#{repository_id}_#{blob.path}",
-              _parent: project_id,
+              routing: es_parent,
              data: {
                blob: {
-                  type: "blob",
                  oid: blob.id,
                  rid: repository_id,
                  content: blob.data,
@@ -150,10 +108,15 @@ module Elasticsearch
                  # install newest versions
                  # https://github.com/elastic/elasticsearch-mapper-attachments/issues/124
                  file_name: blob.path,
-
                  # Linguist is not available in the Ruby indexer. The Go indexer can
                  # fill in the right language.
                  language: nil
+                },
+                type: es_type,
+                join_field: {
+                  'name' => es_type,
+                  'parent' => es_parent
+
                }
              }
            }
@@ -217,12 +180,11 @@ module Elasticsearch
          {
            index:  {
              _index: "#{self.class.index_name}",
-              _type: self.class.name.underscore,
+              _type: 'doc',
              _id: "#{repository_id}_#{commit.oid}",
-              _parent: project_id,
+              routing: es_parent,
              data: {
                commit: {
-                  type: "commit",
                  rid: repository_id,
                  sha: commit.oid,
                  author: {
@@ -236,6 +198,11 @@ module Elasticsearch
                    time: committer[:time].strftime('%Y%m%dT%H%M%S%z')
                  },
                  message: encode!(commit.message)
+                },
+                type: 'commit',
+                join_field: {
+                  'name' => 'commit',
+                  'parent' => es_parent
                }
              }
            }
@@ -288,7 +255,6 @@ module Elasticsearch
              if b.text?
                result.push(
                  {
-                    type: 'blob',
                    id: "#{target_sha}_#{b.path}",
                    rid: repository_id,
                    oid: b.id,
@@ -312,7 +278,6 @@ module Elasticsearch
            if b.text?
              result.push(
                {
-                  type: 'blob',
                  id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
                  rid: repository_id,
                  oid: b.id,
@@ -338,7 +303,6 @@ module Elasticsearch
            if obj.type == :commit
              res.push(
                {
-                  type: 'commit',
                  sha: obj.oid,
                  author: obj.author,
                  committer: obj.committer,
@@ -410,10 +374,11 @@ module Elasticsearch
          when :all
            results[:blobs] = search_blob(query, page: page, per: per, options: options)
            results[:commits] = search_commit(query, page: page, per: per, options: options)
-          when :blob
-            results[:blobs] = search_blob(query, page: page, per: per, options: options)
+            results[:wiki_blobs] = search_blob(query, type: :wiki_blob, page: page, per: per, options: options)
          when :commit
            results[:commits] = search_commit(query, page: page, per: per, options: options)
+          when :blob, :wiki_blob
+            results[type.to_s.pluralize.to_sym] = search_blob(query, type: type, page: page, per: per, options: options)
          end

          results
@@ -434,7 +399,7 @@ module Elasticsearch
                    default_operator: :and
                  }
                },
-                filter: [{ term: { 'commit.type' => 'commit' } }]
+                filter: [{ term: { 'type' => 'commit' } }]
              }
            },
            size: per,
@@ -481,7 +446,7 @@ module Elasticsearch
          }
        end

-        def search_blob(query, type: :all, page: 1, per: 20, options: {})
+        def search_blob(query, type: :blob, page: 1, per: 20, options: {})
          page ||= 1

          query = ::Gitlab::Search::Query.new(query) do
@@ -500,7 +465,9 @@ module Elasticsearch
                    fields: %w[blob.content blob.file_name]
                  }
                },
-                filter: [{ term: { 'blob.type' => 'blob' } }]
+                filter: [
+                  { term: { type: type } }
+                ]
              }
            },
            size: per,

--- a/ee/lib/gem_extensions/elasticsearch/model/adapter/multiple/records.rb
+++ b/ee/lib/gem_extensions/elasticsearch/model/adapter/multiple/records.rb
+# frozen_string_literal: true
+module GemExtensions
+  module Elasticsearch
+    module Model
+      module Adapter
+        module Multiple
+          # Database rows are recovered through the `id` stored in each hit's
+          # `_source` rather than the document `_id` that elasticsearch-model
+          # uses by default, and the model for a hit is resolved from its
+          # index name together with `_source.type`.
+          module Records
+            # Maps every hit to its entry in `__records_by_type` (looked up by
+            # resolved model, then by stringified `_source.id`) and drops the
+            # hits that have no entry. Hit order is preserved.
+            def records
+              lookup = __records_by_type
+              hits = response.response["hits"]["hits"]
+
+              found = hits.map do |hit|
+                lookup[__type_for_hit(hit)][hit[:_source][:id].to_s]
+              end
+
+              found.compact
+            end
+
+            # Finds the registered model whose `index_name` and `es_type` match
+            # the hit. Successful lookups are memoised in a class variable
+            # under the key "<index>::<type>".
+            def __type_for_hit(hit)
+              @@__types ||= {} # rubocop:disable Style/ClassVars
+
+              index = hit[:_index]
+              type = hit[:_source][:type]
+              cache_key = "#{index}::#{type}"
+
+              @@__types[cache_key] ||= ::Elasticsearch::Model::Registry.all.detect do |model|
+                model.index_name == index && model.es_type == type
+              end
+            end
+
+            # Groups the `_source.id` of each hit under the model resolved for
+            # that hit, keeping hit order inside every group.
+            def __ids_by_type
+              response.response["hits"]["hits"].each_with_object({}) do |hit, grouped|
+                (grouped[__type_for_hit(hit)] ||= []) << hit[:_source][:id]
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
--- a/ee/lib/gem_extensions/elasticsearch/model/indexing/instance_methods.rb
+++ b/ee/lib/gem_extensions/elasticsearch/model/indexing/instance_methods.rb
+# frozen_string_literal: true
+module GemExtensions
+  module Elasticsearch
+    module Model
+      module Indexing
+        # We need `_id` to be the model's `#es_id` in all indexing/editing operations
+        module InstanceMethods
+          # Indexes the complete `as_indexed_json` document under `es_id`.
+          #
+          # @param options [Hash] extra arguments for `client.index`; they are
+          #   merged last, so they override the defaults built here
+          # @return whatever `client.index` returns
+          def index_document(options = {})
+            document = as_indexed_json
+
+            client.index(
+              { index: index_name,
+                type:  document_type,
+                id:    es_id,
+                body:  document }.merge(options)
+            )
+          end
+
+          # Deletes the document stored under `es_id`.
+          #
+          # @param options [Hash] extra arguments for `client.delete`, merged
+          #   over the defaults
+          # @return whatever `client.delete` returns
+          def delete_document(options = {})
+            client.delete(
+              { index: index_name,
+                type:  document_type,
+                id:    es_id }.merge(options)
+            )
+          end
+
+          # Sends a partial update limited to the changed attributes recorded
+          # in `@__changed_model_attributes`. When nothing is recorded there,
+          # the whole document is re-indexed through `index_document`.
+          #
+          # @param options [Hash] extra arguments for the client call, merged
+          #   over the defaults
+          # @return whatever `client.update` (or `index_document`) returns
+          def update_document(options = {})
+            changed = instance_variable_get(:@__changed_model_attributes).presence
+            return index_document(options) unless changed
+
+            attributes =
+              if respond_to?(:as_indexed_json)
+                indexed = as_indexed_json
+                # Build the key list once instead of once per indexed attribute.
+                changed_keys = changed.keys.map(&:to_s)
+                indexed.select { |key, _value| changed_keys.include?(key.to_s) }
+              else
+                changed
+              end
+
+            client.update(
+              { index: index_name,
+                type:  document_type,
+                id:    es_id,
+                body:  { doc: attributes } }.merge(options)
+            )
+          end
+
+          # Sends a partial update containing exactly the given attributes.
+          #
+          # @param attributes [Hash] fields to write into the stored document
+          # @param options [Hash] extra arguments for `client.update`, merged
+          #   over the defaults
+          # @return whatever `client.update` returns
+          def update_document_attributes(attributes, options = {})
+            client.update(
+              { index: index_name,
+                type:  document_type,
+                id:    es_id,
+                body:  { doc: attributes } }.merge(options)
+            )
+          end
+        end
+      end
+    end
+  end
+end
--- a/ee/lib/gem_extensions/elasticsearch/model/response/records.rb
+++ b/ee/lib/gem_extensions/elasticsearch/model/response/records.rb
+# frozen_string_literal: true
+module GemExtensions
+  module Elasticsearch
+    module Model
+      module Response
+        # Database records are recovered through the `id` stored in each
+        # hit's `_source`, instead of the `_id` that elasticsearch-model
+        # originally uses.
+        module Records
+          # Returns the `_source.id` of every hit, in response order.
+          def ids
+            hits = response.response['hits']['hits']
+            hits.map { |hit| hit['_source']['id'] }
+          end
+        end
+      end
+    end
+  end
+end
--- a/ee/lib/gitlab/elastic/helper.rb
+++ b/ee/lib/gitlab/elastic/helper.rb
@@ -18,11 +18,16 @@ module Gitlab
          Repository
        ].each do |klass|
          settings.deep_merge!(klass.settings.to_hash)
-          mappings.merge!(klass.mappings.to_hash)
+          mappings.deep_merge!(klass.mappings.to_hash)
        end

        client = Project.__elasticsearch__.client

+        # ES 5.6 needs this setting enabled to support join datatypes; ES 6 does not support the setting
+        if Gitlab::VersionInfo.parse(client.info['version']['number']) < Gitlab::VersionInfo.new(6)
+          settings['index.mapping.single_type'] = true
+        end
+
        if client.indices.exists? index: index_name
          client.indices.delete index: index_name
        end

--- a/ee/lib/gitlab/elastic/project_search_results.rb
+++ b/ee/lib/gitlab/elastic/project_search_results.rb
@@ -75,9 +75,9 @@ module Gitlab
        if project.wiki_enabled? && !project.wiki.empty? && query.present?
          project.wiki.search(
            query,
-            type: :blob,
+            type: :wiki_blob,
            options: { highlight: true }
-          )[:blobs][:results].response
+          )[:wiki_blobs][:results].response
        else
          Kaminari.paginate_array([])
        end

--- a/ee/lib/gitlab/elastic/search_results.rb
+++ b/ee/lib/gitlab/elastic/search_results.rb
@@ -172,9 +172,9 @@ module Gitlab

          ProjectWiki.search(
            query,
-            type: :blob,
+            type: :wiki_blob,
            options: opt.merge({ highlight: true })
-          )[:blobs][:results].response
+          )[:wiki_blobs][:results].response
        end
      end


--- a/ee/lib/tasks/gitlab/elastic.rake
+++ b/ee/lib/tasks/gitlab/elastic.rake
@@ -74,13 +74,10 @@ namespace :gitlab do

        klass = Kernel.const_get(klass_name)

-        case klass_name
-        when 'Note'
-          Note.searchable.import_with_parent
-        when 'Project', 'Snippet'
-          klass.import
+        if klass_name == 'Note'
+          Note.searchable.es_import
        else
-          klass.import_with_parent
+          klass.es_import
        end

        logger.info("Indexing #{klass_name.pluralize}... " + "done".color(:green))

--- a/ee/spec/helpers/search_helper_spec.rb
+++ b/ee/spec/helpers/search_helper_spec.rb
@@ -51,7 +51,7 @@ describe SearchHelper do
      Gitlab::Elastic::Helper.refresh_index
      @project_2.destroy

-      blob = { _parent: @project_2.id }
+      blob = { _source: { join_field: { parent: @project_2.es_id } } }.as_json

      result = find_project_for_result_blob(blob)


--- a/ee/spec/lib/gitlab/elastic/search_results_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/search_results_spec.rb
@@ -779,14 +779,14 @@ describe Gitlab::Elastic::SearchResults do
        results = described_class.new(user, 'term', limit_project_ids)
        blobs = results.objects('wiki_blobs')

-        expect(blobs.map {|blob| blob._parent.to_i }).to match_array [internal_project.id, private_project2.id, public_project.id]
+        expect(blobs.map { |blob| blob.join_field.parent }).to match_array [internal_project.es_id, private_project2.es_id, public_project.es_id]
        expect(results.wiki_blobs_count).to eq 3

        # Unauthenticated search
        results = described_class.new(nil, 'term', [])
        blobs = results.objects('wiki_blobs')

-        expect(blobs.first._parent.to_i).to eq public_project.id
+        expect(blobs.first.join_field.parent).to eq public_project.es_id
        expect(results.wiki_blobs_count).to eq 1
      end
    end
@@ -843,14 +843,14 @@ describe Gitlab::Elastic::SearchResults do
        results = described_class.new(user, 'tesla', limit_project_ids)
        blobs = results.objects('blobs')

-        expect(blobs.map { |blob| blob._parent.to_i }).to match_array [internal_project.id, private_project2.id, public_project.id]
+        expect(blobs.map { |blob| blob.join_field.parent }).to match_array [internal_project.es_id, private_project2.es_id, public_project.es_id]
        expect(results.blobs_count).to eq 3

        # Unauthenticated search
        results = described_class.new(nil, 'tesla', [])
        blobs = results.objects('blobs')

-        expect(blobs.first._parent.to_i).to eq public_project.id.to_i
+        expect(blobs.first.join_field.parent).to eq public_project.es_id
        expect(results.blobs_count).to eq 1
      end
    end

--- a/ee/spec/models/concerns/elastic/issue_spec.rb
+++ b/ee/spec/models/concerns/elastic/issue_spec.rb
@@ -33,6 +33,13 @@ describe Issue, :elastic do
    expected_hash = issue.attributes.extract!('id', 'iid', 'title', 'description', 'created_at',
                                                'updated_at', 'state', 'project_id', 'author_id',
                                                'confidential')
+                                    .merge({
+                                            'join_field' => {
+                                              'name' => issue.es_type,
+                                              'parent' => issue.es_parent
+                                            },
+                                            'type' => issue.es_type
+                                           })

    expected_hash['assignee_id'] = [assignee.id]


--- a/ee/spec/models/concerns/elastic/merge_request_spec.rb
+++ b/ee/spec/models/concerns/elastic/merge_request_spec.rb
@@ -44,7 +44,13 @@ describe MergeRequest, :elastic do
      'source_project_id',
      'target_project_id',
      'author_id'
-    )
+    ).merge({
+              'join_field' => {
+                'name' => merge_request.es_type,
+                'parent' => merge_request.es_parent
+              },
+              'type' => merge_request.es_type
+            })

    expect(merge_request.as_indexed_json).to eq(expected_hash)
  end

--- a/ee/spec/models/concerns/elastic/milestone_spec.rb
+++ b/ee/spec/models/concerns/elastic/milestone_spec.rb
@@ -35,7 +35,13 @@ describe Milestone, :elastic do
      'project_id',
      'created_at',
      'updated_at'
-    )
+    ).merge({
+      'join_field' => {
+        'name' => milestone.es_type,
+        'parent' => milestone.es_parent
+      },
+      'type' => milestone.es_type
+    })

    expect(milestone.as_indexed_json).to eq(expected_hash)
  end

--- a/ee/spec/models/concerns/elastic/note_spec.rb
+++ b/ee/spec/models/concerns/elastic/note_spec.rb
@@ -55,6 +55,8 @@ describe Note, :elastic do
      created_at
      updated_at
      issue
+      join_field
+      type
    )

    expect(note.as_indexed_json.keys).to eq(expected_hash_keys)
@@ -66,7 +68,7 @@ describe Note, :elastic do
    issue = create :issue, project: project, updated_at: 1.minute.ago

    # Only issue should be updated
-    expect(ElasticIndexerWorker).to receive(:perform_async).with(:update, 'Issue', anything, anything)
+    expect(ElasticIndexerWorker).to receive(:perform_async).with(:update, 'Issue', anything, anything, anything)
    create :note, :system, project: project, noteable: issue
  end


--- a/ee/spec/models/concerns/elastic/project_spec.rb
+++ b/ee/spec/models/concerns/elastic/project_spec.rb
@@ -57,7 +57,7 @@ describe Project, :elastic do
      'updated_at',
      'visibility_level',
      'last_activity_at'
-    )
+    ).merge({ 'join_field' => project.es_type, 'type' => project.es_type })

    expected_hash.merge!(
      project.project_feature.attributes.extract!(

--- a/ee/spec/models/concerns/elastic/project_wiki_spec.rb
+++ b/ee/spec/models/concerns/elastic/project_wiki_spec.rb
@@ -16,7 +16,7 @@ describe ProjectWiki, :elastic do
      Gitlab::Elastic::Helper.refresh_index
    end

-    expect(project.wiki.search('term1', type: :blob)[:blobs][:total_count]).to eq(1)
-    expect(project.wiki.search('term1 | term2', type: :blob)[:blobs][:total_count]).to eq(2)
+    expect(project.wiki.search('term1', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
+    expect(project.wiki.search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2)
  end
 end
--- a/ee/spec/models/concerns/elastic/snippet_spec.rb
+++ b/ee/spec/models/concerns/elastic/snippet_spec.rb
@@ -124,7 +124,7 @@ describe Snippet, :elastic do
      'project_id',
      'author_id',
      'visibility_level'
-    )
+    ).merge({ 'type' => snippet.es_type })

    expect(snippet.as_indexed_json).to eq(expected_hash)
  end

--- a/ee/spec/workers/elastic_indexer_worker_spec.rb
+++ b/ee/spec/workers/elastic_indexer_worker_spec.rb