Commit 65dac44a authored by Mark Chao

Extract record indexing as a service

Separation of concerns
parent 76dadf3d
# frozen_string_literal: true
module Elastic
  # Writes a single record to Elasticsearch, either by indexing it as a new
  # document or by updating its existing document, and triggers the follow-up
  # work that Project and Issue records require.
  class IndexRecordService
    include Elasticsearch::Model::Client::ClassMethods

    # Issue attributes that, when reported as changed, cause the issue's notes
    # to be re-imported.
    ISSUE_TRACKED_FIELDS = %w(assignee_ids author_id confidential).freeze

    # Indexes or updates +record+ and runs the class-specific follow-up steps.
    #
    # @param record [Object] model instance exposing `__elasticsearch__`
    # @param indexing [Boolean] determines whether operation is "indexing" or "updating"
    # @param options [Hash] only the "changed_fields" key is read, and only for Issue records
    # @return [Object] `true` when a not-found error was rescued; otherwise the
    #   value of the last conditional step (nil when that step did not run)
    def execute(record, indexing, options = {})
      record.__elasticsearch__.client = client
      klass = record.class

      import(record, klass.nested?, indexing)

      # Exact class comparisons are intentional here: only these two classes
      # get extra work.
      initial_index_project(record) if klass == Project && indexing
      update_issue_notes(record, options["changed_fields"]) if klass == Issue
    rescue Elasticsearch::Transport::Transport::Errors::NotFound, ActiveRecord::RecordNotFound
      # Both errors are expected in normal operation, for example when:
      # - a record is updated and then removed before the update is processed
      # - indexing has been enabled but some items are not indexed yet, so
      #   updating or deleting those un-indexed records raises
      #
      # They are safe to ignore.
      true
    end

    private

    # Re-imports every note attached to +record+ when any tracked issue field
    # is listed in +changed_fields+; does nothing otherwise.
    def update_issue_notes(record, changed_fields)
      return unless changed_fields
      return unless (changed_fields & ISSUE_TRACKED_FIELDS).any?

      Note.es_import query: -> { where(noteable: record) }
    end

    # Indexes every object yielded by the project's indexed associations and
    # then enqueues the commit indexer for the project.
    def initial_index_project(project)
      project.each_indexed_association do |association_class, collection|
        nested = association_class.nested?

        collection.find_each do |item|
          import(item, nested, true)
        end
      end

      # Finally, index blobs/commits/wikis
      ElasticCommitIndexerWorker.perform_async(project.id)
    end

    # Sends the index or update call to the record's Elasticsearch proxy.
    # Nested records are routed by their `es_parent`.
    def import(record, nested, indexing)
      operation =
        if indexing
          'index_document'
        else
          'update_document'
        end

      proxy = record.__elasticsearch__

      return proxy.__send__(operation) unless nested # rubocop:disable GitlabSecurity/PublicSend

      proxy.__send__(operation, routing: record.es_parent) # rubocop:disable GitlabSecurity/PublicSend
    end
  end
end
...@@ -13,7 +13,7 @@ class ElasticFullIndexWorker ...@@ -13,7 +13,7 @@ class ElasticFullIndexWorker
return true unless Gitlab::CurrentSettings.elasticsearch_indexing? return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
Project.id_in(start_id..end_id).find_each do |project| Project.id_in(start_id..end_id).find_each do |project|
ElasticIndexerWorker.new.index(:index, project) Elastic::IndexRecordService.new.execute(project, true)
end end
end end
end end
...@@ -5,8 +5,6 @@ class ElasticIndexerWorker ...@@ -5,8 +5,6 @@ class ElasticIndexerWorker
sidekiq_options retry: 2 sidekiq_options retry: 2
ISSUE_TRACKED_FIELDS = %w(assignee_ids author_id confidential).freeze
def perform(operation, class_name, record_id, es_id, options = {}) def perform(operation, class_name, record_id, es_id, options = {})
return true unless Gitlab::CurrentSettings.elasticsearch_indexing? return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
...@@ -14,7 +12,11 @@ class ElasticIndexerWorker ...@@ -14,7 +12,11 @@ class ElasticIndexerWorker
case operation.to_s case operation.to_s
when /index|update/ when /index|update/
index(operation, klass.find(record_id), options) Elastic::IndexRecordService.new.execute(
klass.find(record_id),
operation.to_s.match?(/index/),
options
)
when /delete/ when /delete/
if klass.nested? if klass.nested?
client.delete( client.delete(
...@@ -38,46 +40,13 @@ class ElasticIndexerWorker ...@@ -38,46 +40,13 @@ class ElasticIndexerWorker
true true
end end
def index(operation, record, options = {})
record.__elasticsearch__.client = client
import(operation, record, record.class)
initial_index_project(record) if record.class == Project && operation.to_s.match?(/index/)
update_issue_notes(record, options["changed_fields"]) if record.class == Issue
end
private private
def update_issue_notes(record, changed_fields)
if changed_fields && (changed_fields & ISSUE_TRACKED_FIELDS).any?
Note.es_import query: -> { where(noteable: record) }
end
end
def clear_project_data(record_id, es_id) def clear_project_data(record_id, es_id)
remove_children_documents('project', record_id, es_id) remove_children_documents('project', record_id, es_id)
IndexStatus.for_project(record_id).delete_all IndexStatus.for_project(record_id).delete_all
end end
def initial_index_project(project)
project.each_indexed_association do |klass, objects|
objects.find_each { |object| import(:index, object, klass) }
end
# Finally, index blobs/commits/wikis
ElasticCommitIndexerWorker.perform_async(project.id)
end
def import(operation, record, klass)
if klass.nested?
record.__elasticsearch__.__send__ "#{operation}_document", routing: record.es_parent # rubocop:disable GitlabSecurity/PublicSend
else
record.__elasticsearch__.__send__ "#{operation}_document" # rubocop:disable GitlabSecurity/PublicSend
end
end
def remove_documents_by_project_id(record_id) def remove_documents_by_project_id(record_id)
client.delete_by_query({ client.delete_by_query({
index: Project.__elasticsearch__.index_name, index: Project.__elasticsearch__.index_name,
......
require 'spec_helper'
# Specs for Elastic::IndexRecordService#execute, run with the :elastic tag.
# Each example calls the service directly and then inspects search results.
describe Elastic::IndexRecordService, :elastic do
subject { described_class.new }
# Turn indexing on in the stubbed application settings and build the
# Elasticsearch client from the current configuration before each example.
before do
stub_ee_application_setting(elasticsearch_indexing: true)
Elasticsearch::Model.client =
Gitlab::Elastic::Client.build(Gitlab::CurrentSettings.elasticsearch_config)
end
# NOTE(review): the group title mentions deleting, but the examples below only
# cover indexing and updating.
describe 'Indexing, updating, and deleting records' do
using RSpec::Parameterized::TableSyntax
# Columns: factory name, class name, and the attribute changed in the update
# example. The :name column is not referenced by the examples in this group.
where(:type, :name, :attribute) do
:project | "Project" | :name
:issue | "Issue" | :title
:note | "Note" | :note
:milestone | "Milestone" | :title
:merge_request | "MergeRequest" | :title
end
with_them do
it 'indexes new records' do
object = nil
# Create the record with Sidekiq testing disabled; presumably this keeps
# creation-time jobs from indexing it before the service is called.
Sidekiq::Testing.disable! do
object = create(type)
end
# execute(..., true) requests the "indexing" operation; the index is
# refreshed before the match-all search count is compared.
expect do
subject.execute(object, true)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
it 'updates the index when object is changed' do
object = nil
# Index the record first, then change the attribute to "new" while Sidekiq
# testing is disabled.
Sidekiq::Testing.disable! do
object = create(type)
subject.execute(object, true)
object.update(attribute => "new")
end
# execute(..., false) requests the "updating" operation; afterwards a
# search for 'new' should find one more record.
expect do
subject.execute(object, false)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end
end
end
it 'indexes all nested objects for a Project' do
# To be able to access it outside the following block
project = nil
# Build a project with a repository and one of each associated record type
# while Sidekiq testing is disabled.
Sidekiq::Testing.disable! do
project = create :project, :repository
create :issue, project: project
create :milestone, project: project
create :note, project: project
create :merge_request, target_project: project, source_project: project
create :project_snippet, project: project
end
# The service is expected to enqueue the commit indexer for this project.
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id).and_call_original
# Nothing should be in the index at this point
expect(Elasticsearch::Model.search('*').total_count).to be(0)
# Inline mode makes Sidekiq jobs enqueued during execute run immediately.
Sidekiq::Testing.inline! do
subject.execute(project, true)
end
Gitlab::Elastic::Helper.refresh_index
## All database objects + data from repository. The absolute value does not matter
expect(Elasticsearch::Model.search('*').total_count).to be > 40
end
it 'indexes changes during indexing gap' do
project = nil
note = nil
# Initial state: project and note are created with Sidekiq inline, and the
# assertions below expect 'note_1' to be searchable afterwards.
Sidekiq::Testing.inline! do
project = create :project, :repository
note = create :note, project: project, note: 'note_1'
Gitlab::Elastic::Helper.refresh_index
end
options = { project_ids: [project.id] }
# Simulate the gap: change the existing note and add a new one while Sidekiq
# testing is disabled. The assertions below expect neither change to be
# searchable yet. update_columns presumably bypasses model callbacks -
# TODO confirm.
Sidekiq::Testing.disable! do
note.update_columns(note: 'note_2')
create :note, project: project, note: 'note_3'
end
# The index still reflects only the state before the gap.
expect(Note.elastic_search('note_1', options: options).present?).to eq(true)
expect(Note.elastic_search('note_2', options: options).present?).to eq(false)
expect(Note.elastic_search('note_3', options: options).present?).to eq(false)
# Re-index the whole project through the service.
Sidekiq::Testing.inline! do
subject.execute(project, true)
Gitlab::Elastic::Helper.refresh_index
end
# After re-indexing, the stale text is gone and both gap changes are found.
expect(Note.elastic_search('note_1', options: options).present?).to eq(false)
expect(Note.elastic_search('note_2', options: options).present?).to eq(true)
expect(Note.elastic_search('note_3', options: options).present?).to eq(true)
end
end
...@@ -30,31 +30,14 @@ describe ElasticIndexerWorker, :elastic do ...@@ -30,31 +30,14 @@ describe ElasticIndexerWorker, :elastic do
end end
with_them do with_them do
it 'indexes new records' do it 'calls record indexing' do
object = nil object = create(type)
Sidekiq::Testing.disable! do
object = create(type)
end
expect do
subject.perform("index", name, object.id, object.es_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('*').records.size }.by(1)
end
it 'updates the index when object is changed' do expect_next_instance_of(Elastic::IndexRecordService) do |service|
object = nil expect(service).to receive(:execute).with(object, true, {})
Sidekiq::Testing.disable! do
object = create(type)
subject.perform("index", name, object.id, object.es_id)
object.update(attribute => "new")
end end
expect do subject.perform("index", name, object.id, object.es_id)
subject.perform("update", name, object.id, object.es_id)
Gitlab::Elastic::Helper.refresh_index
end.to change { Elasticsearch::Model.search('new').records.size }.by(1)
end end
it 'deletes from index when an object is deleted' do it 'deletes from index when an object is deleted' do
...@@ -106,62 +89,4 @@ describe ElasticIndexerWorker, :elastic do ...@@ -106,62 +89,4 @@ describe ElasticIndexerWorker, :elastic do
expect(Elasticsearch::Model.search('*').total_count).to be(0) expect(Elasticsearch::Model.search('*').total_count).to be(0)
end end
it 'indexes all nested objects for a Project' do
# To be able to access it outside the following block
project = nil
Sidekiq::Testing.disable! do
project = create :project, :repository
create :issue, project: project
create :milestone, project: project
create :note, project: project
create :merge_request, target_project: project, source_project: project
create :project_snippet, project: project
end
expect(ElasticCommitIndexerWorker).to receive(:perform_async).with(project.id).and_call_original
# Nothing should be in the index at this point
expect(Elasticsearch::Model.search('*').total_count).to be(0)
Sidekiq::Testing.inline! do
subject.perform("index", "Project", project.id, project.es_id)
end
Gitlab::Elastic::Helper.refresh_index
## All database objects + data from repository. The absolute value does not matter
expect(Elasticsearch::Model.search('*').total_count).to be > 40
end
it 'indexes changes during indexing gap' do
project = nil
note = nil
Sidekiq::Testing.inline! do
project = create :project, :repository
note = create :note, project: project, note: 'note_1'
Gitlab::Elastic::Helper.refresh_index
end
options = { project_ids: [project.id] }
Sidekiq::Testing.disable! do
note.update_columns(note: 'note_2')
create :note, project: project, note: 'note_3'
end
expect(Note.elastic_search('note_1', options: options).present?).to eq(true)
expect(Note.elastic_search('note_2', options: options).present?).to eq(false)
expect(Note.elastic_search('note_3', options: options).present?).to eq(false)
Sidekiq::Testing.inline! do
subject.perform("index", "Project", project.id, project.es_id)
Gitlab::Elastic::Helper.refresh_index
end
expect(Note.elastic_search('note_1', options: options).present?).to eq(false)
expect(Note.elastic_search('note_2', options: options).present?).to eq(true)
expect(Note.elastic_search('note_3', options: options).present?).to eq(true)
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment