Commit 2b9ca1d1 authored by Mark Chao's avatar Mark Chao

Always index with last_commit if exists

This ensures index in sync when indexing gap occurs,
see https://gitlab.com/gitlab-org/gitlab-ee/issues/5299
parent 97c18a3d
...@@ -28,9 +28,7 @@ class ElasticBatchProjectIndexerWorker ...@@ -28,9 +28,7 @@ class ElasticBatchProjectIndexerWorker
logger.info "Indexing #{project.full_name} (ID=#{project.id})..." logger.info "Indexing #{project.full_name} (ID=#{project.id})..."
# Get the last commit if we're updating indexed projects - otherwise we'll want to index everything Gitlab::Elastic::Indexer.new(project).run
last_commit = project.index_status.try(:last_commit) if update_index
Gitlab::Elastic::Indexer.new(project).run(last_commit)
logger.info "Indexing #{project.full_name} (ID=#{project.id}) is done!" logger.info "Indexing #{project.full_name} (ID=#{project.id}) is done!"
rescue => err rescue => err
......
...@@ -12,6 +12,6 @@ class ElasticCommitIndexerWorker ...@@ -12,6 +12,6 @@ class ElasticCommitIndexerWorker
return true unless project.use_elasticsearch? return true unless project.use_elasticsearch?
Gitlab::Elastic::Indexer.new(project).run(oldrev, newrev) Gitlab::Elastic::Indexer.new(project).run(newrev)
end end
end end
...@@ -38,12 +38,9 @@ module Gitlab ...@@ -38,12 +38,9 @@ module Gitlab
@index_status = project.index_status @index_status = project.index_status
end end
def run(from_sha = nil, to_sha = nil) def run(to_sha = nil)
to_sha = nil if to_sha == Gitlab::Git::BLANK_SHA to_sha = nil if to_sha == Gitlab::Git::BLANK_SHA
# Project has not been indexed before, therefore index from beginning
from_sha = nil unless @index_status
head_commit = repository.try(:commit) head_commit = repository.try(:commit)
if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil? if repository.nil? || !repository.exists? || repository.empty? || head_commit.nil?
...@@ -51,7 +48,7 @@ module Gitlab ...@@ -51,7 +48,7 @@ module Gitlab
return return
end end
run_indexer!(from_sha, to_sha) run_indexer!(to_sha)
update_index_status(to_sha) update_index_status(to_sha)
true true
...@@ -77,7 +74,7 @@ module Gitlab ...@@ -77,7 +74,7 @@ module Gitlab
end end
end end
def run_indexer!(from_sha, to_sha) def run_indexer!(to_sha)
command = [path_to_indexer, project.id.to_s, repository_path] command = [path_to_indexer, project.id.to_s, repository_path]
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha) vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
...@@ -87,6 +84,20 @@ module Gitlab ...@@ -87,6 +84,20 @@ module Gitlab
raise Error, output unless status&.zero? raise Error, output unless status&.zero?
end end
def last_commit
@index_status&.last_commit
end
def from_sha
repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID
end
def repository_contains_last_indexed_commit?
strong_memoize(:repository_contains_last_indexed_commit) do
last_commit.present? && repository.commit(last_commit).present?
end
end
def repository_path def repository_path
# Go indexer needs relative path while ruby indexer needs absolute one # Go indexer needs relative path while ruby indexer needs absolute one
if use_experimental_indexer? if use_experimental_indexer?
......
...@@ -5,12 +5,14 @@ describe Gitlab::Elastic::Indexer do ...@@ -5,12 +5,14 @@ describe Gitlab::Elastic::Indexer do
before do before do
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true') stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true')
stub_ee_application_setting(elasticsearch_url: ['http://localhost:9200']) stub_ee_application_setting(ee_application_setting) if ee_application_setting.present?
end end
let(:ee_application_setting) { { elasticsearch_url: ['http://localhost:9200'] } }
let(:project) { create(:project, :repository) } let(:project) { create(:project, :repository) }
let(:from_sha) { Gitlab::Git::BLANK_SHA } let(:expected_from_sha) { Gitlab::Git::EMPTY_TREE_ID }
let(:to_sha) { project.commit.try(:sha) } let(:to_commit) { project.commit }
let(:to_sha) { to_commit.try(:sha) }
let(:indexer) { described_class.new(project) } let(:indexer) { described_class.new(project) }
let(:popen_success) { [[''], 0] } let(:popen_success) { [[''], 0] }
...@@ -53,18 +55,50 @@ describe Gitlab::Elastic::Indexer do ...@@ -53,18 +55,50 @@ describe Gitlab::Elastic::Indexer do
hash_including( hash_including(
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha 'TO_SHA' => to_sha
) )
).and_return(popen_success) ).and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
end
context 'when IndexStatus exists' do
context 'when last_commit exists' do
let(:last_commit) { to_commit.parent_ids.first }
before do
project.create_index_status!(last_commit: last_commit)
end
it 'uses last_commit as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
context 'when last_commit no longer exists' do
before do
project.create_index_status!(last_commit: '51749675fc22nononononononono343bd54a3c95')
end
it 'use nil as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
end end
it 'updates the index status when the indexing is a success' do it 'updates the index status when the indexing is a success' do
expect_popen.and_return(popen_success) expect_popen.and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
expect_index_status(to_sha) expect_index_status(to_sha)
end end
...@@ -72,7 +106,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -72,7 +106,7 @@ describe Gitlab::Elastic::Indexer do
it 'leaves the index status untouched when indexing a non-HEAD commit' do it 'leaves the index status untouched when indexing a non-HEAD commit' do
expect_popen.and_return(popen_success) expect_popen.and_return(popen_success)
indexer.run(from_sha, project.repository.commit('HEAD~1')) indexer.run(project.repository.commit('HEAD~1'))
expect(project.index_status).to be_nil expect(project.index_status).to be_nil
end end
...@@ -125,12 +159,82 @@ describe Gitlab::Elastic::Indexer do ...@@ -125,12 +159,82 @@ describe Gitlab::Elastic::Indexer do
'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json, 'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha 'TO_SHA' => to_sha
) )
).and_return(popen_success) ).and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
end
end
end
context 'reverting a change', :elastic do
let(:user) { project.owner }
let!(:initial_commit) { project.repository.commit('master').sha }
let(:ee_application_setting) { nil }
def change_repository_and_index(project, &blk)
yield blk
current_commit = project.repository.commit('master').sha
described_class.new(project).run(current_commit)
Gitlab::Elastic::Helper.refresh_index
end
def indexed_file_paths_for(term)
blobs = Repository.search(
term,
type: :blob
)[:blobs][:results].response
blobs.map do |blob|
blob['_source']['blob']['path']
end
end
context 'when IndexStatus#last_commit is no longer in repository', :pending do
before do
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
project.index_status.update(last_commit: 'ABCDABCDABCD')
end
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit)
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end
end
context 'when branch is reset to an earlier commit' do
before do
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
end
it 'reverses already indexed commits' do
change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit)
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end end
end end
end end
......
...@@ -43,7 +43,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -43,7 +43,7 @@ describe ElasticBatchProjectIndexerWorker do
expect_index(projects.first, false).and_call_original expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil) expect(indexer).to receive(:run)
end end
expect { worker.perform(projects.first.id, projects.first.id) } expect { worker.perform(projects.first.id, projects.first.id) }
...@@ -56,7 +56,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -56,7 +56,7 @@ describe ElasticBatchProjectIndexerWorker do
expect_index(projects.first, false).and_call_original expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil) expect(indexer).to receive(:run)
end end
worker.perform(projects.first.id, projects.first.id) worker.perform(projects.first.id, projects.first.id)
...@@ -75,7 +75,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -75,7 +75,7 @@ describe ElasticBatchProjectIndexerWorker do
projects.first.index_status.update!(last_commit: 'foo') projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, true).and_call_original expect_index(projects.first, true).and_call_original
expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run).with('foo') expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run)
worker.perform(projects.first.id, projects.first.id, true) worker.perform(projects.first.id, projects.first.id, true)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment