Commit e7f97ec1 authored by Stan Hu's avatar Stan Hu

Merge branch '8628-full-index' into 'master'

Repository indexing gaps handling

Closes #10712

See merge request gitlab-org/gitlab-ee!10273
parents cb57b547 2b9ca1d1
...@@ -28,9 +28,7 @@ class ElasticBatchProjectIndexerWorker ...@@ -28,9 +28,7 @@ class ElasticBatchProjectIndexerWorker
logger.info "Indexing #{project.full_name} (ID=#{project.id})..." logger.info "Indexing #{project.full_name} (ID=#{project.id})..."
# Get the last commit if we're updating indexed projects - otherwise we'll want to index everything Gitlab::Elastic::Indexer.new(project).run
last_commit = project.index_status.try(:last_commit) if update_index
Gitlab::Elastic::Indexer.new(project).run(last_commit)
logger.info "Indexing #{project.full_name} (ID=#{project.id}) is done!" logger.info "Indexing #{project.full_name} (ID=#{project.id}) is done!"
rescue => err rescue => err
......
...@@ -12,6 +12,6 @@ class ElasticCommitIndexerWorker ...@@ -12,6 +12,6 @@ class ElasticCommitIndexerWorker
return true unless project.use_elasticsearch? return true unless project.use_elasticsearch?
Gitlab::Elastic::Indexer.new(project).run(oldrev, newrev) Gitlab::Elastic::Indexer.new(project).run(newrev)
end end
end end
...@@ -33,9 +33,12 @@ module Gitlab ...@@ -33,9 +33,12 @@ module Gitlab
storage: project.repository_storage storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json }.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json
end end
# Use the eager-loaded association if available.
@index_status = project.index_status
end end
def run(from_sha = nil, to_sha = nil) def run(to_sha = nil)
to_sha = nil if to_sha == Gitlab::Git::BLANK_SHA to_sha = nil if to_sha == Gitlab::Git::BLANK_SHA
head_commit = repository.try(:commit) head_commit = repository.try(:commit)
...@@ -45,7 +48,7 @@ module Gitlab ...@@ -45,7 +48,7 @@ module Gitlab
return return
end end
run_indexer!(from_sha, to_sha) run_indexer!(to_sha)
update_index_status(to_sha) update_index_status(to_sha)
true true
...@@ -71,7 +74,7 @@ module Gitlab ...@@ -71,7 +74,7 @@ module Gitlab
end end
end end
def run_indexer!(from_sha, to_sha) def run_indexer!(to_sha)
command = [path_to_indexer, project.id.to_s, repository_path] command = [path_to_indexer, project.id.to_s, repository_path]
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha) vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
...@@ -81,6 +84,20 @@ module Gitlab ...@@ -81,6 +84,20 @@ module Gitlab
raise Error, output unless status&.zero? raise Error, output unless status&.zero?
end end
def last_commit
@index_status&.last_commit
end
def from_sha
repository_contains_last_indexed_commit? ? last_commit : Gitlab::Git::EMPTY_TREE_ID
end
def repository_contains_last_indexed_commit?
strong_memoize(:repository_contains_last_indexed_commit) do
last_commit.present? && repository.commit(last_commit).present?
end
end
def repository_path def repository_path
# Go indexer needs relative path while ruby indexer needs absolute one # Go indexer needs relative path while ruby indexer needs absolute one
if use_experimental_indexer? if use_experimental_indexer?
...@@ -94,11 +111,9 @@ module Gitlab ...@@ -94,11 +111,9 @@ module Gitlab
def update_index_status(to_sha) def update_index_status(to_sha)
head_commit = repository.try(:commit) head_commit = repository.try(:commit)
# Use the eager-loaded association if available. An index_status should # An index_status should always be created,
# always be created, even if the repository is empty, so we know it's # even if the repository is empty, so we know it's been looked at.
# been looked at. @index_status ||=
index_status = project.index_status
index_status ||=
begin begin
IndexStatus.find_or_create_by(project_id: project.id) IndexStatus.find_or_create_by(project_id: project.id)
rescue ActiveRecord::RecordNotUnique rescue ActiveRecord::RecordNotUnique
...@@ -110,7 +125,7 @@ module Gitlab ...@@ -110,7 +125,7 @@ module Gitlab
sha = head_commit.try(:sha) sha = head_commit.try(:sha)
sha ||= Gitlab::Git::BLANK_SHA sha ||= Gitlab::Git::BLANK_SHA
index_status.update(last_commit: sha, indexed_at: Time.now) @index_status.update(last_commit: sha, indexed_at: Time.now)
project.reload_index_status project.reload_index_status
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
...@@ -5,13 +5,15 @@ describe Gitlab::Elastic::Indexer do ...@@ -5,13 +5,15 @@ describe Gitlab::Elastic::Indexer do
before do before do
stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true') stub_env('IN_MEMORY_APPLICATION_SETTINGS', 'true')
stub_ee_application_setting(elasticsearch_url: ['http://localhost:9200']) stub_ee_application_setting(ee_application_setting) if ee_application_setting.present?
end end
let(:project) { create(:project, :repository) } let(:ee_application_setting) { { elasticsearch_url: ['http://localhost:9200'] } }
let(:from_sha) { Gitlab::Git::BLANK_SHA } let(:project) { create(:project, :repository) }
let(:to_sha) { project.commit.try(:sha) } let(:expected_from_sha) { Gitlab::Git::EMPTY_TREE_ID }
let(:indexer) { described_class.new(project) } let(:to_commit) { project.commit }
let(:to_sha) { to_commit.try(:sha) }
let(:indexer) { described_class.new(project) }
let(:popen_success) { [[''], 0] } let(:popen_success) { [[''], 0] }
let(:popen_failure) { [['error'], 1] } let(:popen_failure) { [['error'], 1] }
...@@ -53,18 +55,50 @@ describe Gitlab::Elastic::Indexer do ...@@ -53,18 +55,50 @@ describe Gitlab::Elastic::Indexer do
hash_including( hash_including(
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha 'TO_SHA' => to_sha
) )
).and_return(popen_success) ).and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
end
context 'when IndexStatus exists' do
context 'when last_commit exists' do
let(:last_commit) { to_commit.parent_ids.first }
before do
project.create_index_status!(last_commit: last_commit)
end
it 'uses last_commit as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
context 'when last_commit no longer exists' do
before do
project.create_index_status!(last_commit: '51749675fc22nononononononono343bd54a3c95')
end
it 'use nil as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
end end
it 'updates the index status when the indexing is a success' do it 'updates the index status when the indexing is a success' do
expect_popen.and_return(popen_success) expect_popen.and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
expect_index_status(to_sha) expect_index_status(to_sha)
end end
...@@ -72,7 +106,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -72,7 +106,7 @@ describe Gitlab::Elastic::Indexer do
it 'leaves the index status untouched when indexing a non-HEAD commit' do it 'leaves the index status untouched when indexing a non-HEAD commit' do
expect_popen.and_return(popen_success) expect_popen.and_return(popen_success)
indexer.run(from_sha, project.repository.commit('HEAD~1')) indexer.run(project.repository.commit('HEAD~1'))
expect(project.index_status).to be_nil expect(project.index_status).to be_nil
end end
...@@ -125,12 +159,82 @@ describe Gitlab::Elastic::Indexer do ...@@ -125,12 +159,82 @@ describe Gitlab::Elastic::Indexer do
'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json, 'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha 'TO_SHA' => to_sha
) )
).and_return(popen_success) ).and_return(popen_success)
indexer.run(from_sha, to_sha) indexer.run(to_sha)
end
end
end
context 'reverting a change', :elastic do
let(:user) { project.owner }
let!(:initial_commit) { project.repository.commit('master').sha }
let(:ee_application_setting) { nil }
def change_repository_and_index(project, &blk)
yield blk
current_commit = project.repository.commit('master').sha
described_class.new(project).run(current_commit)
Gitlab::Elastic::Helper.refresh_index
end
def indexed_file_paths_for(term)
blobs = Repository.search(
term,
type: :blob
)[:blobs][:results].response
blobs.map do |blob|
blob['_source']['blob']['path']
end
end
context 'when IndexStatus#last_commit is no longer in repository', :pending do
before do
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
project.index_status.update(last_commit: 'ABCDABCDABCD')
end
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit)
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end
end
context 'when branch is reset to an earlier commit' do
before do
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
end
it 'reverses already indexed commits' do
change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit)
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end end
end end
end end
......
...@@ -43,7 +43,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -43,7 +43,7 @@ describe ElasticBatchProjectIndexerWorker do
expect_index(projects.first, false).and_call_original expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil) expect(indexer).to receive(:run)
end end
expect { worker.perform(projects.first.id, projects.first.id) } expect { worker.perform(projects.first.id, projects.first.id) }
...@@ -56,7 +56,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -56,7 +56,7 @@ describe ElasticBatchProjectIndexerWorker do
expect_index(projects.first, false).and_call_original expect_index(projects.first, false).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run).with(nil) expect(indexer).to receive(:run)
end end
worker.perform(projects.first.id, projects.first.id) worker.perform(projects.first.id, projects.first.id)
...@@ -75,7 +75,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -75,7 +75,7 @@ describe ElasticBatchProjectIndexerWorker do
projects.first.index_status.update!(last_commit: 'foo') projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, true).and_call_original expect_index(projects.first, true).and_call_original
expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run).with('foo') expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run)
worker.perform(projects.first.id, projects.first.id, true) worker.perform(projects.first.id, projects.first.id, true)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment