Commit 89bae130 authored by Nick Thomas's avatar Nick Thomas

Merge branch '8628-remove-commit-blob-index' into 'master'

Reset blob/commit index if last_commit is no longer in repository

Closes #10712

See merge request gitlab-org/gitlab-ee!10538
parents 9610b8ff bf96377f
...@@ -10,15 +10,15 @@ class ElasticBatchProjectIndexerWorker ...@@ -10,15 +10,15 @@ class ElasticBatchProjectIndexerWorker
# necessary # necessary
sidekiq_options retry: 10 sidekiq_options retry: 10
def perform(start, finish, update_index = false) def perform(start, finish)
projects = build_relation(start, finish) projects = build_relation(start, finish)
projects.find_each { |project| run_indexer(project, update_index) } projects.find_each { |project| run_indexer(project) }
end end
private private
def run_indexer(project, update_index) def run_indexer(project)
return unless project.use_elasticsearch? return unless project.use_elasticsearch?
# Ensure we remove the hold on the project, no matter what, so ElasticCommitIndexerWorker can do its thing # Ensure we remove the hold on the project, no matter what, so ElasticCommitIndexerWorker can do its thing
......
...@@ -318,6 +318,36 @@ module Elasticsearch ...@@ -318,6 +318,36 @@ module Elasticsearch
res res
end end
def delete_index_for_commits_and_blobs
client_for_indexing.delete_by_query(
index: self.class.index_name,
routing: es_parent,
body: {
query: {
bool: {
filter: [
{
terms: {
type: %w{commit blob}
}
},
{
has_parent: {
parent_type: 'project',
query: {
term: {
id: project_id
}
}
}
}
]
}
}
}
)
end
def search(query, type: :all, page: 1, per: 20, options: {}) def search(query, type: :all, page: 1, per: 20, options: {})
options[:repository_id] = repository_id if options[:repository_id].nil? options[:repository_id] = repository_id if options[:repository_id].nil?
self.class.search(query, type: type, page: page, per: per, options: options) self.class.search(query, type: type, page: page, per: per, options: options)
......
...@@ -16,7 +16,7 @@ module Gitlab ...@@ -16,7 +16,7 @@ module Gitlab
Gitlab::Utils.which(EXPERIMENTAL_INDEXER).present? Gitlab::Utils.which(EXPERIMENTAL_INDEXER).present?
end end
attr_reader :project attr_reader :project, :index_status
def initialize(project) def initialize(project)
@project = project @project = project
...@@ -75,6 +75,10 @@ module Gitlab ...@@ -75,6 +75,10 @@ module Gitlab
end end
def run_indexer!(to_sha) def run_indexer!(to_sha)
if index_status && !repository_contains_last_indexed_commit?
project.repository.delete_index_for_commits_and_blobs
end
command = [path_to_indexer, project.id.to_s, repository_path] command = [path_to_indexer, project.id.to_s, repository_path]
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha) vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
...@@ -85,7 +89,7 @@ module Gitlab ...@@ -85,7 +89,7 @@ module Gitlab
end end
def last_commit def last_commit
@index_status&.last_commit index_status&.last_commit
end end
def from_sha def from_sha
......
...@@ -19,7 +19,7 @@ namespace :gitlab do ...@@ -19,7 +19,7 @@ namespace :gitlab do
print "Enqueuing project repositories in batches of #{batch_size}" print "Enqueuing project repositories in batches of #{batch_size}"
project_id_batches do |start, finish| project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.perform_async(start, finish, ENV['UPDATE_INDEX']) ElasticBatchProjectIndexerWorker.perform_async(start, finish)
print "." print "."
end end
...@@ -41,7 +41,7 @@ namespace :gitlab do ...@@ -41,7 +41,7 @@ namespace :gitlab do
Sidekiq::Logging.logger = Logger.new(STDOUT) Sidekiq::Logging.logger = Logger.new(STDOUT)
project_id_batches do |start, finish| project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.new.perform(start, finish, ENV['UPDATE_INDEX']) ElasticBatchProjectIndexerWorker.new.perform(start, finish)
end end
end end
......
...@@ -79,20 +79,6 @@ describe Gitlab::Elastic::Indexer do ...@@ -79,20 +79,6 @@ describe Gitlab::Elastic::Indexer do
expect_index_status(to_sha) expect_index_status(to_sha)
end end
end end
context 'when last_commit no longer exists' do
before do
project.create_index_status!(last_commit: '51749675fc22nononononononono343bd54a3c95')
end
it 'use nil as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
end end
it 'updates the index status when the indexing is a success' do it 'updates the index status when the indexing is a success' do
...@@ -174,8 +160,12 @@ describe Gitlab::Elastic::Indexer do ...@@ -174,8 +160,12 @@ describe Gitlab::Elastic::Indexer do
let!(:initial_commit) { project.repository.commit('master').sha } let!(:initial_commit) { project.repository.commit('master').sha }
let(:ee_application_setting) { nil } let(:ee_application_setting) { nil }
before do
stub_ee_application_setting(elasticsearch_indexing: true)
end
def change_repository_and_index(project, &blk) def change_repository_and_index(project, &blk)
yield blk yield blk if blk
current_commit = project.repository.commit('master').sha current_commit = project.repository.commit('master').sha
...@@ -194,25 +184,29 @@ describe Gitlab::Elastic::Indexer do ...@@ -194,25 +184,29 @@ describe Gitlab::Elastic::Indexer do
end end
end end
context 'when IndexStatus#last_commit is no longer in repository', :pending do context 'when IndexStatus#last_commit is no longer in repository' do
before do before do
ElasticIndexerWorker.new.perform("index", "Project", project.id, project.es_id)
end
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
sha_for_reset = nil
change_repository_and_index(project) do change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master') sha_for_reset = project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master') project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end end
expect(indexed_file_paths_for('12')).to include('12') expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23') expect(indexed_file_paths_for('23')).to include('23')
project.index_status.update(last_commit: 'ABCDABCDABCD') project.index_status.update(last_commit: '____________')
end
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
change_repository_and_index(project) do change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit) project.repository.write_ref('master', sha_for_reset)
end end
expect(indexed_file_paths_for('12')).not_to include('12') expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).not_to include('23') expect(indexed_file_paths_for('23')).not_to include('23')
end end
end end
...@@ -221,11 +215,9 @@ describe Gitlab::Elastic::Indexer do ...@@ -221,11 +215,9 @@ describe Gitlab::Elastic::Indexer do
before do before do
change_repository_and_index(project) do change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master') project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end end
expect(indexed_file_paths_for('12')).to include('12') expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
end end
it 'reverses already indexed commits' do it 'reverses already indexed commits' do
...@@ -234,7 +226,6 @@ describe Gitlab::Elastic::Indexer do ...@@ -234,7 +226,6 @@ describe Gitlab::Elastic::Indexer do
end end
expect(indexed_file_paths_for('12')).not_to include('12') expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end end
end end
end end
......
...@@ -16,7 +16,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -16,7 +16,7 @@ describe ElasticBatchProjectIndexerWorker do
end end
it 'only indexes the enabled project' do it 'only indexes the enabled project' do
projects.each { |project| expect_index(project, false).and_call_original } projects.each { |project| expect_index(project).and_call_original }
expect(Gitlab::Elastic::Indexer).to receive(:new).with(projects.first).and_return(double(run: true)) expect(Gitlab::Elastic::Indexer).to receive(:new).with(projects.first).and_return(double(run: true))
expect(Gitlab::Elastic::Indexer).not_to receive(:new).with(projects.last) expect(Gitlab::Elastic::Indexer).not_to receive(:new).with(projects.last)
...@@ -26,14 +26,14 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -26,14 +26,14 @@ describe ElasticBatchProjectIndexerWorker do
end end
it 'runs the indexer for projects in the batch range' do it 'runs the indexer for projects in the batch range' do
projects.each { |project| expect_index(project, false) } projects.each { |project| expect_index(project) }
worker.perform(projects.first.id, projects.last.id) worker.perform(projects.first.id, projects.last.id)
end end
it 'skips projects not in the batch range' do it 'skips projects not in the batch range' do
expect_index(projects.first, false).never expect_index(projects.first).never
expect_index(projects.last, false) expect_index(projects.last)
worker.perform(projects.last.id, projects.last.id) worker.perform(projects.last.id, projects.last.id)
end end
...@@ -41,7 +41,7 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -41,7 +41,7 @@ describe ElasticBatchProjectIndexerWorker do
it 'clears the "locked" state from redis when the project finishes indexing' do it 'clears the "locked" state from redis when the project finishes indexing' do
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, projects.first.id) } Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, projects.first.id) }
expect_index(projects.first, false).and_call_original expect_index(projects.first).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run) expect(indexer).to receive(:run)
end end
...@@ -50,11 +50,17 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -50,11 +50,17 @@ describe ElasticBatchProjectIndexerWorker do
.to change { project_locked?(projects.first) }.from(true).to(false) .to change { project_locked?(projects.first) }.from(true).to(false)
end end
context 'update_index = false' do it 'reindexes projects that were already indexed' do
expect_index(projects.first)
expect_index(projects.last)
worker.perform(projects.first.id, projects.last.id)
end
it 'indexes all projects it receives even if already indexed' do it 'indexes all projects it receives even if already indexed' do
projects.first.index_status.update!(last_commit: 'foo') projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, false).and_call_original expect_index(projects.first).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer| expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run) expect(indexer).to receive(:run)
end end
...@@ -63,27 +69,8 @@ describe ElasticBatchProjectIndexerWorker do ...@@ -63,27 +69,8 @@ describe ElasticBatchProjectIndexerWorker do
end end
end end
context 'with update_index' do def expect_index(project)
it 'reindexes projects that were already indexed' do expect(worker).to receive(:run_indexer).with(project)
expect_index(projects.first, true)
expect_index(projects.last, true)
worker.perform(projects.first.id, projects.last.id, true)
end
it 'starts indexing at the last indexed commit' do
projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, true).and_call_original
expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run)
worker.perform(projects.first.id, projects.first.id, true)
end
end
end
def expect_index(project, update_index)
expect(worker).to receive(:run_indexer).with(project, update_index)
end end
def project_locked?(project) def project_locked?(project)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment