Commit 89bae130 authored by Nick Thomas's avatar Nick Thomas

Merge branch '8628-remove-commit-blob-index' into 'master'

Reset blob/commit index if last_commit is no longer in repository

Closes #10712

See merge request gitlab-org/gitlab-ee!10538
parents 9610b8ff bf96377f
......@@ -10,15 +10,15 @@ class ElasticBatchProjectIndexerWorker
# necessary
sidekiq_options retry: 10
def perform(start, finish, update_index = false)
def perform(start, finish)
projects = build_relation(start, finish)
projects.find_each { |project| run_indexer(project, update_index) }
projects.find_each { |project| run_indexer(project) }
end
private
def run_indexer(project, update_index)
def run_indexer(project)
return unless project.use_elasticsearch?
# Ensure we remove the hold on the project, no matter what, so ElasticCommitIndexerWorker can do its thing
......
......@@ -318,6 +318,36 @@ module Elasticsearch
res
end
def delete_index_for_commits_and_blobs
client_for_indexing.delete_by_query(
index: self.class.index_name,
routing: es_parent,
body: {
query: {
bool: {
filter: [
{
terms: {
type: %w{commit blob}
}
},
{
has_parent: {
parent_type: 'project',
query: {
term: {
id: project_id
}
}
}
}
]
}
}
}
)
end
def search(query, type: :all, page: 1, per: 20, options: {})
options[:repository_id] = repository_id if options[:repository_id].nil?
self.class.search(query, type: type, page: page, per: per, options: options)
......
......@@ -16,7 +16,7 @@ module Gitlab
Gitlab::Utils.which(EXPERIMENTAL_INDEXER).present?
end
attr_reader :project
attr_reader :project, :index_status
def initialize(project)
@project = project
......@@ -75,6 +75,10 @@ module Gitlab
end
def run_indexer!(to_sha)
if index_status && !repository_contains_last_indexed_commit?
project.repository.delete_index_for_commits_and_blobs
end
command = [path_to_indexer, project.id.to_s, repository_path]
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
......@@ -85,7 +89,7 @@ module Gitlab
end
def last_commit
@index_status&.last_commit
index_status&.last_commit
end
def from_sha
......
......@@ -19,7 +19,7 @@ namespace :gitlab do
print "Enqueuing project repositories in batches of #{batch_size}"
project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.perform_async(start, finish, ENV['UPDATE_INDEX'])
ElasticBatchProjectIndexerWorker.perform_async(start, finish)
print "."
end
......@@ -41,7 +41,7 @@ namespace :gitlab do
Sidekiq::Logging.logger = Logger.new(STDOUT)
project_id_batches do |start, finish|
ElasticBatchProjectIndexerWorker.new.perform(start, finish, ENV['UPDATE_INDEX'])
ElasticBatchProjectIndexerWorker.new.perform(start, finish)
end
end
......
......@@ -79,20 +79,6 @@ describe Gitlab::Elastic::Indexer do
expect_index_status(to_sha)
end
end
context 'when last_commit no longer exists' do
before do
project.create_index_status!(last_commit: '51749675fc22nononononononono343bd54a3c95')
end
it 'use nil as from_sha' do
expect_popen.and_return(popen_success)
indexer.run(to_sha)
expect_index_status(to_sha)
end
end
end
it 'updates the index status when the indexing is a success' do
......@@ -174,8 +160,12 @@ describe Gitlab::Elastic::Indexer do
let!(:initial_commit) { project.repository.commit('master').sha }
let(:ee_application_setting) { nil }
before do
stub_ee_application_setting(elasticsearch_indexing: true)
end
def change_repository_and_index(project, &blk)
yield blk
yield blk if blk
current_commit = project.repository.commit('master').sha
......@@ -194,25 +184,29 @@ describe Gitlab::Elastic::Indexer do
end
end
context 'when IndexStatus#last_commit is no longer in repository', :pending do
context 'when IndexStatus#last_commit is no longer in repository' do
before do
ElasticIndexerWorker.new.perform("index", "Project", project.id, project.es_id)
end
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
sha_for_reset = nil
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
sha_for_reset = project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
project.index_status.update(last_commit: 'ABCDABCDABCD')
end
project.index_status.update(last_commit: '____________')
it 'reindexes from scratch if IndexStatus#last_commit is no longer in repository' do
change_repository_and_index(project) do
project.repository.write_ref('master', initial_commit)
project.repository.write_ref('master', sha_for_reset)
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end
end
......@@ -221,11 +215,9 @@ describe Gitlab::Elastic::Indexer do
before do
change_repository_and_index(project) do
project.repository.create_file(user, '12', '', message: '12', branch_name: 'master')
project.repository.create_file(user, '23', '', message: '23', branch_name: 'master')
end
expect(indexed_file_paths_for('12')).to include('12')
expect(indexed_file_paths_for('23')).to include('23')
end
it 'reverses already indexed commits' do
......@@ -234,7 +226,6 @@ describe Gitlab::Elastic::Indexer do
end
expect(indexed_file_paths_for('12')).not_to include('12')
expect(indexed_file_paths_for('23')).not_to include('23')
end
end
end
......
......@@ -16,7 +16,7 @@ describe ElasticBatchProjectIndexerWorker do
end
it 'only indexes the enabled project' do
projects.each { |project| expect_index(project, false).and_call_original }
projects.each { |project| expect_index(project).and_call_original }
expect(Gitlab::Elastic::Indexer).to receive(:new).with(projects.first).and_return(double(run: true))
expect(Gitlab::Elastic::Indexer).not_to receive(:new).with(projects.last)
......@@ -26,14 +26,14 @@ describe ElasticBatchProjectIndexerWorker do
end
it 'runs the indexer for projects in the batch range' do
projects.each { |project| expect_index(project, false) }
projects.each { |project| expect_index(project) }
worker.perform(projects.first.id, projects.last.id)
end
it 'skips projects not in the batch range' do
expect_index(projects.first, false).never
expect_index(projects.last, false)
expect_index(projects.first).never
expect_index(projects.last)
worker.perform(projects.last.id, projects.last.id)
end
......@@ -41,7 +41,7 @@ describe ElasticBatchProjectIndexerWorker do
it 'clears the "locked" state from redis when the project finishes indexing' do
Gitlab::Redis::SharedState.with { |redis| redis.sadd(:elastic_projects_indexing, projects.first.id) }
expect_index(projects.first, false).and_call_original
expect_index(projects.first).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run)
end
......@@ -50,11 +50,17 @@ describe ElasticBatchProjectIndexerWorker do
.to change { project_locked?(projects.first) }.from(true).to(false)
end
context 'update_index = false' do
it 'reindexes projects that were already indexed' do
expect_index(projects.first)
expect_index(projects.last)
worker.perform(projects.first.id, projects.last.id)
end
it 'indexes all projects it receives even if already indexed' do
projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, false).and_call_original
expect_index(projects.first).and_call_original
expect_next_instance_of(Gitlab::Elastic::Indexer) do |indexer|
expect(indexer).to receive(:run)
end
......@@ -63,27 +69,8 @@ describe ElasticBatchProjectIndexerWorker do
end
end
context 'with update_index' do
it 'reindexes projects that were already indexed' do
expect_index(projects.first, true)
expect_index(projects.last, true)
worker.perform(projects.first.id, projects.last.id, true)
end
it 'starts indexing at the last indexed commit' do
projects.first.index_status.update!(last_commit: 'foo')
expect_index(projects.first, true).and_call_original
expect_any_instance_of(Gitlab::Elastic::Indexer).to receive(:run)
worker.perform(projects.first.id, projects.first.id, true)
end
end
end
def expect_index(project, update_index)
expect(worker).to receive(:run_indexer).with(project, update_index)
def expect_index(project)
expect(worker).to receive(:run_indexer).with(project)
end
def project_locked?(project)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment