Commit 46b28842 authored by Tiago Botelho's avatar Tiago Botelho

StuckImportJobsWorker query performance optimization

Improves the performance of fetching the enqueued
projects for StuckImportJobsWorker, preventing a
statement timeout.
parent 11831834
...@@ -56,4 +56,17 @@ class ProjectImportState < ActiveRecord::Base ...@@ -56,4 +56,17 @@ class ProjectImportState < ActiveRecord::Base
end end
end end
end end
def mark_as_failed(error_message)
original_errors = errors.dup
sanitized_message = Gitlab::UrlSanitizer.sanitize(error_message)
fail_op
update_column(:last_error, sanitized_message)
rescue ActiveRecord::ActiveRecordError => e
Rails.logger.error("Error setting import status to failed: #{e.message}. Original error: #{sanitized_message}")
ensure
@errors = original_errors
end
end end
...@@ -7,60 +7,58 @@ class StuckImportJobsWorker ...@@ -7,60 +7,58 @@ class StuckImportJobsWorker
IMPORT_JOBS_EXPIRATION = 15.hours.to_i IMPORT_JOBS_EXPIRATION = 15.hours.to_i
def perform def perform
projects_without_jid_count = mark_projects_without_jid_as_failed! import_state_without_jid_count = mark_import_states_without_jid_as_failed!
projects_with_jid_count = mark_projects_with_jid_as_failed! import_state_with_jid_count = mark_import_states_with_jid_as_failed!
Gitlab::Metrics.add_event(:stuck_import_jobs, Gitlab::Metrics.add_event(:stuck_import_jobs,
projects_without_jid_count: projects_without_jid_count, projects_without_jid_count: import_state_without_jid_count,
projects_with_jid_count: projects_with_jid_count) projects_with_jid_count: import_state_with_jid_count)
end end
private private
def mark_projects_without_jid_as_failed! def mark_import_states_without_jid_as_failed!
enqueued_projects_without_jid.each do |project| enqueued_import_states_without_jid.each do |import_state|
project.mark_import_as_failed(error_message) import_state.mark_as_failed(error_message)
end.count end.count
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def mark_projects_with_jid_as_failed! def mark_import_states_with_jid_as_failed!
# TODO: Rollback this change to use SQL through #pluck jids_and_ids = enqueued_import_states_with_jid.pluck(:jid, :id).to_h
jids_and_ids = enqueued_projects_with_jid.map { |project| [project.import_jid, project.id] }.to_h
# Find the jobs that aren't currently running or that exceeded the threshold. # Find the jobs that aren't currently running or that exceeded the threshold.
completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys) completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys)
return unless completed_jids.any? return unless completed_jids.any?
completed_project_ids = jids_and_ids.values_at(*completed_jids) completed_import_state_ids = jids_and_ids.values_at(*completed_jids)
# We select the projects again, because they may have transitioned from # We select the import states again, because they may have transitioned from
# scheduled/started to finished/failed while we were looking up their Sidekiq status. # scheduled/started to finished/failed while we were looking up their Sidekiq status.
completed_projects = enqueued_projects_with_jid.where(id: completed_project_ids) completed_import_states = enqueued_import_states_with_jid.where(id: completed_import_state_ids)
Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_projects.map(&:import_jid).join(', ')}") completed_import_state_jids = completed_import_states.map { |import_state| import_state.jid }.join(', ')
Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_import_state_jids}")
completed_projects.each do |project| completed_import_states.each do |import_state|
project.mark_import_as_failed(error_message) import_state.mark_as_failed(error_message)
end.count end.count
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord def enqueued_import_states
def enqueued_projects ProjectImportState.with_status([:scheduled, :started])
Project.joins_import_state.where("(import_state.status = 'scheduled' OR import_state.status = 'started') OR (projects.import_status = 'scheduled' OR projects.import_status = 'started')")
end end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def enqueued_projects_with_jid def enqueued_import_states_with_jid
enqueued_projects.where.not("import_state.jid IS NULL AND projects.import_jid IS NULL") enqueued_import_states.where.not(jid: nil)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def enqueued_projects_without_jid def enqueued_import_states_without_jid
enqueued_projects.where("import_state.jid IS NULL AND projects.import_jid IS NULL") enqueued_import_states.where(jid: nil)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
......
---
title: Improves performance of stuck import jobs detection
merge_request: 22879
author:
type: performance
...@@ -8,29 +8,29 @@ describe StuckImportJobsWorker do ...@@ -8,29 +8,29 @@ describe StuckImportJobsWorker do
context 'when the import status was already updated' do context 'when the import status was already updated' do
before do before do
allow(Gitlab::SidekiqStatus).to receive(:completed_jids) do allow(Gitlab::SidekiqStatus).to receive(:completed_jids) do
project.import_start import_state.start
project.import_finish import_state.finish
[project.import_jid] [import_state.jid]
end end
end end
it 'does not mark the project as failed' do it 'does not mark the project as failed' do
worker.perform worker.perform
expect(project.reload.import_status).to eq('finished') expect(import_state.reload.status).to eq('finished')
end end
end end
context 'when the import status was not updated' do context 'when the import status was not updated' do
before do before do
allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([project.import_jid]) allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([import_state.jid])
end end
it 'marks the project as failed' do it 'marks the project as failed' do
worker.perform worker.perform
expect(project.reload.import_status).to eq('failed') expect(import_state.reload.status).to eq('failed')
end end
end end
end end
...@@ -41,27 +41,27 @@ describe StuckImportJobsWorker do ...@@ -41,27 +41,27 @@ describe StuckImportJobsWorker do
end end
it 'does not mark the project as failed' do it 'does not mark the project as failed' do
expect { worker.perform }.not_to change { project.reload.import_status } expect { worker.perform }.not_to change { import_state.reload.status }
end end
end end
end end
describe 'with scheduled import_status' do describe 'with scheduled import_status' do
it_behaves_like 'project import job detection' do it_behaves_like 'project import job detection' do
let(:project) { create(:project, :import_scheduled) } let(:import_state) { create(:project, :import_scheduled).import_state }
before do before do
project.import_state.update(jid: '123') import_state.update(jid: '123')
end end
end end
end end
describe 'with started import_status' do describe 'with started import_status' do
it_behaves_like 'project import job detection' do it_behaves_like 'project import job detection' do
let(:project) { create(:project, :import_started) } let(:import_state) { create(:project, :import_started).import_state }
before do before do
project.import_state.update(jid: '123') import_state.update(jid: '123')
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment