Commit d095fe6a authored by Alexandru Croitor's avatar Alexandru Croitor

Extract AdvancedStageWorker to be reused

AdvanceStageWorker is a worker that initally was
used by github importer.

Extracted its functionality so that it can be used by
any multi stage importer to wait for a number of jobs
to complete, without blocking a thread. Once all jobs
have been completed this worker will advance the import
process to the next stage.
parent b815e6f6
...@@ -98,6 +98,10 @@ class ProjectImportState < ApplicationRecord ...@@ -98,6 +98,10 @@ class ProjectImportState < ApplicationRecord
Gitlab::SidekiqStatus.set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION) Gitlab::SidekiqStatus.set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
end end
def self.jid_by(project_id:, status:)
select(:jid).with_status(status).find_by(project_id: project_id)
end
end end
ProjectImportState.prepend_if_ee('EE::ProjectImportState') ProjectImportState.prepend_if_ee('EE::ProjectImportState')
...@@ -8,15 +8,12 @@ module Gitlab ...@@ -8,15 +8,12 @@ module Gitlab
# stage. # stage.
class AdvanceStageWorker # rubocop:disable Scalability/IdempotentWorker class AdvanceStageWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker include ApplicationWorker
include ::Gitlab::Import::AdvanceStage
sidekiq_options dead: false sidekiq_options dead: false
feature_category :importers feature_category :importers
INTERVAL = 30.seconds.to_i private
# The number of seconds to wait (while blocking the thread) before
# continuing to the next waiter.
BLOCKING_WAIT_TIME = 5
# The known importer stages and their corresponding Sidekiq workers. # The known importer stages and their corresponding Sidekiq workers.
STAGES = { STAGES = {
...@@ -26,49 +23,9 @@ module Gitlab ...@@ -26,49 +23,9 @@ module Gitlab
finish: Stage::FinishImportWorker finish: Stage::FinishImportWorker
}.freeze }.freeze
# project_id - The ID of the project being imported. def next_stage_worker(next_stage)
# waiters - A Hash mapping Gitlab::JobWaiter keys to the number of STAGES.fetch(next_stage.to_sym)
# remaining jobs.
# next_stage - The name of the next stage to start when all jobs have been
# completed.
def perform(project_id, waiters, next_stage)
return unless import_state = find_import_state(project_id)
new_waiters = wait_for_jobs(waiters)
if new_waiters.empty?
# We refresh the import JID here so workers importing individual
# resources (e.g. notes) don't have to do this all the time, reducing
# the pressure on Redis. We _only_ do this once all jobs are done so
# we don't get stuck forever if one or more jobs failed to notify the
# JobWaiter.
import_state.refresh_jid_expiration
STAGES.fetch(next_stage.to_sym).perform_async(project_id)
else
self.class.perform_in(INTERVAL, project_id, new_waiters, next_stage)
end
end
def wait_for_jobs(waiters)
waiters.each_with_object({}) do |(key, remaining), new_waiters|
waiter = JobWaiter.new(remaining, key)
# We wait for a brief moment of time so we don't reschedule if we can
# complete the work fast enough.
waiter.wait(BLOCKING_WAIT_TIME)
next unless waiter.jobs_remaining.positive?
new_waiters[waiter.key] = waiter.jobs_remaining
end
end
# rubocop: disable CodeReuse/ActiveRecord
def find_import_state(project_id)
ProjectImportState.select(:jid).with_status(:started).find_by(project_id: project_id)
end end
# rubocop: enable CodeReuse/ActiveRecord
end end
end end
end end
# frozen_string_literal: true
module Gitlab
module Import
module AdvanceStage
INTERVAL = 30.seconds.to_i
# The number of seconds to wait (while blocking the thread) before
# continuing to the next waiter.
BLOCKING_WAIT_TIME = 5
# project_id - The ID of the project being imported.
# waiters - A Hash mapping Gitlab::JobWaiter keys to the number of
# remaining jobs.
# next_stage - The name of the next stage to start when all jobs have been
# completed.
def perform(project_id, waiters, next_stage)
return unless import_state = find_import_state(project_id)
new_waiters = wait_for_jobs(waiters)
if new_waiters.empty?
# We refresh the import JID here so workers importing individual
# resources (e.g. notes) don't have to do this all the time, reducing
# the pressure on Redis. We _only_ do this once all jobs are done so
# we don't get stuck forever if one or more jobs failed to notify the
# JobWaiter.
import_state.refresh_jid_expiration
next_stage_worker(next_stage).perform_async(project_id)
else
self.class.perform_in(INTERVAL, project_id, new_waiters, next_stage)
end
end
def wait_for_jobs(waiters)
waiters.each_with_object({}) do |(key, remaining), new_waiters|
waiter = JobWaiter.new(remaining, key)
# We wait for a brief moment of time so we don't reschedule if we can
# complete the work fast enough.
waiter.wait(BLOCKING_WAIT_TIME)
next unless waiter.jobs_remaining.positive?
new_waiters[waiter.key] = waiter.jobs_remaining
end
end
def find_import_state(project_id)
ProjectImportState.jid_by(project_id: project_id, status: :started)
end
private
def next_stage_worker(next_stage)
raise NotImplementedError
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment