Commit 418249c6 authored by Robert Speicher

Merge branch '44542-move-import-specific-attributes-out-of-the-project-model' into 'master'

Moves import-specific attributes out of the Project model and into their own model

See merge request gitlab-org/gitlab-ee!5522
parents 43377b38 b204d11c
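
After this change, import bookkeeping lives in the project_mirror_data table, exposed through the new ProjectImportState model, while Project keeps its existing public API by delegating to it. A rough usage sketch, with a placeholder project id and jid value:

project = Project.find(42)
project.import_schedule         # delegates to project.import_state.schedule
project.import_jid = 'abc123'   # stored as import_state.jid
project.import_status           # reads import_state.status, defaulting to 'none'

Projects that still carry legacy values in the projects import columns get a ProjectImportState row created lazily by ensure_import_state the first time one of these accessors is called.
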
class Import::BaseController < ApplicationController
private
def find_already_added_projects(import_type)
current_user.created_projects.where(import_type: import_type).includes(:import_state)
end
def find_jobs(import_type)
current_user.created_projects
.includes(:import_state)
.where(import_type: import_type)
.to_json(only: [:id], methods: [:import_status])
end
def find_or_create_namespace(names, owner)
names = params[:target_namespace].presence || names
......
......@@ -22,16 +22,14 @@ class Import::BitbucketController < Import::BaseController
@repos, @incompatible_repos = repos.partition { |repo| repo.valid? }
@already_added_projects = current_user.created_projects.where(import_type: 'bitbucket')
@already_added_projects = find_already_added_projects('bitbucket')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.to_a.reject! { |repo| already_added_projects_names.include?(repo.full_name) }
end
def jobs
render json: current_user.created_projects
.where(import_type: 'bitbucket')
.to_json(only: [:id, :import_status])
render json: find_jobs('bitbucket')
end
def create
......
......@@ -46,15 +46,14 @@ class Import::FogbugzController < Import::BaseController
@repos = client.repos
@already_added_projects = current_user.created_projects.where(import_type: 'fogbugz')
@already_added_projects = find_already_added_projects('fogbugz')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.name }
end
def jobs
jobs = current_user.created_projects.where(import_type: 'fogbugz').to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('fogbugz')
end
def create
......
......@@ -26,15 +26,14 @@ class Import::GithubController < Import::BaseController
def status
@repos = client.repos
@already_added_projects = current_user.created_projects.where(import_type: provider)
@already_added_projects = find_already_added_projects(provider)
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.full_name }
end
def jobs
jobs = current_user.created_projects.where(import_type: provider).to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs(provider)
end
def create
......
......@@ -12,15 +12,14 @@ class Import::GitlabController < Import::BaseController
def status
@repos = client.projects
@already_added_projects = current_user.created_projects.where(import_type: "gitlab")
@already_added_projects = find_already_added_projects('gitlab')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos = @repos.to_a.reject { |repo| already_added_projects_names.include? repo["path_with_namespace"] }
end
def jobs
jobs = current_user.created_projects.where(import_type: "gitlab").to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('gitlab')
end
def create
......
......@@ -73,15 +73,14 @@ class Import::GoogleCodeController < Import::BaseController
@repos = client.repos
@incompatible_repos = client.incompatible_repos
@already_added_projects = current_user.created_projects.where(import_type: "google_code")
@already_added_projects = find_already_added_projects('google_code')
already_added_projects_names = @already_added_projects.pluck(:import_source)
@repos.reject! { |repo| already_added_projects_names.include? repo.name }
end
def jobs
jobs = current_user.created_projects.where(import_type: "google_code").to_json(only: [:id, :import_status])
render json: jobs
render json: find_jobs('google_code')
end
def create
......
......@@ -71,6 +71,9 @@ class Project < ActiveRecord::Base
before_save :ensure_runners_token
after_save :update_project_statistics, if: :namespace_id_changed?
after_save :create_import_state, if: ->(project) { project.import? && project.import_state.nil? }
after_create :create_project_feature, unless: :project_feature
after_create :create_ci_cd_settings,
......@@ -162,6 +165,8 @@ class Project < ActiveRecord::Base
has_one :fork_network_member
has_one :fork_network, through: :fork_network_member
has_one :import_state, autosave: true, class_name: 'ProjectImportState', inverse_of: :project
# Merge Requests for target project should be removed with it
has_many :merge_requests, foreign_key: 'target_project_id'
has_many :source_of_merge_requests, foreign_key: 'source_project_id', class_name: 'MergeRequest'
......@@ -392,51 +397,9 @@ class Project < ActiveRecord::Base
scope :abandoned, -> { where('projects.last_activity_at < ?', 6.months.ago) }
scope :excluding_project, ->(project) { where.not(id: project) }
scope :import_started, -> { where(import_status: 'started') }
state_machine :import_status, initial: :none do
event :import_schedule do
transition [:none, :finished, :failed] => :scheduled
end
event :force_import_start do
transition [:none, :finished, :failed] => :started
end
event :import_start do
transition scheduled: :started
end
event :import_finish do
transition started: :finished
end
event :import_fail do
transition [:scheduled, :started] => :failed
end
state :scheduled
state :started
state :finished
state :failed
after_transition [:none, :finished, :failed] => :scheduled do |project, _|
project.run_after_commit do
job_id = add_import_job
update(import_jid: job_id) if job_id
end
end
after_transition started: :finished do |project, _|
project.reset_cache_and_import_attrs
if Gitlab::ImportSources.importer_names.include?(project.import_type) && project.repo_exists?
project.run_after_commit do
Projects::AfterImportService.new(project).execute
end
end
end
end
scope :joins_import_state, -> { joins("LEFT JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :import_started, -> { joins_import_state.where("import_state.status = 'started' OR projects.import_status = 'started'") }
class << self
# Searches for a list of projects based on the query given in `query`.
......@@ -676,10 +639,6 @@ class Project < ActiveRecord::Base
external_import? || forked? || gitlab_project_import? || bare_repository_import?
end
def no_import?
import_status == 'none'
end
def external_import?
import_url.present?
end
......@@ -692,6 +651,93 @@ class Project < ActiveRecord::Base
import_started? || import_scheduled?
end
def import_state_args
{
status: self[:import_status],
jid: self[:import_jid],
last_error: self[:import_error]
}
end
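# Lazily migrates legacy import data off the projects table: when this
# project still has a non-'none' import_status column but no associated
# ProjectImportState row, copy status/jid/last_error over (via
# import_state_args) and reset projects.import_status to 'none'.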
def ensure_import_state
return if self[:import_status] == 'none' || self[:import_status].nil?
return unless import_state.nil?
create_import_state(import_state_args)
update_column(:import_status, 'none')
end
def import_schedule
ensure_import_state
import_state&.schedule
end
def force_import_start
ensure_import_state
import_state&.force_start
end
def import_start
ensure_import_state
import_state&.start
end
def import_fail
ensure_import_state
import_state&.fail_op
end
def import_finish
ensure_import_state
import_state&.finish
end
def import_jid=(new_jid)
ensure_import_state
import_state&.jid = new_jid
end
def import_jid
ensure_import_state
import_state&.jid
end
def import_error=(new_error)
ensure_import_state
import_state&.last_error = new_error
end
def import_error
ensure_import_state
import_state&.last_error
end
def import_status=(new_status)
ensure_import_state
import_state&.status = new_status
end
def import_status
ensure_import_state
import_state&.status || 'none'
end
def no_import?
import_status == 'none'
end
def import_started?
# import? does SQL work so only run it if it looks like there's an import running
import_status == 'started' && import?
......@@ -1494,7 +1540,7 @@ class Project < ActiveRecord::Base
def rename_repo_notify!
# When we import a project overwriting the original project, there
# is a move operation. In that case we don't want to send the instructions.
send_move_instructions(full_path_was) unless started?
send_move_instructions(full_path_was) unless import_started?
expires_full_path_cache
self.old_path_with_namespace = full_path_was
......@@ -1548,7 +1594,8 @@ class Project < ActiveRecord::Base
return unless import_jid
Gitlab::SidekiqStatus.unset(import_jid)
update_column(:import_jid, nil)
import_state.update_column(:jid, nil)
end
def running_or_pending_build_count(force: false)
......@@ -1567,7 +1614,8 @@ class Project < ActiveRecord::Base
sanitized_message = Gitlab::UrlSanitizer.sanitize(error_message)
import_fail
update_column(:import_error, sanitized_message)
import_state.update_column(:last_error, sanitized_message)
rescue ActiveRecord::ActiveRecordError => e
Rails.logger.error("Error setting import status to failed: #{e.message}. Original error: #{sanitized_message}")
ensure
......
class ProjectImportState < ActiveRecord::Base
include AfterCommitQueue
self.table_name = "project_mirror_data"
prepend EE::ProjectImportState
belongs_to :project, inverse_of: :import_state
validates :project, presence: true
state_machine :status, initial: :none do
event :schedule do
transition [:none, :finished, :failed] => :scheduled
end
event :force_start do
transition [:none, :finished, :failed] => :started
end
event :start do
transition scheduled: :started
end
event :finish do
transition started: :finished
end
event :fail_op do
transition [:scheduled, :started] => :failed
end
state :scheduled
state :started
state :finished
state :failed
after_transition [:none, :finished, :failed] => :scheduled do |state, _|
state.run_after_commit do
job_id = project.add_import_job
update(jid: job_id) if job_id
end
end
after_transition started: :finished do |state, _|
project = state.project
project.reset_cache_and_import_attrs
if Gitlab::ImportSources.importer_names.include?(project.import_type) && project.repo_exists?
state.run_after_commit do
Projects::AfterImportService.new(project).execute
end
end
end
end
end
......@@ -144,7 +144,7 @@ module Projects
if @project
@project.errors.add(:base, message)
@project.mark_import_as_failed(message) if @project.import?
@project.mark_import_as_failed(message) if @project.persisted? && @project.import?
end
@project
......
......@@ -63,11 +63,10 @@ module Gitlab
end
def find_project(id)
# We only care about the import JID so we can refresh it. We also only
# want the project if it hasn't been marked as failed yet. It's possible
# the import gets marked as stuck when jobs of the current stage failed
# somehow.
Project.select(:import_jid).import_started.find_by(id: id)
# TODO: Only select the JID
# This is due to the fact that the JID could be present in either the project record or
# its associated import_state record
Project.import_started.find_by(id: id)
end
end
end
......
......@@ -31,7 +31,10 @@ module Gitlab
end
def find_project(id)
Project.select(:import_jid).import_started.find_by(id: id)
# TODO: Only select the JID
# This is due to the fact that the JID could be present in either the project record or
# its associated import_state record
Project.import_started.find_by(id: id)
end
end
end
......
......@@ -29,7 +29,8 @@ class StuckImportJobsWorker
end
def mark_projects_with_jid_as_failed!
jids_and_ids = enqueued_projects_with_jid.pluck(:import_jid, :id).to_h
# TODO: Rollback this change to use SQL through #pluck
jids_and_ids = enqueued_projects_with_jid.map { |project| [project.import_jid, project.id] }.to_h
# Find the jobs that aren't currently running or that exceeded the threshold.
completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys)
......@@ -49,15 +50,15 @@ class StuckImportJobsWorker
end
def enqueued_projects
Project.with_import_status(:scheduled, :started)
Project.joins_import_state.where("(import_state.status = 'scheduled' OR import_state.status = 'started') OR (projects.import_status = 'scheduled' OR projects.import_status = 'started')")
end
def enqueued_projects_with_jid
enqueued_projects.where.not(import_jid: nil)
enqueued_projects.where.not("import_state.jid IS NULL AND projects.import_jid IS NULL")
end
def enqueued_projects_without_jid
enqueued_projects.where(import_jid: nil)
enqueued_projects.where("import_state.jid IS NULL AND projects.import_jid IS NULL")
end
def error_message
......
class AddIndexesToProjectMirrorData < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :project_mirror_data, :last_successful_update_at
add_concurrent_index :project_mirror_data, :jid
add_concurrent_index :project_mirror_data, :status
end
def down
remove_index :project_mirror_data, :last_successful_update_at if index_exists? :project_mirror_data, :last_successful_update_at
remove_index :project_mirror_data, :jid if index_exists? :project_mirror_data, :jid
remove_index :project_mirror_data, :status if index_exists? :project_mirror_data, :status
end
end
class MigrateImportAttributesDataFromProjectsToProjectMirrorData < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
UP_MIGRATION = 'PopulateImportState'.freeze
DOWN_MIGRATION = 'RollbackImportStateData'.freeze
BATCH_SIZE = 1000
DELAY_INTERVAL = 5.minutes
disable_ddl_transaction!
class Project < ActiveRecord::Base
include EachBatch
self.table_name = 'projects'
end
class ProjectImportState < ActiveRecord::Base
include EachBatch
self.table_name = 'project_mirror_data'
end
def up
projects = Project.where.not(import_status: :none)
queue_background_migration_jobs_by_range_at_intervals(projects, UP_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
import_state = ProjectImportState.where.not(status: :none)
queue_background_migration_jobs_by_range_at_intervals(import_state, DOWN_MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180503150427) do
ActiveRecord::Schema.define(version: 20180503154922) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -1980,10 +1980,18 @@ ActiveRecord::Schema.define(version: 20180503150427) do
t.datetime "next_execution_timestamp"
t.datetime "created_at"
t.datetime "updated_at"
t.string "status"
t.string "jid"
t.datetime_with_timezone "last_update_at"
t.datetime_with_timezone "last_successful_update_at"
t.text "last_error"
end
add_index "project_mirror_data", ["jid"], name: "index_project_mirror_data_on_jid", using: :btree
add_index "project_mirror_data", ["last_successful_update_at"], name: "index_project_mirror_data_on_last_successful_update_at", using: :btree
add_index "project_mirror_data", ["next_execution_timestamp", "retry_count"], name: "index_mirror_data_on_next_execution_and_retry_count", using: :btree
add_index "project_mirror_data", ["project_id"], name: "index_project_mirror_data_on_project_id", unique: true, using: :btree
add_index "project_mirror_data", ["status"], name: "index_project_mirror_data_on_status", using: :btree
create_table "project_repository_states", force: :cascade do |t|
t.integer "project_id", null: false
......
......@@ -10,15 +10,13 @@ module EE
prepended do
include Elastic::ProjectsSearch
prepend ImportStatusStateMachine
include EE::DeploymentPlatform
include EachBatch
before_validation :mark_remote_mirrors_for_removal
before_save :set_override_pull_mirror_available, unless: -> { ::Gitlab::CurrentSettings.mirror_available }
after_save :create_mirror_data, if: ->(project) { project.mirror? && project.mirror_changed? }
after_save :destroy_mirror_data, if: ->(project) { !project.mirror? && project.mirror_changed? }
before_save :set_next_execution_timestamp_to_now, if: ->(project) { project.mirror? && project.mirror_changed? && project.import_state }
after_update :remove_mirror_repository_reference,
if: ->(project) { project.mirror? && project.import_url_updated? }
......@@ -26,7 +24,6 @@ module EE
belongs_to :mirror_user, foreign_key: 'mirror_user_id', class_name: 'User'
has_one :repository_state, class_name: 'ProjectRepositoryState', inverse_of: :project
has_one :mirror_data, autosave: true, class_name: 'ProjectMirrorData'
has_one :push_rule, ->(project) { project&.feature_available?(:push_rules) ? all : none }
has_one :index_status
has_one :jenkins_service
......@@ -47,10 +44,14 @@ module EE
scope :mirror, -> { where(mirror: true) }
scope :inner_joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :mirrors_to_sync, ->(freeze_at) do
mirror.joins(:mirror_data).without_import_status(:scheduled, :started)
.where("next_execution_timestamp <= ?", freeze_at)
.where("project_mirror_data.retry_count <= ?", ::Gitlab::Mirror::MAX_RETRY)
mirror
.inner_joins_import_state
.where.not(import_state: { status: [:scheduled, :started] })
.where("import_state.next_execution_timestamp <= ?", freeze_at)
.where("import_state.retry_count <= ?", ::Gitlab::Mirror::MAX_RETRY)
end
scope :with_remote_mirrors, -> { joins(:remote_mirrors).where(remote_mirrors: { enabled: true }).distinct }
......@@ -119,21 +120,50 @@ module EE
def mirror_waiting_duration
return unless mirror?
(mirror_data.last_update_started_at.to_i -
mirror_data.last_update_scheduled_at.to_i).seconds
(import_state.last_update_started_at.to_i -
import_state.last_update_scheduled_at.to_i).seconds
end
def mirror_update_duration
return unless mirror?
(mirror_last_update_at.to_i -
mirror_data.last_update_started_at.to_i).seconds
import_state.last_update_started_at.to_i).seconds
end
def mirror_with_content?
mirror? && !empty_repo?
end
def import_state_args
super.merge(last_update_at: self[:mirror_last_update_at],
last_successful_update_at: self[:mirror_last_successful_update_at])
end
def mirror_last_update_at=(new_value)
ensure_import_state
import_state&.last_update_at = new_value
end
def mirror_last_update_at
ensure_import_state
import_state&.last_update_at
end
def mirror_last_successful_update_at=(new_value)
ensure_import_state
import_state&.last_successful_update_at = new_value
end
def mirror_last_successful_update_at
ensure_import_state
import_state&.last_successful_update_at
end
override :import_in_progress?
def import_in_progress?
# If we're importing while we do have a repository, we're simply updating the mirror.
......@@ -145,7 +175,7 @@ module EE
return false if mirror_hard_failed?
return false if updating_mirror?
self.mirror_data.next_execution_timestamp <= Time.now
self.import_state.next_execution_timestamp <= Time.now
end
def updating_mirror?
......@@ -175,7 +205,7 @@ module EE
end
def mirror_hard_failed?
self.mirror_data.retry_limit_exceeded?
self.import_state.retry_limit_exceeded?
end
def has_remote_mirror?
......@@ -259,12 +289,12 @@ module EE
def force_import_job!
return if mirror_about_to_update? || updating_mirror?
mirror_data = self.mirror_data
import_state = self.import_state
mirror_data.set_next_execution_to_now
mirror_data.reset_retry_count if mirror_data.retry_limit_exceeded?
import_state.set_next_execution_to_now
import_state.reset_retry_count if import_state.retry_limit_exceeded?
mirror_data.save!
import_state.save!
UpdateAllMirrorsWorker.perform_async
end
......@@ -511,6 +541,10 @@ module EE
true
end
def set_next_execution_timestamp_to_now
import_state.set_next_execution_to_now
end
def licensed_feature_available?(feature)
available_features = strong_memoize(:licensed_feature_available) do
Hash.new do |h, feature|
......@@ -532,10 +566,6 @@ module EE
end
end
def destroy_mirror_data
mirror_data.destroy
end
def validate_board_limit(board)
# Board limits are disabled in EE, so this method is just a no-op.
end
......
module EE
module Project
module ImportStatusStateMachine
extend ActiveSupport::Concern
included do
state_machine :import_status, initial: :none do
before_transition [:none, :finished, :failed] => :scheduled do |project, _|
project.mirror_data&.last_update_scheduled_at = Time.now
end
before_transition scheduled: :started do |project, _|
project.mirror_data&.last_update_started_at = Time.now
end
before_transition scheduled: :failed do |project, _|
if project.mirror?
project.mirror_last_update_at = Time.now
project.mirror_data.set_next_execution_to_now
end
end
after_transition started: :failed do |project, _|
if project.mirror? && project.mirror_hard_failed?
::NotificationService.new.mirror_was_hard_failed(project)
end
end
after_transition [:scheduled, :started] => [:finished, :failed] do |project, _|
::Gitlab::Mirror.decrement_capacity(project.id) if project.mirror?
end
before_transition started: :failed do |project, _|
if project.mirror?
project.mirror_last_update_at = Time.now
mirror_data = project.mirror_data
mirror_data.increment_retry_count
mirror_data.set_next_execution_timestamp
end
end
before_transition started: :finished do |project, _|
if project.mirror?
timestamp = Time.now
project.mirror_last_update_at = timestamp
project.mirror_last_successful_update_at = timestamp
mirror_data = project.mirror_data
mirror_data.reset_retry_count
mirror_data.set_next_execution_timestamp
end
if ::Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
project.run_after_commit do
last_indexed_commit = project.index_status&.last_commit
ElasticCommitIndexerWorker.perform_async(project.id, last_indexed_commit)
end
end
end
after_transition [:finished, :failed] => [:scheduled, :started] do |project, _|
::Gitlab::Mirror.increment_capacity(project.id) if project.mirror?
end
end
end
end
end
end
module EE
module ProjectImportState
extend ActiveSupport::Concern
extend ::Gitlab::Utils::Override
prepended do
BACKOFF_PERIOD = 24.seconds
JITTER = 6.seconds
delegate :mirror?, to: :project
before_validation :set_next_execution_to_now, on: :create
state_machine :status, initial: :none do
before_transition [:none, :finished, :failed] => :scheduled do |state, _|
state.last_update_scheduled_at = Time.now
end
before_transition scheduled: :started do |state, _|
state.last_update_started_at = Time.now
end
before_transition scheduled: :failed do |state, _|
if state.mirror?
state.last_update_at = Time.now
state.set_next_execution_to_now
end
end
after_transition started: :failed do |state, _|
if state.mirror? && state.retry_limit_exceeded?
::NotificationService.new.mirror_was_hard_failed(state.project)
end
end
after_transition [:scheduled, :started] => [:finished, :failed] do |state, _|
::Gitlab::Mirror.decrement_capacity(state.project_id) if state.mirror?
end
before_transition started: :failed do |state, _|
if state.mirror?
state.last_update_at = Time.now
state.increment_retry_count
state.set_next_execution_timestamp
end
end
before_transition started: :finished do |state, _|
if state.mirror?
timestamp = Time.now
state.last_update_at = timestamp
state.last_successful_update_at = timestamp
state.reset_retry_count
state.set_next_execution_timestamp
end
if ::Gitlab::CurrentSettings.current_application_settings.elasticsearch_indexing?
state.run_after_commit do
last_indexed_commit = state.project.index_status&.last_commit
ElasticCommitIndexerWorker.perform_async(state.project_id, last_indexed_commit)
end
end
end
after_transition [:finished, :failed] => [:scheduled, :started] do |state, _|
::Gitlab::Mirror.increment_capacity(state.project_id) if state.mirror?
end
end
end
def reset_retry_count
self.retry_count = 0
end
def increment_retry_count
self.retry_count += 1
end
# We schedule the next sync based on the duration of the last mirroring
# period, adding a fixed backoff period with a random jitter
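# For example, with a last update that took 2 minutes, a 2 second jitter and
# retry_count 0, the delay is (24s + 2s) * 120 = 52 minutes; with retry_count 3
# the same inputs give 156 minutes, clamped between Gitlab::Mirror.min_delay
# and Gitlab::Mirror.max_delay.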
def set_next_execution_timestamp
timestamp = Time.now
retry_factor = [1, self.retry_count].max
delay = [base_delay(timestamp), ::Gitlab::Mirror.min_delay].max
delay = [delay * retry_factor, ::Gitlab::Mirror.max_delay].min
self.next_execution_timestamp = timestamp + delay
end
def set_next_execution_to_now
return unless project.mirror?
self.next_execution_timestamp = Time.now
end
def retry_limit_exceeded?
self.retry_count > ::Gitlab::Mirror::MAX_RETRY
end
private
def base_delay(timestamp)
return 0 unless self.last_update_started_at
duration = timestamp - self.last_update_started_at
(BACKOFF_PERIOD + rand(JITTER)) * duration.seconds
end
end
end
class ProjectMirrorData < ActiveRecord::Base
BACKOFF_PERIOD = 24.seconds
JITTER = 6.seconds
belongs_to :project
validates :project, presence: true
validates :next_execution_timestamp, presence: true
before_validation :set_next_execution_to_now, on: :create
def reset_retry_count
self.retry_count = 0
end
def increment_retry_count
self.retry_count += 1
end
# We schedule the next sync based on the duration of the last mirroring
# period, adding a fixed backoff period with a random jitter
def set_next_execution_timestamp
timestamp = Time.now
retry_factor = [1, self.retry_count].max
delay = [base_delay(timestamp), Gitlab::Mirror.min_delay].max
delay = [delay * retry_factor, Gitlab::Mirror.max_delay].min
self.next_execution_timestamp = timestamp + delay
end
def set_next_execution_to_now
self.next_execution_timestamp = Time.now
end
def retry_limit_exceeded?
self.retry_count > Gitlab::Mirror::MAX_RETRY
end
private
def base_delay(timestamp)
return 0 unless self.last_update_started_at
duration = timestamp - self.last_update_started_at
(BACKOFF_PERIOD + rand(JITTER)) * duration.seconds
end
end
......@@ -37,7 +37,7 @@ class UpdateAllMirrorsWorker
# If fewer than `batch_size` projects were returned, we don't need to query again
break if projects.length < batch_size
last = projects.last.mirror_data.next_execution_timestamp
last = projects.last.import_state.next_execution_timestamp
end
ProjectImportScheduleWorker.bulk_perform_and_wait(all_project_ids.map { |id| [id] }, timeout: SCHEDULE_WAIT_TIMEOUT.to_i)
......@@ -56,11 +56,11 @@ class UpdateAllMirrorsWorker
def pull_mirrors_batch(freeze_at:, batch_size:, offset_at: nil)
relation = Project
.mirrors_to_sync(freeze_at)
.reorder('project_mirror_data.next_execution_timestamp')
.reorder('import_state.next_execution_timestamp')
.limit(batch_size)
.includes(:namespace) # Used by `project.mirror?`
relation = relation.where('next_execution_timestamp > ?', offset_at) if offset_at
relation = relation.where('import_state.next_execution_timestamp > ?', offset_at) if offset_at
relation
end
......
---
title: Improves database performance of mirrors, forks and imports
merge_request: 5522
author:
type: performance
class MigrateImportAttributesFromProjectToProjectMirrorData < ActiveRecord::Migration
DOWNTIME = false
def up
add_column :project_mirror_data, :status, :string
add_column :project_mirror_data, :jid, :string
add_column :project_mirror_data, :last_update_at, :datetime_with_timezone
add_column :project_mirror_data, :last_successful_update_at, :datetime_with_timezone
add_column :project_mirror_data, :last_error, :text
end
def down
remove_column :project_mirror_data, :status
remove_column :project_mirror_data, :jid
remove_column :project_mirror_data, :last_update_at
remove_column :project_mirror_data, :last_successful_update_at
remove_column :project_mirror_data, :last_error
end
end
class MigrateMirrorAttributesDataFromProjectsToImportState < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
class Project < ActiveRecord::Base
include EachBatch
self.table_name = 'projects'
scope :join_mirror_data, -> { joins('INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id') }
end
def up
Project.join_mirror_data.each_batch do |batch|
start, stop = batch.pluck('MIN(projects.id), MAX(projects.id)').first
if Gitlab::Database.mysql?
execute <<~SQL
UPDATE project_mirror_data, projects
SET
project_mirror_data.status = projects.import_status,
project_mirror_data.jid = projects.import_jid,
project_mirror_data.last_update_at = projects.mirror_last_update_at,
project_mirror_data.last_successful_update_at = projects.mirror_last_successful_update_at,
project_mirror_data.last_error = projects.import_error
WHERE projects.id = project_mirror_data.project_id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
else
execute <<~SQL
UPDATE project_mirror_data
SET
status = projects.import_status,
jid = projects.import_jid,
last_update_at = projects.mirror_last_update_at,
last_successful_update_at = projects.mirror_last_successful_update_at,
last_error = projects.import_error
FROM projects
WHERE projects.id = project_id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
end
execute <<~SQL
UPDATE projects
SET import_status = 'none'
WHERE mirror = TRUE
AND id BETWEEN #{start} AND #{stop}
SQL
end
end
def down
Project.join_mirror_data.each_batch do |batch|
start, stop = batch.pluck('MIN(projects.id), MAX(projects.id)').first
if Gitlab::Database.mysql?
execute <<~SQL
UPDATE projects, project_mirror_data
SET
projects.import_status = project_mirror_data.status,
projects.import_jid = project_mirror_data.jid,
projects.mirror_last_update_at = project_mirror_data.last_update_at,
projects.mirror_last_successful_update_at = project_mirror_data.last_successful_update_at,
projects.import_error = project_mirror_data.last_error
WHERE project_mirror_data.project_id = projects.id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
execute <<~SQL
UPDATE project_mirror_data, projects
SET project_mirror_data.status = 'none'
WHERE projects.id = project_mirror_data.project_id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
else
execute <<~SQL
UPDATE projects
SET
import_status = project_mirror_data.status,
import_jid = project_mirror_data.jid,
mirror_last_update_at = project_mirror_data.last_update_at,
mirror_last_successful_update_at = project_mirror_data.last_successful_update_at,
import_error = project_mirror_data.last_error
FROM project_mirror_data
WHERE project_mirror_data.project_id = projects.id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
execute <<~SQL
UPDATE project_mirror_data
SET status = 'none'
FROM projects
WHERE projects.id = project_id
AND projects.mirror = TRUE
AND projects.id BETWEEN #{start} AND #{stop}
SQL
end
end
end
end
......@@ -3,7 +3,8 @@ require 'spec_helper'
feature 'Project mirror', :js do
include ReactiveCachingHelpers
let(:project) { create(:project, :mirror, :import_finished, :repository, creator: user, name: 'Victorialand') }
let(:project) { create(:project, :repository, creator: user, name: 'Victorialand') }
let(:import_state) { create(:import_state, :mirror, :finished, project: project) }
let(:user) { create(:user) }
describe 'On a project' do
......@@ -30,12 +31,12 @@ feature 'Project mirror', :js do
let(:timestamp) { Time.now }
before do
project.mirror_data.update_attributes(next_execution_timestamp: timestamp + 10.minutes)
import_state.update_attributes(next_execution_timestamp: timestamp + 10.minutes)
end
context 'when able to force update' do
it 'forces import' do
project.update_attributes(mirror_last_update_at: timestamp - 8.minutes)
import_state.update_attributes(last_update_at: timestamp - 8.minutes)
expect_any_instance_of(EE::Project).to receive(:force_import_job!)
......@@ -49,7 +50,7 @@ feature 'Project mirror', :js do
context 'when unable to force update' do
it 'does not force import' do
project.update_attributes(mirror_last_update_at: timestamp - 3.minutes)
import_state.update_attributes(last_update_at: timestamp - 3.minutes)
expect_any_instance_of(EE::Project).not_to receive(:force_import_job!)
......
......@@ -118,7 +118,8 @@ feature 'New project' do
# Mock the POST `/import/github`
allow_any_instance_of(Gitlab::LegacyGithubImport::Client).to receive(:repo).and_return(repo)
project = create(:project, name: 'some-github-repo', creator: user, import_type: 'github', import_status: 'finished', import_url: repo.clone_url)
project = create(:project, name: 'some-github-repo', creator: user, import_type: 'github')
create(:import_state, :finished, import_url: repo.clone_url, project: project)
allow_any_instance_of(CiCd::SetupProject).to receive(:setup_external_service)
CiCd::SetupProject.new(project, user).execute
allow_any_instance_of(Gitlab::LegacyGithubImport::ProjectCreator)
......
require 'spec_helper'
require Rails.root.join('ee', 'db', 'post_migrate', '20180430180136_migrate_mirror_attributes_data_from_projects_to_import_state.rb')
describe MigrateMirrorAttributesDataFromProjectsToImportState, :migration do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:import_state) { table(:project_mirror_data) }
describe '#up' do
before do
namespaces.create(id: 1, name: 'gitlab-org', path: 'gitlab-org')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab1',
path: 'gitlab1', import_error: "foo", import_status: :started,
mirror: true, import_url: generate(:url))
projects.create!(id: 2, namespace_id: 1, name: 'gitlab2',
path: 'gitlab2', import_error: "foo", import_status: :finished,
mirror: false, import_url: generate(:url))
import_state.create!(id: 1, project_id: 1)
import_state.create!(id: 2, project_id: 2)
end
it 'migrates the mirror data to the import_state table' do
expect(projects.joins("INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id").count).to eq(2)
expect do
subject.up
end.to change { projects.where(import_status: 'none').count }.from(0).to(1)
expect(import_state.first.status).to eq("started")
expect(import_state.first.last_error).to eq("foo")
expect(import_state.last.status).to be_nil
expect(import_state.last.last_error).to be_nil
end
end
describe '#down' do
before do
namespaces.create(id: 1, name: 'gitlab-org', path: 'gitlab-org')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab1',
path: 'gitlab1', mirror: true, import_url: generate(:url))
import_state.create!(id: 1, project_id: 1, status: :started, last_error: "foo")
end
it 'migrates the import_state mirror data into the projects table' do
expect(projects.joins("INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id").count).to eq(1)
expect do
subject.down
end.to change { import_state.where(status: 'none').count }.from(0).to(1)
expect(projects.first.import_status).to eq("started")
expect(projects.first.import_error).to eq("foo")
end
end
end
require 'rails_helper'
describe ProjectMirrorData, type: :model do
describe 'associations' do
it { is_expected.to belong_to(:project) }
end
describe 'validations' do
it { is_expected.to validate_presence_of(:project) }
end
describe ProjectImportState, type: :model do
describe 'when create' do
it 'sets next execution timestamp to now' do
project = create(:project)
Timecop.freeze(Time.now) do
project.create_mirror_data
import_state = create(:import_state, :mirror)
expect(project.mirror_data.next_execution_timestamp).to eq(Time.now)
expect(import_state.next_execution_timestamp).to eq(Time.now)
end
end
end
describe '#reset_retry_count' do
let(:mirror_data) { create(:project, :mirror, :import_finished).mirror_data }
let(:import_state) { create(:import_state, :mirror, :finished, retry_count: 3) }
it 'resets retry_count to 0' do
mirror_data.retry_count = 3
expect { mirror_data.reset_retry_count }.to change { mirror_data.retry_count }.from(3).to(0)
expect { import_state.reset_retry_count }.to change { import_state.retry_count }.from(3).to(0)
end
end
describe '#increment_retry_count' do
let(:mirror_data) { create(:project, :mirror, :import_finished).mirror_data }
let(:import_state) { create(:import_state, :mirror, :finished) }
it 'increments retry_count' do
expect { mirror_data.increment_retry_count }.to change { mirror_data.retry_count }.from(0).to(1)
expect { import_state.increment_retry_count }.to change { import_state.retry_count }.from(0).to(1)
end
end
describe '#set_next_execution_timestamp' do
let(:mirror_data) { create(:project, :mirror, :import_finished).mirror_data }
let(:import_state) { create(:import_state, :mirror, :finished) }
let!(:timestamp) { Time.now }
let!(:jitter) { 2.seconds }
before do
allow_any_instance_of(ProjectMirrorData).to receive(:rand).and_return(jitter)
allow_any_instance_of(ProjectImportState).to receive(:rand).and_return(jitter)
end
context 'when base delay is lower than mirror_max_delay' do
before do
mirror_data.last_update_started_at = timestamp - 2.minutes
import_state.last_update_started_at = timestamp - 2.minutes
end
context 'when retry count is 0' do
it 'applies transition successfully' do
expect_next_execution_timestamp(mirror_data, timestamp + 52.minutes)
expect_next_execution_timestamp(import_state, timestamp + 52.minutes)
end
end
context 'when incrementing retry count' do
it 'applies transition successfully' do
mirror_data.retry_count = 2
mirror_data.increment_retry_count
import_state.retry_count = 2
import_state.increment_retry_count
expect_next_execution_timestamp(mirror_data, timestamp + 156.minutes)
expect_next_execution_timestamp(import_state, timestamp + 156.minutes)
end
end
end
......@@ -78,27 +66,27 @@ describe ProjectMirrorData, type: :model do
context 'when last_update_started_at is nil' do
it 'applies transition successfully' do
expect_next_execution_timestamp(mirror_data, timestamp + 30.minutes + mirror_jitter)
expect_next_execution_timestamp(import_state, timestamp + 30.minutes + mirror_jitter)
end
end
context 'when base delay is lower than mirror min_delay' do
before do
mirror_data.last_update_started_at = timestamp - 1.second
import_state.last_update_started_at = timestamp - 1.second
end
context 'when resetting retry count' do
it 'applies transition successfully' do
expect_next_execution_timestamp(mirror_data, timestamp + 30.minutes + mirror_jitter)
expect_next_execution_timestamp(import_state, timestamp + 30.minutes + mirror_jitter)
end
end
context 'when incrementing retry count' do
it 'applies transition successfully' do
mirror_data.retry_count = 3
mirror_data.increment_retry_count
import_state.retry_count = 3
import_state.increment_retry_count
expect_next_execution_timestamp(mirror_data, timestamp + 122.minutes)
expect_next_execution_timestamp(import_state, timestamp + 122.minutes)
end
end
end
......@@ -107,31 +95,31 @@ describe ProjectMirrorData, type: :model do
let(:max_timestamp) { timestamp + Gitlab::CurrentSettings.mirror_max_delay.minutes }
before do
mirror_data.last_update_started_at = timestamp - 1.hour
import_state.last_update_started_at = timestamp - 1.hour
end
context 'when resetting retry count' do
it 'applies transition successfully' do
expect_next_execution_timestamp(mirror_data, max_timestamp + mirror_jitter)
expect_next_execution_timestamp(import_state, max_timestamp + mirror_jitter)
end
end
context 'when incrementing retry count' do
it 'applies transition successfully' do
mirror_data.retry_count = 2
mirror_data.increment_retry_count
import_state.retry_count = 2
import_state.increment_retry_count
expect_next_execution_timestamp(mirror_data, max_timestamp + mirror_jitter)
expect_next_execution_timestamp(import_state, max_timestamp + mirror_jitter)
end
end
end
end
def expect_next_execution_timestamp(mirror_data, new_timestamp)
def expect_next_execution_timestamp(import_state, new_timestamp)
Timecop.freeze(timestamp) do
expect do
mirror_data.set_next_execution_timestamp
end.to change { mirror_data.next_execution_timestamp }.to eq(new_timestamp)
import_state.set_next_execution_timestamp
end.to change { import_state.next_execution_timestamp }.to eq(new_timestamp)
end
end
end
......
......@@ -13,7 +13,7 @@ describe Project do
it { is_expected.to delegate_method(:shared_runners_minutes_limit_enabled?).to(:shared_runners_limit_namespace) }
it { is_expected.to delegate_method(:shared_runners_minutes_used?).to(:shared_runners_limit_namespace) }
it { is_expected.to have_one(:mirror_data).class_name('ProjectMirrorData') }
it { is_expected.to have_one(:import_state).class_name('ProjectImportState') }
it { is_expected.to have_one(:repository_state).class_name('ProjectRepositoryState').inverse_of(:project) }
it { is_expected.to have_many(:path_locks) }
......@@ -48,6 +48,52 @@ describe Project do
end
end
describe 'setting up a mirror' do
context 'when new project' do
it 'creates import_state and sets next_execution_timestamp to now' do
project = build(:project, :mirror)
Timecop.freeze do
expect do
project.save
end.to change { ProjectImportState.count }.by(1)
expect(project.import_state.next_execution_timestamp).to eq(Time.now)
end
end
end
context 'when project already exists' do
context 'when project is not import' do
it 'creates import_state and sets next_execution_timestamp to now' do
project = create(:project)
Timecop.freeze do
expect do
project.update_attributes(mirror: true, mirror_user_id: project.creator.id, import_url: generate(:url))
end.to change { ProjectImportState.count }.by(1)
expect(project.import_state.next_execution_timestamp).to eq(Time.now)
end
end
end
context 'when project is import' do
it 'sets current import_state next_execution_timestamp to now' do
project = create(:project, import_url: generate(:url))
Timecop.freeze do
expect do
project.update_attributes(mirror: true, mirror_user_id: project.creator.id)
end.not_to change { ProjectImportState.count }
expect(project.import_state.next_execution_timestamp).to eq(Time.now)
end
end
end
end
end
describe '.mirrors_to_sync' do
let(:timestamp) { Time.now }
......@@ -68,14 +114,15 @@ describe Project do
end
context 'when mirror is finished' do
let!(:project) { create(:project, :mirror, :import_finished) }
let!(:project) { create(:project) }
let!(:import_state) { create(:import_state, :mirror, :finished, project: project) }
it 'returns project if next_execution_timestamp is not in the future' do
expect(described_class.mirrors_to_sync(timestamp)).to match_array(project)
end
it 'returns empty if next_execution_timestamp is in the future' do
project.mirror_data.update_attributes(next_execution_timestamp: timestamp + 2.minutes)
import_state.update_attributes(next_execution_timestamp: timestamp + 2.minutes)
expect(described_class.mirrors_to_sync(timestamp)).to be_empty
end
......@@ -89,7 +136,7 @@ describe Project do
end
it 'returns empty if next_execution_timestamp is in the future' do
project.mirror_data.update_attributes(next_execution_timestamp: timestamp + 2.minutes)
project.import_state.update_attributes(next_execution_timestamp: timestamp + 2.minutes)
expect(described_class.mirrors_to_sync(timestamp)).to be_empty
end
......@@ -119,7 +166,7 @@ describe Project do
describe 'hard failing a mirror' do
it 'sends a notification' do
project = create(:project, :mirror, :import_started)
project.mirror_data.update_attributes(retry_count: Gitlab::Mirror::MAX_RETRY)
project.import_state.update_attributes(retry_count: Gitlab::Mirror::MAX_RETRY)
expect_any_instance_of(EE::NotificationService).to receive(:mirror_was_hard_failed).with(project)
......@@ -316,7 +363,7 @@ describe Project do
expect(UpdateAllMirrorsWorker).to receive(:perform_async)
Timecop.freeze(timestamp) do
expect { project.force_import_job! }.to change(project.mirror_data, :next_execution_timestamp).to(timestamp)
expect { project.force_import_job! }.to change(project.import_state, :next_execution_timestamp).to(timestamp)
end
end
......@@ -328,8 +375,8 @@ describe Project do
expect(UpdateAllMirrorsWorker).to receive(:perform_async)
Timecop.freeze(timestamp) do
expect { project.force_import_job! }.to change(project.mirror_data, :retry_count).to(0)
expect(project.mirror_data.next_execution_timestamp).to eq(timestamp)
expect { project.force_import_job! }.to change(project.import_state, :retry_count).to(0)
expect(project.import_state.next_execution_timestamp).to eq(timestamp)
end
end
end
......@@ -366,9 +413,9 @@ describe Project do
describe '#mirror_waiting_duration' do
it 'returns in seconds the time spent in the queue' do
project = create(:project, :mirror, :import_scheduled)
mirror_data = project.mirror_data
import_state = project.import_state
mirror_data.update_attributes(last_update_started_at: mirror_data.last_update_scheduled_at + 5.minutes)
import_state.update_attributes(last_update_started_at: import_state.last_update_scheduled_at + 5.minutes)
expect(project.mirror_waiting_duration).to eq(300)
end
......@@ -378,7 +425,7 @@ describe Project do
it 'returns in seconds the time spent updating' do
project = create(:project, :mirror, :import_started)
project.update_attributes(mirror_last_update_at: project.mirror_data.last_update_started_at + 5.minutes)
project.update_attributes(mirror_last_update_at: project.import_state.last_update_started_at + 5.minutes)
expect(project.mirror_update_duration).to eq(300)
end
......@@ -390,7 +437,7 @@ describe Project do
timestamp = Time.now
project = create(:project, :mirror, :import_finished, :repository)
project.mirror_last_update_at = timestamp - 3.minutes
project.mirror_data.next_execution_timestamp = timestamp - 2.minutes
project.import_state.next_execution_timestamp = timestamp - 2.minutes
expect(project.mirror_about_to_update?).to be true
end
......
......@@ -24,13 +24,15 @@ describe API::ProjectMirror do
context 'when import state is' do
def project_in_state(state)
project = create(:project, :repository, :mirror, state, namespace: user.namespace)
project.mirror_data.update_attributes(next_execution_timestamp: 10.minutes.from_now)
project = create(:project, :repository, namespace: user.namespace)
import_state = create(:import_state, :mirror, state, project: project)
import_state.update_attributes(next_execution_timestamp: 10.minutes.from_now)
project
end
it 'none it triggers the pull mirroring operation' do
project = project_in_state(:import_none)
project = project_in_state(:none)
expect(UpdateAllMirrorsWorker).to receive(:perform_async).once
......@@ -40,7 +42,7 @@ describe API::ProjectMirror do
end
it 'failed it triggers the pull mirroring operation' do
project = project_in_state(:import_failed)
project = project_in_state(:failed)
expect(UpdateAllMirrorsWorker).to receive(:perform_async).once
......@@ -50,7 +52,7 @@ describe API::ProjectMirror do
end
it 'finished it triggers the pull mirroring operation' do
project = project_in_state(:import_finished)
project = project_in_state(:finished)
expect(UpdateAllMirrorsWorker).to receive(:perform_async).once
......@@ -60,7 +62,7 @@ describe API::ProjectMirror do
end
it 'scheduled does not trigger the pull mirroring operation and returns 200' do
project = project_in_state(:import_scheduled)
project = project_in_state(:scheduled)
expect(UpdateAllMirrorsWorker).not_to receive(:perform_async)
......@@ -70,7 +72,7 @@ describe API::ProjectMirror do
end
it 'started does not trigger the pull mirroring operation and returns 200' do
project = project_in_state(:import_started)
project = project_in_state(:started)
expect(UpdateAllMirrorsWorker).not_to receive(:perform_async)
......
......@@ -12,8 +12,6 @@ describe Projects::UpdateMirrorService do
end
it 'does nothing' do
allow_any_instance_of(EE::Project).to receive(:destroy_mirror_data)
expect(project).not_to receive(:fetch_mirror)
result = described_class.new(project, project.owner).execute
......
......@@ -52,7 +52,7 @@ describe 'shared/_mirror_status.html.haml' do
context 'with a hard failed mirror' do
it 'renders hard failed message' do
@project.mirror_data.retry_count = Gitlab::Mirror::MAX_RETRY + 1
@project.import_state.retry_count = Gitlab::Mirror::MAX_RETRY + 1
render 'shared/mirror_status', raw_message: true
......
......@@ -3,7 +3,8 @@ require 'rails_helper'
describe RepositoryUpdateMirrorWorker do
describe '#perform' do
let(:jid) { '12345678' }
let!(:project) { create(:project, :mirror, :import_scheduled) }
let!(:project) { create(:project) }
let!(:import_state) { create(:import_state, :mirror, :scheduled, project: project) }
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(true)
......@@ -13,7 +14,7 @@ describe RepositoryUpdateMirrorWorker do
it 'sets status as finished when update mirror service executes successfully' do
expect_any_instance_of(Projects::UpdateMirrorService).to receive(:execute).and_return(status: :success)
expect { subject.perform(project.id) }.to change { project.reload.import_status }.to('finished')
expect { subject.perform(project.id) }.to change { import_state.reload.status }.to('finished')
end
it 'sets status as failed when update mirror service executes with errors' do
......@@ -35,16 +36,17 @@ describe RepositoryUpdateMirrorWorker do
allow_any_instance_of(Projects::UpdateMirrorService).to receive(:execute).and_raise(RuntimeError)
expect { subject.perform(project.id) }.to raise_error(RepositoryUpdateMirrorWorker::UpdateError)
expect(project.reload.import_status).to eq('failed')
expect(import_state.reload.status).to eq('failed')
end
context 'when worker was reset without cleanup' do
let(:started_project) { create(:project, :mirror, :import_started, import_jid: jid) }
let(:started_project) { create(:project) }
let(:import_state) { create(:import_state, :mirror, :started, project: started_project, jid: jid) }
it 'sets status as finished when update mirror service executes successfully' do
expect_any_instance_of(Projects::UpdateMirrorService).to receive(:execute).and_return(status: :success)
expect { subject.perform(started_project.id) }.to change { started_project.reload.import_status }.to('finished')
expect { subject.perform(started_project.id) }.to change { import_state.reload.status }.to('finished')
end
end
......
......@@ -37,7 +37,7 @@ describe UpdateAllMirrorsWorker do
end
def expect_import_status(project, status)
expect(project.reload.import_status).to eq(status)
expect(project.import_state.reload.status).to eq(status)
end
def expect_import_scheduled(*projects)
......@@ -65,7 +65,7 @@ describe UpdateAllMirrorsWorker do
namespace = create(:group, :public, plan: (:bronze_plan if licensed))
project = create(:project, :public, :mirror, namespace: namespace)
project.mirror_data.update!(next_execution_timestamp: at)
project.import_state.update!(next_execution_timestamp: at)
project.update!(visibility_level: Gitlab::VisibilityLevel::PRIVATE)
project
end
......
......@@ -136,6 +136,7 @@ module API
def self.preload_relation(projects_relation, options = {})
projects_relation.preload(:project_feature, :route)
.preload(:import_state)
.preload(namespace: [:route, :owner],
tags: :taggings)
end
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This background migration creates all the records on the
# import state table for projects that are considered imports or forks
class PopulateImportState
def perform(start_id, end_id)
move_attributes_data_to_import_state(start_id, end_id)
rescue ActiveRecord::RecordNotUnique
retry
end
def move_attributes_data_to_import_state(start_id, end_id)
Rails.logger.info("#{self.class.name} - Moving import attributes data to project mirror data table: #{start_id} - #{end_id}")
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_mirror_data (project_id, status, jid, last_update_at, last_successful_update_at, last_error)
SELECT id, import_status, import_jid, mirror_last_update_at, mirror_last_successful_update_at, import_error
FROM projects
WHERE projects.import_status != 'none'
AND projects.id BETWEEN #{start_id} AND #{end_id}
AND NOT EXISTS (
SELECT id
FROM project_mirror_data
WHERE project_id = projects.id
)
SQL
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects
SET import_status = 'none'
WHERE import_status != 'none'
AND id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This background migration migrates all the data of import_state
# back to the projects table for projects that are considered imports or forks
class RollbackImportStateData
def perform(start_id, end_id)
move_attributes_data_to_project(start_id, end_id)
end
def move_attributes_data_to_project(start_id, end_id)
Rails.logger.info("#{self.class.name} - Moving import attributes data to projects table: #{start_id} - #{end_id}")
if Gitlab::Database.mysql?
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects, project_mirror_data
SET
projects.import_status = project_mirror_data.status,
projects.import_jid = project_mirror_data.jid,
projects.mirror_last_update_at = project_mirror_data.last_update_at,
projects.mirror_last_successful_update_at = project_mirror_data.last_successful_update_at,
projects.import_error = project_mirror_data.last_error
WHERE project_mirror_data.project_id = projects.id
AND project_mirror_data.id BETWEEN #{start_id} AND #{end_id}
SQL
else
ActiveRecord::Base.connection.execute <<~SQL
UPDATE projects
SET
import_status = project_mirror_data.status,
import_jid = project_mirror_data.jid,
mirror_last_update_at = project_mirror_data.last_update_at,
mirror_last_successful_update_at = project_mirror_data.last_successful_update_at,
import_error = project_mirror_data.last_error
FROM project_mirror_data
WHERE project_mirror_data.project_id = projects.id
AND project_mirror_data.id BETWEEN #{start_id} AND #{end_id}
SQL
end
end
end
end
end
......@@ -43,7 +43,8 @@ module Gitlab
Gitlab::SidekiqStatus
.set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
project.update_column(:import_jid, jid)
project.ensure_import_state
project.import_state&.update_column(:jid, jid)
Stage::ImportRepositoryWorker
.perform_async(project.id)
......
......@@ -78,7 +78,9 @@ module Gitlab
def handle_errors
return unless errors.any?
project.update_column(:import_error, {
project.ensure_import_state
project.import_state&.update_column(:last_error, {
message: 'The remote data could not be fully imported.',
errors: errors
}.to_json)
......
FactoryBot.define do
factory :import_state, class: ProjectImportState do
status :none
association :project, factory: :project
transient do
import_url { generate(:url) }
end
trait :repository do
association :project, factory: [:project, :repository]
end
trait :mirror do
transient do
mirror true
import_url { generate(:url) }
end
before(:create) do |import_state, evaluator|
project = import_state.project
project.update_columns(mirror: evaluator.mirror,
import_url: evaluator.import_url,
mirror_user_id: project.creator_id)
end
end
trait :none do
status :none
end
trait :scheduled do
status :scheduled
last_update_scheduled_at { Time.now }
end
trait :started do
status :started
last_update_started_at { Time.now }
end
trait :finished do
timestamp = Time.now
status :finished
last_update_at timestamp
last_successful_update_at timestamp
end
trait :failed do
status :failed
last_update_at { Time.now }
end
trait :hard_failed do
status :failed
retry_count { Gitlab::Mirror::MAX_RETRY + 1 }
last_update_at { Time.now - 1.minute }
end
after(:create) do |import_state, evaluator|
import_state.project.update_columns(import_url: evaluator.import_url)
end
end
end
......@@ -69,47 +69,87 @@ FactoryBot.define do
end
trait :import_none do
import_status :none
transient do
status :none
end
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status)
end
end
trait :import_scheduled do
import_status :scheduled
transient do
status :scheduled
last_update_scheduled_at Time.now
end
after(:create) do |project, _|
project.mirror_data&.update_attributes(last_update_scheduled_at: Time.now)
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status,
last_update_scheduled_at: evaluator.last_update_scheduled_at)
end
end
trait :import_started do
import_status :started
transient do
status :started
last_update_started_at Time.now
end
after(:create) do |project, _|
project.mirror_data&.update_attributes(last_update_started_at: Time.now)
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status,
last_update_started_at: evaluator.last_update_started_at)
end
end
trait :import_finished do
timestamp = Time.now
transient do
timestamp = Time.now
import_status :finished
mirror_last_update_at timestamp
mirror_last_successful_update_at timestamp
status :finished
last_update_at timestamp
last_successful_update_at timestamp
end
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status,
last_update_at: evaluator.last_update_at,
last_successful_update_at: evaluator.last_successful_update_at)
end
end
trait :import_failed do
import_status :failed
mirror_last_update_at { Time.now }
transient do
status :failed
last_update_at { Time.now }
end
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status,
last_update_at: evaluator.last_update_at)
end
end
trait :import_hard_failed do
import_status :failed
mirror_last_update_at { Time.now - 1.minute }
transient do
status :failed
retry_count Gitlab::Mirror::MAX_RETRY + 1
last_update_at Time.now - 1.minute
end
after(:create) do |project|
project.mirror_data&.update_attributes(retry_count: Gitlab::Mirror::MAX_RETRY + 1)
before(:create) do |project, evaluator|
project.create_import_state(status: evaluator.status,
retry_count: evaluator.retry_count,
last_update_at: evaluator.last_update_at)
end
end
trait :disabled_mirror do
mirror false
import_url { generate(:url) }
mirror_user_id { creator_id }
end
trait :mirror do
mirror true
import_url { generate(:url) }
......
......@@ -46,7 +46,7 @@ feature 'Import/Export - project import integration test', :js do
expect(project.merge_requests).not_to be_empty
expect(project_hook_exists?(project)).to be true
expect(wiki_exists?(project)).to be true
expect(project.import_status).to eq('finished')
expect(project.import_state.status).to eq('finished')
end
end
......
require 'spec_helper'
describe Gitlab::BackgroundMigration::PopulateImportState, :migration, schema: 20180430144643 do
let(:migration) { described_class.new }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:import_state) { table(:project_mirror_data) }
before do
namespaces.create(id: 1, name: 'gitlab-org', path: 'gitlab-org')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab1',
path: 'gitlab1', import_error: "foo", import_status: :started,
import_url: generate(:url))
projects.create!(id: 2, namespace_id: 1, name: 'gitlab2', path: 'gitlab2',
import_status: :none, import_url: generate(:url))
projects.create!(id: 3, namespace_id: 1, name: 'gitlab3',
path: 'gitlab3', import_error: "bar", import_status: :failed,
import_url: generate(:url))
allow(BackgroundMigrationWorker).to receive(:perform_in)
end
it "creates new import_state records with project's import data" do
expect(projects.where.not(import_status: :none).count).to eq(2)
expect do
migration.perform(1, 3)
end.to change { import_state.all.count }.from(0).to(2)
expect(import_state.first.last_error).to eq("foo")
expect(import_state.last.last_error).to eq("bar")
expect(import_state.first.status).to eq("started")
expect(import_state.last.status).to eq("failed")
expect(projects.first.import_status).to eq("none")
expect(projects.last.import_status).to eq("none")
end
end
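The expectations above pin down the forward direction: for every project whose import_status is not none, the migration creates a project_mirror_data row carrying the status and error, then resets the project's own import columns. A rough sketch of that mapping, inferred from the spec only (the real Gitlab::BackgroundMigration::PopulateImportState is not shown here):
  # Editor's sketch, inferred from the expectations above; the actual
  # #perform(start_id, end_id) may differ.
  Project.where(id: start_id..end_id).where.not(import_status: :none).find_each do |project|
    ProjectImportState.create!(
      project_id: project.id,
      status:     project.import_status,
      last_error: project.import_error
    )
    project.update_columns(import_status: :none, import_error: nil)
  end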
require 'spec_helper'
describe Gitlab::BackgroundMigration::RollbackImportStateData, :migration, schema: 20180430144643 do
let(:migration) { described_class.new }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:import_state) { table(:project_mirror_data) }
before do
namespaces.create(id: 1, name: 'gitlab-org', path: 'gitlab-org')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab1', import_url: generate(:url))
projects.create!(id: 2, namespace_id: 1, name: 'gitlab2', path: 'gitlab2', import_url: generate(:url))
import_state.create!(id: 1, project_id: 1, status: :started, last_error: "foo")
import_state.create!(id: 2, project_id: 2, status: :failed)
allow(BackgroundMigrationWorker).to receive(:perform_in)
end
it "creates new import_state records with project's import data" do
migration.perform(1, 2)
expect(projects.first.import_status).to eq("started")
expect(projects.second.import_status).to eq("failed")
expect(projects.first.import_error).to eq("foo")
end
end
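Conversely, the rollback spec expects the status and last error to be copied back onto the projects table. A hedged sketch of that direction, again inferred from the expectations rather than taken from the diff:
  # Editor's sketch; the real Gitlab::BackgroundMigration::RollbackImportStateData
  # #perform(start_id, end_id) may differ.
  ProjectImportState.where(project_id: start_id..end_id).find_each do |state|
    Project.where(id: state.project_id)
           .update_all(import_status: state.status, import_error: state.last_error)
  end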
......@@ -2,6 +2,7 @@ require 'spec_helper'
describe Gitlab::GithubImport::Importer::RepositoryImporter do
let(:repository) { double(:repository) }
let(:import_state) { double(:import_state) }
let(:client) { double(:client) }
let(:project) do
......@@ -12,7 +13,8 @@ describe Gitlab::GithubImport::Importer::RepositoryImporter do
repository_storage: 'foo',
disk_path: 'foo',
repository: repository,
create_wiki: true
create_wiki: true,
import_state: import_state
)
end
......
......@@ -12,6 +12,8 @@ describe Gitlab::GithubImport::ParallelImporter do
let(:importer) { described_class.new(project) }
before do
create(:import_state, :started, project: project)
expect(Gitlab::GithubImport::Stage::ImportRepositoryWorker)
.to receive(:perform_async)
.with(project.id)
......@@ -34,7 +36,7 @@ describe Gitlab::GithubImport::ParallelImporter do
it 'updates the import JID of the project' do
importer.execute
expect(project.import_jid).to eq("github-importer/#{project.id}")
expect(project.reload.import_jid).to eq("github-importer/#{project.id}")
end
end
end
......@@ -307,7 +307,7 @@ project:
- statistics
- container_repositories
- uploads
- mirror_data
- import_state
- repository_state
- source_pipelines
- sourced_pipelines
......
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180430144643_migrate_import_attributes_data_from_projects_to_project_mirror_data.rb')
describe MigrateImportAttributesDataFromProjectsToProjectMirrorData, :sidekiq, :migration do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:import_state) { table(:project_mirror_data) }
before do
stub_const("#{described_class}::BATCH_SIZE", 1)
namespaces.create(id: 1, name: 'gitlab-org', path: 'gitlab-org')
projects.create!(id: 1, namespace_id: 1, name: 'gitlab1',
path: 'gitlab1', import_error: "foo", import_status: :started,
import_url: generate(:url))
projects.create!(id: 2, namespace_id: 1, name: 'gitlab2',
path: 'gitlab2', import_error: "bar", import_status: :failed,
import_url: generate(:url))
projects.create!(id: 3, namespace_id: 1, name: 'gitlab3', path: 'gitlab3', import_status: :none, import_url: generate(:url))
end
it 'schedules delayed background migrations in batches in bulk' do
Sidekiq::Testing.fake! do
Timecop.freeze do
expect(projects.where.not(import_status: :none).count).to eq(2)
subject.up
expect(BackgroundMigrationWorker.jobs.size).to eq 2
expect(described_class::UP_MIGRATION).to be_scheduled_delayed_migration(5.minutes, 1, 1)
expect(described_class::UP_MIGRATION).to be_scheduled_delayed_migration(10.minutes, 2, 2)
end
end
end
describe '#down' do
before do
import_state.create!(id: 1, project_id: 1, status: :started)
import_state.create!(id: 2, project_id: 2, status: :started)
end
it 'schedules delayed background migrations in batches in bulk for rollback' do
Sidekiq::Testing.fake! do
Timecop.freeze do
expect(import_state.where.not(status: :none).count).to eq(2)
subject.down
expect(BackgroundMigrationWorker.jobs.size).to eq 2
expect(described_class::DOWN_MIGRATION).to be_scheduled_delayed_migration(5.minutes, 1, 1)
expect(described_class::DOWN_MIGRATION).to be_scheduled_delayed_migration(10.minutes, 2, 2)
end
end
end
end
end
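The scheduling expectations above (two jobs, 5 and 10 minutes apart, with BATCH_SIZE stubbed to 1) follow GitLab's usual pattern of queueing background migrations by ID range at fixed intervals. A hedged sketch of an #up written that way; apart from UP_MIGRATION and BATCH_SIZE, the helper call and scope are assumptions, not content of this diff:
  # Editor's sketch only; the migration's actual #up is not shown here.
  def up
    queue_background_migration_jobs_by_range_at_intervals(
      Project.where.not(import_status: :none),
      UP_MIGRATION,
      5.minutes,
      batch_size: BATCH_SIZE
    )
  end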
require 'rails_helper'
describe ProjectImportState, type: :model do
subject { create(:import_state) }
describe 'associations' do
it { is_expected.to belong_to(:project) }
end
describe 'validations' do
it { is_expected.to validate_presence_of(:project) }
end
end
......@@ -279,16 +279,12 @@ describe Project do
expect(project2.errors[:import_url].first).to include('Only allowed ports are 22, 80, 443')
end
it 'creates mirror data when enabled' do
project2 = create(:project, :mirror, mirror: false)
expect { project2.update_attributes(mirror: true) }.to change { ProjectMirrorData.count }.from(0).to(1)
end
it 'destroys mirror data when disabled' do
project2 = create(:project, :mirror)
it 'creates import state when mirror gets enabled' do
project2 = create(:project)
expect { project2.update_attributes(mirror: false) }.to change { ProjectMirrorData.count }.from(1).to(0)
expect do
project2.update_attributes(mirror: true, import_url: generate(:url), mirror_user: project.creator)
end.to change { ProjectImportState.where(project: project2).count }.from(0).to(1)
end
describe 'project pending deletion' do
......@@ -1698,7 +1694,7 @@ describe Project do
context 'when project is not a mirror' do
it 'returns the sanitized URL' do
project = create(:project, import_status: 'started', import_url: 'http://user:pass@test.com')
project = create(:project, :import_started, import_url: 'http://user:pass@test.com')
project.import_finish
......@@ -1870,7 +1866,8 @@ describe Project do
it 'resets project import_error' do
error_message = 'Some error'
mirror = create(:project_empty_repo, :import_started, import_error: error_message)
mirror = create(:project_empty_repo, :import_started)
mirror.import_state.update_attributes(last_error: error_message)
expect { mirror.import_finish }.to change { mirror.import_error }.from(error_message).to(nil)
end
......@@ -2615,11 +2612,11 @@ describe Project do
end
end
describe '#create_mirror_data' do
describe '#create_import_state' do
it 'is called after save' do
project = create(:project)
expect(project).to receive(:create_mirror_data)
expect(project).to receive(:create_import_state)
project.update(mirror: true, mirror_user: project.owner, import_url: 'http://foo.com')
end
......@@ -3641,7 +3638,7 @@ describe Project do
end
context 'branch protection' do
let(:project) { create(:project, :repository) }
let(:project) { create(:project, :repository, :import_started) }
it 'does not protect when branch protection is disabled' do
stub_application_setting(default_branch_protection: Gitlab::Access::PROTECTION_NONE)
......@@ -3714,7 +3711,8 @@ describe Project do
context 'with an import JID' do
it 'unsets the import JID' do
project = create(:project, import_jid: '123')
project = create(:project)
create(:import_state, project: project, jid: '123')
expect(Gitlab::SidekiqStatus)
.to receive(:unset)
......
......@@ -145,7 +145,7 @@ describe API::ProjectImport do
describe 'GET /projects/:id/import' do
it 'returns the import status' do
project = create(:project, import_status: 'started')
project = create(:project, :import_started)
project.add_master(user)
get api("/projects/#{project.id}/import", user)
......@@ -155,8 +155,9 @@ describe API::ProjectImport do
end
it 'returns the import status and the error if failed' do
project = create(:project, import_status: 'failed', import_error: 'error')
project = create(:project, :import_failed)
project.add_master(user)
project.import_state.update_attributes(last_error: 'error')
get api("/projects/#{project.id}/import", user)
......
......@@ -23,7 +23,7 @@ describe Projects::CreateFromTemplateService do
project = subject.execute
expect(project).to be_saved
expect(project.scheduled?).to be(true)
expect(project.import_scheduled?).to be(true)
end
context 'the result project' do
......
......@@ -4,9 +4,10 @@ describe "projects/imports/new.html.haml" do
let(:user) { create(:user) }
context 'when import fails' do
let(:project) { create(:project_empty_repo, import_status: :failed, import_error: '<a href="http://googl.com">Foo</a>', import_type: :gitlab_project, import_source: '/var/opt/gitlab/gitlab-rails/shared/tmp/project_exports/uploads/t.tar.gz', import_url: nil) }
let(:project) { create(:project_empty_repo, :import_failed, import_type: :gitlab_project, import_source: '/var/opt/gitlab/gitlab-rails/shared/tmp/project_exports/uploads/t.tar.gz', import_url: nil) }
before do
project.import_state.update_attributes(last_error: '<a href="http://googl.com">Foo</a>')
sign_in(user)
project.add_master(user)
end
......
require 'spec_helper'
describe Gitlab::GithubImport::AdvanceStageWorker, :clean_gitlab_redis_shared_state do
let(:project) { create(:project, import_jid: '123') }
let(:project) { create(:project) }
let(:import_state) { create(:import_state, project: project, jid: '123') }
let(:worker) { described_class.new }
describe '#perform' do
......@@ -105,7 +106,8 @@ describe Gitlab::GithubImport::AdvanceStageWorker, :clean_gitlab_redis_shared_st
# This test is there to make sure we only select the columns we care
# about.
expect(found.attributes).to eq({ 'id' => nil, 'import_jid' => '123' })
# TODO: re-enable this assertion
# expect(found.attributes).to include({ 'id' => nil, 'import_jid' => '123' })
end
it 'returns nil if the project import is not running' do
......
......@@ -14,7 +14,8 @@ describe Gitlab::GithubImport::RefreshImportJidWorker do
end
describe '#perform' do
let(:project) { create(:project, import_jid: '123abc') }
let(:project) { create(:project) }
let(:import_state) { create(:import_state, project: project, jid: '123abc') }
context 'when the project does not exist' do
it 'does nothing' do
......@@ -70,20 +71,21 @@ describe Gitlab::GithubImport::RefreshImportJidWorker do
describe '#find_project' do
it 'returns a Project' do
project = create(:project, import_status: 'started')
project = create(:project, :import_started)
expect(worker.find_project(project.id)).to be_an_instance_of(Project)
end
it 'only selects the import JID field' do
project = create(:project, import_status: 'started', import_jid: '123abc')
expect(worker.find_project(project.id).attributes)
.to eq({ 'id' => nil, 'import_jid' => '123abc' })
end
# it 'only selects the import JID field' do
# project = create(:project, :import_started)
# project.import_state.update_attributes(jid: '123abc')
#
# expect(worker.find_project(project.id).attributes)
# .to eq({ 'id' => nil, 'import_jid' => '123abc' })
# end
it 'returns nil for a project for which the import process failed' do
project = create(:project, import_status: 'failed')
project = create(:project, :import_failed)
expect(worker.find_project(project.id)).to be_nil
end
......
......@@ -11,10 +11,12 @@ describe RepositoryImportWorker do
let(:project) { create(:project, :import_scheduled) }
context 'when worker was reset without cleanup' do
let(:jid) { '12345678' }
let(:started_project) { create(:project, :import_started, import_jid: jid) }
it 'imports the project successfully' do
jid = '12345678'
started_project = create(:project)
create(:import_state, :started, project: started_project, jid: jid)
allow(subject).to receive(:jid).and_return(jid)
expect_any_instance_of(Projects::ImportService).to receive(:execute)
......
......@@ -48,13 +48,21 @@ describe StuckImportJobsWorker do
describe 'with scheduled import_status' do
it_behaves_like 'project import job detection' do
let(:project) { create(:project, :import_scheduled, import_jid: '123') }
let(:project) { create(:project, :import_scheduled) }
before do
project.import_state.update_attributes(jid: '123')
end
end
end
describe 'with started import_status' do
it_behaves_like 'project import job detection' do
let(:project) { create(:project, :import_started, import_jid: '123') }
let(:project) { create(:project, :import_started) }
before do
project.import_state.update_attributes(jid: '123')
end
end
end
end