Commit e9478858 authored by Valery Sizov

Geo: Add progressive backoff for file download retries [ci skip]

parent 3ec71075
# Geo file registry model. Defines query scopes over the `success` and
# `retry_at` columns.
class Geo::FileRegistry < Geo::BaseRegistry
  # Rows whose `success` column is false.
  scope :failed, -> { where(success: false) }

  # Rows whose `success` column is true.
  scope :synced, -> { where(success: true) }

  # Rows that are due for another attempt: `retry_at` is already in the past,
  # or `retry_at` was never set.
  #
  # The explicit NULL check is required: in SQL `retry_at < ?` evaluates to
  # NULL (not true) when `retry_at` is NULL, so without it a failed row that
  # has no `retry_at` would be excluded from every retry batch.
  scope :to_be_retried, -> { where('retry_at IS NULL OR retry_at < ?', Time.now) }
end
......@@ -6,6 +6,7 @@ module Geo
class BaseSyncService
include ExclusiveLeaseGuard
include ::Gitlab::Geo::ProjectLogHelpers
include Delay
class << self
attr_accessor :type
......@@ -77,11 +78,6 @@ module Geo
(RETRY_BEFORE_REDOWNLOAD..RETRY_LIMIT) === retry_count
end
# Progressive backoff
# Progressive backoff.
#
# retry_count - retry counter supplied by the caller (defaults to 0).
#
# Returns retry_count**4 + 15 plus a random jitter of
# rand(30) * (retry_count + 1).
def delay(retry_count = 0)
  growth = retry_count**4
  jitter = rand(30) * (retry_count + 1)

  growth + 15 + jitter
end
# Placeholder that always raises; the message states that the class is
# expected to provide its own sync_repository.
#
# Raises NotImplementedError.
def sync_repository
  message = 'This class should implement sync_repository method'
  raise NotImplementedError, message
end
......@@ -138,7 +134,7 @@ module Geo
if started_at
attrs["last_#{type}_synced_at"] = started_at
attrs["#{type}_retry_count"] = retry_count + 1
attrs["#{type}_retry_at"] = Time.now + delay(retry_count).seconds
attrs["#{type}_retry_at"] = Time.now + delay(attrs["#{type}_retry_count"]).seconds
end
if finished_at
......
......@@ -2,6 +2,8 @@ module Geo
class FileDownloadService < FileService
LEASE_TIMEOUT = 8.hours.freeze
include Delay
def execute
try_obtain_lease do |lease|
start_time = Time.now
......@@ -45,6 +47,13 @@ module Geo
transfer.bytes = bytes_downloaded
transfer.success = success
unless success
# We don't limit the amount of retries
transfer.retry_count = (transfer.retry_count || 0) + 1
transfer.retry_at = Time.now + delay(transfer.retry_count).seconds
end
transfer.save
end
......
......@@ -29,7 +29,7 @@ module Geo
if with_backup
log_info('Removing backup copy as the repository was redownloaded successfully')
FileUtils.rm_r(backup_path)
FileUtils.rm_rf(backup_path)
end
update_registry(finished_at: DateTime.now)
......@@ -43,11 +43,11 @@ module Geo
rescue Gitlab::Git::Repository::NoRepository => e
log_error('Invalid repository', e)
registry.update(force_to_redownload_repository: true)
log_info('Expiring caches')
project.repository.after_create
expire_repository_caches
ensure
# Backup can only exist if redownload was unsuccessful
if with_backup && File.exist?(backup_path)
FileUtils.rm_rf(actual_path)
FileUtils.mv(backup_path, actual_path)
end
end
......
......@@ -49,11 +49,6 @@ module Geo
end
end
# Fetches the wiki repository. Despite the name, no backup handling happens
# here yet: the call is forwarded to fetch_wiki_repository unchanged and its
# result is returned.
def fetch_wiki_repository_with_backup
  # TODO: replace with actual implementation
  fetch_wiki_repository
end
# Builds the SSH URL string for the project's wiki repository.
#
# Returns a String made of primary_ssh_path_prefix, the project's full path
# and the ".wiki.git" suffix, in that order.
def ssh_url_to_wiki
  prefix = primary_ssh_path_prefix
  wiki_repo = "#{project.full_path}.wiki.git"

  "#{prefix}#{wiki_repo}"
end
......
# Mixin that exposes a retry delay calculation.
module Delay
  # Progressive backoff; the formula is taken verbatim from Sidekiq.
  #
  # retry_count - retry counter supplied by the caller (defaults to 0).
  #
  # Returns retry_count**4 + 15 plus a random jitter of
  # rand(30) * (retry_count + 1).
  def delay(retry_count = 0)
    polynomial = retry_count**4
    random_part = rand(30) * (retry_count + 1)

    polynomial + 15 + random_part
  end
end
......@@ -33,6 +33,7 @@ module Geo
# Looks up failed Geo::FileRegistry rows that fall in the to_be_retried scope.
#
# batch_size - upper bound passed to `limit`.
#
# Returns whatever `pluck(:file_id, :file_type)` yields for the limited
# relation.
def find_failed_objects(batch_size:)
  retryable = Geo::FileRegistry.failed.to_be_retried

  retryable.limit(batch_size).pluck(:file_id, :file_type)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment