Commit f56f4d32 authored by Valery Sizov

Geo: Improve error handling[ci skip]

parent 2b6aa746
......@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
# Every registry row must reference a project, and a project may have at most one row.
validates :project, presence: true, uniqueness: true
# Rows flagged for a repository resync, a wiki resync, or both.
scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) }
# Rows whose retry back-off has elapsed for the repository or for the wiki.
#
# A NULL retry timestamp means no back-off is pending, so it must be matched
# explicitly: in SQL `NULL < now` is never true, and a plain `lt` comparison
# would silently drop every row that has no retry time set.
scope :to_be_retried, -> {
  now = Time.now
  repository_due = arel_table[:repository_retry_at].eq(nil).or(arel_table[:repository_retry_at].lt(now))
  wiki_due = arel_table[:wiki_retry_at].eq(nil).or(arel_table[:wiki_retry_at].lt(now))

  where(repository_due.or(wiki_due))
}
def self.failed
repository_sync_failed = arel_table[:last_repository_synced_at].not_eq(nil)
......@@ -39,10 +40,16 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
end
# Tells whether the repository should be synced for an event at `timestamp`.
#
# Returns false when no resync is flagged, or when a retry back-off is
# pending and `timestamp` falls before it. Otherwise returns true if the
# repository was never synced or `timestamp` is newer than the last sync.
def repository_sync_needed?(timestamp)
  return false unless resync_repository?
  # No retry time means no back-off is pending; comparing a time with nil would raise.
  return false if repository_retry_at && timestamp < repository_retry_at

  last_repository_synced_at.nil? || timestamp > last_repository_synced_at
end
# Tells whether the wiki should be synced for an event at `timestamp`.
#
# Returns false when no wiki resync is flagged, or when a retry back-off is
# pending and `timestamp` falls before it. Otherwise returns true if the
# wiki was never synced or `timestamp` is newer than the last sync.
def wiki_sync_needed?(timestamp)
  return false unless resync_wiki?
  # No retry time means no back-off is pending; comparing a time with nil would raise.
  return false if wiki_retry_at && timestamp < wiki_retry_at

  last_wiki_synced_at.nil? || timestamp > last_wiki_synced_at
end
end
......@@ -15,6 +15,8 @@ module Geo
# NOTE(review): presumably the lifetime of the lease taken by try_obtain_lease — confirm against its definition.
LEASE_TIMEOUT = 8.hours.freeze
# NOTE(review): presumably the prefix of the lease key — the key construction is not visible here.
LEASE_KEY_PREFIX = 'geo_sync_service'.freeze
# Highest retry count for which should_be_retried? still answers true (plain re-sync).
RETRY_BEFORE_REDOWNLOAD = 5
# Upper bound of the retry-count range accepted by should_be_redownloaded?;
# beyond it, execute deletes the registry instead of syncing.
RETRY_LIMIT = 8
def initialize(project)
@project = project
......@@ -23,7 +25,18 @@ module Geo
# Runs one sync attempt for this service's `type` inside try_obtain_lease.
#
# Depending on the retry predicates it either re-syncs in place, re-syncs
# with `sync_repository(true)`, or deletes the registry and returns early
# without logging a finish message.
def execute
  try_obtain_lease do
    log_info("Started #{type} sync")

    # Once plain retries are exhausted, the only way forward is a redownload.
    redownload = !should_be_retried?

    if redownload && !should_be_redownloaded?
      # Clean up the state of sync to start a new cycle
      registry.delete
      log_info("Clean up #{type} sync status")
      # Leaves execute itself, not just the block.
      return
    end

    if redownload
      sync_repository(true)
    else
      sync_repository
    end

    log_info("Finished #{type} sync")
  end
end
......@@ -48,6 +61,22 @@ module Geo
private
# Number of retries recorded on the registry for this service's `type`,
# treating a missing (nil/false) value as zero.
def retry_count
  recorded = registry.public_send("#{type}_retry_count")
  recorded || 0
end
# True while the retry count has not gone past RETRY_BEFORE_REDOWNLOAD,
# i.e. a plain re-sync is still the chosen strategy.
def should_be_retried?
  attempts = retry_count
  attempts <= RETRY_BEFORE_REDOWNLOAD
end
# True when the retry count lies between RETRY_BEFORE_REDOWNLOAD and
# RETRY_LIMIT, both bounds included.
def should_be_redownloaded?
  retry_count.between?(RETRY_BEFORE_REDOWNLOAD, RETRY_LIMIT)
end
# Back-off for the given retry count: the count to the fourth power, plus a
# fixed 15, plus a random component (rand(30)) scaled by the count plus one.
# The caller decides the unit of the returned number.
def delay(retry_count = 0)
  backoff = retry_count**4
  jitter = rand(30) * (retry_count + 1)

  backoff + 15 + jitter
end
# Abstract hook: subclasses perform the actual sync here.
#
# Accepts the optional `with_backup` flag because execute calls
# `sync_repository(true)` on the redownload path; without the parameter a
# subclass that forgot to override would fail with ArgumentError instead of
# this explicit NotImplementedError.
def sync_repository(with_backup = false)
  raise NotImplementedError, 'This class should implement sync_repository method'
end
......@@ -101,11 +130,17 @@ module Geo
attrs = {}
attrs["last_#{type}_synced_at"] = started_at if started_at
if started_at
attrs["last_#{type}_synced_at"] = started_at
attrs["#{type}_retry_count"] = retry_count + 1
attrs["#{type}_retry_at"] = Time.now + delay(retry_count).seconds
end
if finished_at
attrs["last_#{type}_successful_sync_at"] = finished_at
attrs["resync_#{type}"] = false
attrs["#{type}_retry_count"] = nil
attrs["#{type}_retry_at"] = nil
end
registry.update!(attrs)
......
require 'tmpdir'
module Geo
class RepositorySyncService < BaseSyncService
self.type = :repository
private
# Fetches the project repository and then expires the repository caches.
#
# with_backup - forwarded to fetch_project_repository; when true the fetch
#               moves the existing repository aside before downloading.
def sync_repository(with_backup = false)
  fetch_project_repository(with_backup)
  expire_repository_caches
end
def fetch_project_repository
log_info('Fetching project repository')
def fetch_project_repository(with_backup)
log_info('Trying to fetch project repository')
update_registry(started_at: DateTime.now)
begin
if with_backup
log_info('Backup enabled')
actual_path = project.repository.path_to_repo
backup_path = File.join(Dir.mktmpdir, project.path)
# Creating a backup copy and removing the main repo
FileUtils.mv(actual_path, backup_path)
end
project.ensure_repository
fetch_geo_mirror(project.repository)
update_registry(finished_at: DateTime.now)
log_info("Finished repository sync",
if with_backup
log_info('Removing backup copy as the repository was redownloaded successfully')
FileUtils.rm_r(backup_path)
end
update_registry(finished_at: DateTime.now)
log_info('Finished repository sync',
update_delay_s: update_delay_in_seconds,
download_time_s: download_time_in_seconds)
rescue Gitlab::Shell::Error,
......@@ -29,6 +44,10 @@ module Geo
log_error('Invalid repository', e)
log_info('Expiring caches')
project.repository.after_create
ensure
# Backup can only exist if redownload was unsuccessful
if with_backup && File.exist?(backup_path)
FileUtils.mv(backup_path, actual_path)
end
end
......
......@@ -4,20 +4,34 @@ module Geo
private
# Fetches the wiki repository.
#
# with_backup - forwarded to fetch_wiki_repository; when true the fetch
#               moves the existing wiki aside before downloading.
def sync_repository(with_backup = false)
  fetch_wiki_repository(with_backup)
end
def fetch_wiki_repository
def fetch_wiki_repository(with_backup)
log_info('Fetching wiki repository')
update_registry(started_at: DateTime.now)
begin
if with_backup
log_info('Backup enabled')
actual_path = project.wiki.path_to_repo
backup_path = File.join(Dir.mktmpdir, 'wiki')
# Creating a backup copy and removing the main wiki
FileUtils.mv(actual_path, backup_path)
end
project.wiki.ensure_repository
fetch_geo_mirror(project.wiki.repository)
if with_backup
log_info('Removing backup copy as the repository was redownloaded successfully')
FileUtils.rm_r(backup_path)
end
update_registry(finished_at: DateTime.now)
log_info("Finished wiki sync",
log_info('Finished wiki sync',
update_delay_s: update_delay_in_seconds,
download_time_s: download_time_in_seconds)
rescue Gitlab::Git::Repository::NoRepository,
......@@ -26,7 +40,16 @@ module Geo
ProjectWiki::CouldNotCreateWikiError,
Geo::EmptyCloneUrlPrefixError => e
log_error('Error syncing wiki repository', e)
ensure
# Backup can only exist if redownload was unsuccessful
if with_backup && File.exist?(backup_path)
FileUtils.mv(backup_path, actual_path)
end
end
# Fetches the wiki with the backup step enabled.
#
# fetch_wiki_repository requires its with_backup argument, so the flag has
# to be passed explicitly; a bare call would raise ArgumentError.
# NOTE(review): no caller is visible in this file — confirm this wrapper is still needed.
def fetch_wiki_repository_with_backup
  fetch_wiki_repository(true)
end
def ssh_url_to_wiki
......
......@@ -33,6 +33,7 @@ module Geo
def find_project_ids_updated_recently(batch_size:)
current_node.project_registries
.dirty
.to_be_retried
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(batch_size)
.pluck(:project_id)
......
# Adds nullable retry bookkeeping columns (a counter and a timestamp) to the
# file and project registries.
class AddRetryCountFieldsToRegistries < ActiveRecord::Migration
  def change
    # File registry: one counter and one timestamp.
    add_column(:file_registry, :retry_count, :integer)
    add_column(:file_registry, :retry_at, :datetime)

    # Project registry: separate bookkeeping for the repository and the wiki.
    add_column(:project_registry, :repository_retry_count, :integer)
    add_column(:project_registry, :wiki_retry_count, :integer)
    add_column(:project_registry, :repository_retry_at, :datetime)
    add_column(:project_registry, :wiki_retry_at, :datetime)
  end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171009162209) do
ActiveRecord::Schema.define(version: 20171101105200) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -26,6 +26,8 @@ ActiveRecord::Schema.define(version: 20171009162209) do
t.string "sha256"
t.datetime "created_at", null: false
t.boolean "success", default: false, null: false
t.integer "retry_count"
t.datetime "retry_at"
end
add_index "file_registry", ["file_type", "file_id"], name: "index_file_registry_on_file_type_and_file_id", unique: true, using: :btree
......@@ -41,6 +43,10 @@ ActiveRecord::Schema.define(version: 20171009162209) do
t.boolean "resync_wiki", default: true, null: false
t.datetime "last_wiki_synced_at"
t.datetime "last_wiki_successful_sync_at"
t.integer "repository_retry_count"
t.integer "wiki_retry_count"
t.datetime "repository_retry_at"
t.datetime "wiki_retry_at"
end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment