Commit f56f4d32 authored by Valery Sizov

Geo: Improve error handling[ci skip]

parent 2b6aa746
...@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
validates :project, presence: true, uniqueness: true validates :project, presence: true, uniqueness: true
scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) } scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) }
# Registries that are due for a sync attempt: for the repository or the wiki,
# either no retry time is stored (NULL) or the stored retry time is already in
# the past.
#
# The NULL checks matter because `column < now` is never true for NULL in SQL;
# without them, rows whose retry times are both NULL would be filtered out.
scope :to_be_retried, -> {
  now = Time.now
  repository_due = arel_table[:repository_retry_at].eq(nil).or(arel_table[:repository_retry_at].lt(now))
  wiki_due = arel_table[:wiki_retry_at].eq(nil).or(arel_table[:wiki_retry_at].lt(now))

  where(repository_due.or(wiki_due))
}
def self.failed def self.failed
repository_sync_failed = arel_table[:last_repository_synced_at].not_eq(nil) repository_sync_failed = arel_table[:last_repository_synced_at].not_eq(nil)
...@@ -39,10 +40,16 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -39,10 +40,16 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
end end
# Whether the repository should be synced for an event observed at +timestamp+.
#
# Returns false when no repository resync is flagged, or when a retry time is
# stored and +timestamp+ is still before it. Otherwise returns true if the
# repository has never been synced or +timestamp+ is later than
# last_repository_synced_at.
def repository_sync_needed?(timestamp)
  return false unless resync_repository?
  # repository_retry_at may be nil; comparing a time with nil raises.
  return false if repository_retry_at && timestamp < repository_retry_at

  last_repository_synced_at.nil? || timestamp > last_repository_synced_at
end
# Whether the wiki should be synced for an event observed at +timestamp+.
#
# Returns false when no wiki resync is flagged, or when a retry time is stored
# and +timestamp+ is still before it. Otherwise returns true if the wiki has
# never been synced or +timestamp+ is later than last_wiki_synced_at.
def wiki_sync_needed?(timestamp)
  return false unless resync_wiki?
  # wiki_retry_at may be nil; comparing a time with nil raises.
  return false if wiki_retry_at && timestamp < wiki_retry_at

  last_wiki_synced_at.nil? || timestamp > last_wiki_synced_at
end
end end
...@@ -15,6 +15,8 @@ module Geo ...@@ -15,6 +15,8 @@ module Geo
LEASE_TIMEOUT = 8.hours.freeze LEASE_TIMEOUT = 8.hours.freeze
LEASE_KEY_PREFIX = 'geo_sync_service'.freeze LEASE_KEY_PREFIX = 'geo_sync_service'.freeze
# Highest retry count at which a plain sync is still attempted (see should_be_retried?).
RETRY_BEFORE_REDOWNLOAD = 5
# Highest retry count at which a redownload is attempted (see should_be_redownloaded?);
# beyond it, execute deletes the registry entry instead of syncing.
RETRY_LIMIT = 8
def initialize(project) def initialize(project)
@project = project @project = project
...@@ -23,7 +25,18 @@ module Geo ...@@ -23,7 +25,18 @@ module Geo
# Runs one sync attempt inside the block passed to try_obtain_lease.
#
# Depending on the retry counters it either performs a plain sync, performs a
# sync with the backup/redownload flag set, or - when neither predicate holds -
# deletes the registry entry and returns early so a new cycle can start.
def execute
  try_obtain_lease do
    log_info("Started #{type} sync")

    with_backup = !should_be_retried?

    if with_backup && !should_be_redownloaded?
      # Clean up the state of sync to start a new cycle
      registry.delete
      log_info("Clean up #{type} sync status")
      return
    end

    with_backup ? sync_repository(true) : sync_repository

    log_info("Finished #{type} sync")
  end
end
...@@ -48,6 +61,22 @@ module Geo ...@@ -48,6 +61,22 @@ module Geo
private private
# Number of retries recorded on the registry for this sync type.
# Reads the "<type>_retry_count" attribute and falls back to 0 when it is unset.
def retry_count
  recorded = registry.public_send("#{type}_retry_count")
  recorded || 0
end
# True while the recorded retry count has not gone past RETRY_BEFORE_REDOWNLOAD,
# i.e. a plain sync should be attempted.
def should_be_retried?
  attempts = retry_count
  attempts <= RETRY_BEFORE_REDOWNLOAD
end
# True when the recorded retry count lies between RETRY_BEFORE_REDOWNLOAD and
# RETRY_LIMIT (both inclusive).
def should_be_redownloaded?
  retry_count.between?(RETRY_BEFORE_REDOWNLOAD, RETRY_LIMIT)
end
# Computes a back-off value for the given retry count: the fourth power of the
# count, plus a fixed 15, plus a random component (rand(30)) scaled by the
# count plus one. The caller treats the result as seconds.
def delay(retry_count = 0)
  backoff = retry_count**4
  jitter = rand(30) * (retry_count + 1)

  backoff + 15 + jitter
end
# Abstract hook that performs the actual sync; subclasses must override it.
#
# The optional argument mirrors the subclass signatures and the
# sync_repository(true) call made by execute, so that a subclass which does
# not override this method fails with NotImplementedError rather than an
# ArgumentError about the argument count.
#
# Raises NotImplementedError always.
def sync_repository(_with_backup = false)
  raise NotImplementedError, 'This class should implement sync_repository method'
end
...@@ -101,11 +130,17 @@ module Geo ...@@ -101,11 +130,17 @@ module Geo
attrs = {} attrs = {}
attrs["last_#{type}_synced_at"] = started_at if started_at if started_at
attrs["last_#{type}_synced_at"] = started_at
attrs["#{type}_retry_count"] = retry_count + 1
attrs["#{type}_retry_at"] = Time.now + delay(retry_count).seconds
end
if finished_at if finished_at
attrs["last_#{type}_successful_sync_at"] = finished_at attrs["last_#{type}_successful_sync_at"] = finished_at
attrs["resync_#{type}"] = false attrs["resync_#{type}"] = false
attrs["#{type}_retry_count"] = nil
attrs["#{type}_retry_at"] = nil
end end
registry.update!(attrs) registry.update!(attrs)
......
require 'tmpdir'
module Geo module Geo
class RepositorySyncService < BaseSyncService class RepositorySyncService < BaseSyncService
self.type = :repository self.type = :repository
private private
# Fetches the project repository, then expires the repository caches.
#
# with_backup - passed through to fetch_project_repository; when true the
#               existing repository is moved aside before fetching
#               (default: false).
def sync_repository(with_backup = false)
  fetch_project_repository(with_backup)
  expire_repository_caches
end
def fetch_project_repository def fetch_project_repository(with_backup)
log_info('Fetching project repository') log_info('Trying to fetch project repository')
update_registry(started_at: DateTime.now) update_registry(started_at: DateTime.now)
begin if with_backup
log_info('Backup enabled')
actual_path = project.repository.path_to_repo
backup_path = File.join(Dir.mktmpdir, project.path)
# Creating a backup copy and removing the main repo
FileUtils.mv(actual_path, backup_path)
end
project.ensure_repository project.ensure_repository
fetch_geo_mirror(project.repository) fetch_geo_mirror(project.repository)
update_registry(finished_at: DateTime.now)
log_info("Finished repository sync", if with_backup
log_info('Removing backup copy as the repository was redownloaded successfully')
FileUtils.rm_r(backup_path)
end
update_registry(finished_at: DateTime.now)
log_info('Finished repository sync',
update_delay_s: update_delay_in_seconds, update_delay_s: update_delay_in_seconds,
download_time_s: download_time_in_seconds) download_time_s: download_time_in_seconds)
rescue Gitlab::Shell::Error, rescue Gitlab::Shell::Error,
...@@ -29,6 +44,10 @@ module Geo ...@@ -29,6 +44,10 @@ module Geo
log_error('Invalid repository', e) log_error('Invalid repository', e)
log_info('Expiring caches') log_info('Expiring caches')
project.repository.after_create project.repository.after_create
ensure
# Backup can only exist if redownload was unsuccessful
if with_backup && File.exist?(backup_path)
FileUtils.mv(backup_path, actual_path)
end end
end end
......
...@@ -4,20 +4,34 @@ module Geo ...@@ -4,20 +4,34 @@ module Geo
private private
# Fetches the wiki repository.
#
# with_backup - passed through to fetch_wiki_repository; when true the
#               existing wiki repository is moved aside before fetching
#               (default: false).
def sync_repository(with_backup = false)
  fetch_wiki_repository(with_backup)
end
def fetch_wiki_repository def fetch_wiki_repository(with_backup)
log_info('Fetching wiki repository') log_info('Fetching wiki repository')
update_registry(started_at: DateTime.now) update_registry(started_at: DateTime.now)
begin if with_backup
log_info('Backup enabled')
actual_path = project.wiki.path_to_repo
backup_path = File.join(Dir.mktmpdir, 'wiki')
# Creating a backup copy and removing the main wiki
FileUtils.mv(actual_path, backup_path)
end
project.wiki.ensure_repository project.wiki.ensure_repository
fetch_geo_mirror(project.wiki.repository) fetch_geo_mirror(project.wiki.repository)
if with_backup
log_info('Removing backup copy as the repository was redownloaded successfully')
FileUtils.rm_r(backup_path)
end
update_registry(finished_at: DateTime.now) update_registry(finished_at: DateTime.now)
log_info("Finished wiki sync", log_info('Finished wiki sync',
update_delay_s: update_delay_in_seconds, update_delay_s: update_delay_in_seconds,
download_time_s: download_time_in_seconds) download_time_s: download_time_in_seconds)
rescue Gitlab::Git::Repository::NoRepository, rescue Gitlab::Git::Repository::NoRepository,
...@@ -26,7 +40,16 @@ module Geo ...@@ -26,7 +40,16 @@ module Geo
ProjectWiki::CouldNotCreateWikiError, ProjectWiki::CouldNotCreateWikiError,
Geo::EmptyCloneUrlPrefixError => e Geo::EmptyCloneUrlPrefixError => e
log_error('Error syncing wiki repository', e) log_error('Error syncing wiki repository', e)
ensure
# Backup can only exist if redownload was unsuccessful
if with_backup && File.exist?(backup_path)
FileUtils.mv(backup_path, actual_path)
end
end end
def fetch_wiki_repository_with_backup
# TODO: replace with actual implementation
fetch_wiki_repository
end end
def ssh_url_to_wiki def ssh_url_to_wiki
......
...@@ -33,6 +33,7 @@ module Geo ...@@ -33,6 +33,7 @@ module Geo
def find_project_ids_updated_recently(batch_size:) def find_project_ids_updated_recently(batch_size:)
current_node.project_registries current_node.project_registries
.dirty .dirty
.to_be_retried
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc)) .order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(batch_size) .limit(batch_size)
.pluck(:project_id) .pluck(:project_id)
......
# Adds retry bookkeeping columns (an integer retry counter and a datetime
# retry timestamp) to the file_registry and project_registry tables.
class AddRetryCountFieldsToRegistries < ActiveRecord::Migration
  # [table, column, type] triples, listed in the order the columns are added.
  RETRY_COLUMNS = [
    [:file_registry, :retry_count, :integer],
    [:project_registry, :repository_retry_count, :integer],
    [:project_registry, :wiki_retry_count, :integer],
    [:project_registry, :repository_retry_at, :datetime],
    [:project_registry, :wiki_retry_at, :datetime],
    [:file_registry, :retry_at, :datetime]
  ].freeze

  def change
    RETRY_COLUMNS.each do |table, column, type|
      add_column table, column, type
    end
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20171009162209) do ActiveRecord::Schema.define(version: 20171101105200) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -26,6 +26,8 @@ ActiveRecord::Schema.define(version: 20171009162209) do ...@@ -26,6 +26,8 @@ ActiveRecord::Schema.define(version: 20171009162209) do
t.string "sha256" t.string "sha256"
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.boolean "success", default: false, null: false t.boolean "success", default: false, null: false
t.integer "retry_count"
t.datetime "retry_at"
end end
add_index "file_registry", ["file_type", "file_id"], name: "index_file_registry_on_file_type_and_file_id", unique: true, using: :btree add_index "file_registry", ["file_type", "file_id"], name: "index_file_registry_on_file_type_and_file_id", unique: true, using: :btree
...@@ -41,6 +43,10 @@ ActiveRecord::Schema.define(version: 20171009162209) do ...@@ -41,6 +43,10 @@ ActiveRecord::Schema.define(version: 20171009162209) do
t.boolean "resync_wiki", default: true, null: false t.boolean "resync_wiki", default: true, null: false
t.datetime "last_wiki_synced_at" t.datetime "last_wiki_synced_at"
t.datetime "last_wiki_successful_sync_at" t.datetime "last_wiki_successful_sync_at"
t.integer "repository_retry_count"
t.integer "wiki_retry_count"
t.datetime "repository_retry_at"
t.datetime "wiki_retry_at"
end end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment