Commit b8da28c1 authored by Stan Hu's avatar Stan Hu

Merge branch...

Merge branch '5195-geo-actively-try-to-correct-verification-failures-on-the-secondary' into 'master'

Resolve "Geo: Actively try to correct verification failures on the secondary"

Closes #5195

See merge request gitlab-org/gitlab-ee!6759
parents 8100c031 3ab1648e
...@@ -17,9 +17,9 @@ it from the primary to resolve the issue. ...@@ -17,9 +17,9 @@ it from the primary to resolve the issue.
If verification succeeds on the **primary** but fails on the **secondary**, If verification succeeds on the **primary** but fails on the **secondary**,
this indicates that the object was corrupted during the replication process. this indicates that the object was corrupted during the replication process.
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve Geo actively tries to correct verification failures by marking the repository to
verification failures of this kind, so you should follow be resynced with a backoff period. If you want to reset the verification for
[these instructions][reset-verification] these failures, you should follow [these instructions][reset-verification].
If verification is lagging significantly behind replication, consider giving If verification is lagging significantly behind replication, consider giving
the node more time before scheduling a planned failover. the node more time before scheduling a planned failover.
...@@ -88,9 +88,10 @@ for every node after every update to make sure that they are all in sync. ...@@ -88,9 +88,10 @@ for every node after every update to make sure that they are all in sync.
# Reset verification for projects where verification has failed # Reset verification for projects where verification has failed
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve Geo actively tries to correct verification failures by marking the repository to
verification failures, so you should reset them manually. This rake task marks be resynced with a backoff period. If you want to reset them manually, this
projects where verification has failed or the checksum mismatch to be resynced: rake task marks projects where verification has failed or the checksum mismatch
to be resynced without the backoff period:
#### For repositories: #### For repositories:
......
...@@ -355,12 +355,9 @@ module Geo ...@@ -355,12 +355,9 @@ module Geo
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification
def legacy_find_registries_to_verify(batch_size:) def legacy_find_registries_to_verify(batch_size:)
repo_condition = local_repo_condition
wiki_condition = local_wiki_condition
registries = Geo::ProjectRegistry registries = Geo::ProjectRegistry
.where(repo_condition.or(wiki_condition)) .where(local_repo_condition.or(local_wiki_condition))
.pluck(:project_id, repo_condition.to_sql, wiki_condition.to_sql) .pluck(:project_id, local_repo_condition.to_sql, local_wiki_condition.to_sql)
return Geo::ProjectRegistry.none if registries.empty? return Geo::ProjectRegistry.none if registries.empty?
...@@ -410,12 +407,22 @@ module Geo ...@@ -410,12 +407,22 @@ module Geo
local_registry_table[:repository_verification_checksum_sha].eq(nil) local_registry_table[:repository_verification_checksum_sha].eq(nil)
.and(local_registry_table[:last_repository_verification_failure].eq(nil)) .and(local_registry_table[:last_repository_verification_failure].eq(nil))
.and(local_registry_table[:resync_repository].eq(false)) .and(local_registry_table[:resync_repository].eq(false))
.and(repository_missing_on_primary_is_not_true)
end end
def local_wiki_condition def local_wiki_condition
local_registry_table[:wiki_verification_checksum_sha].eq(nil) local_registry_table[:wiki_verification_checksum_sha].eq(nil)
.and(local_registry_table[:last_wiki_verification_failure].eq(nil)) .and(local_registry_table[:last_wiki_verification_failure].eq(nil))
.and(local_registry_table[:resync_wiki].eq(false)) .and(local_registry_table[:resync_wiki].eq(false))
.and(wiki_missing_on_primary_is_not_true)
end
# Builds a raw SQL predicate on project_registry.repository_missing_on_primary
# for use in an Arel condition chain.
#
# `IS NOT TRUE` holds when the column is FALSE or NULL, so rows where the flag
# was never set still satisfy the predicate; a plain inequality against TRUE
# would not match NULL.
#
# NOTE(review): the table name is hard-coded in the literal instead of being
# derived from local_registry_table — confirm the two always agree.
#
# @return [Arel::Nodes::SqlLiteral]
def repository_missing_on_primary_is_not_true
Arel::Nodes::SqlLiteral.new("project_registry.repository_missing_on_primary IS NOT TRUE")
end
def wiki_missing_on_primary_is_not_true
Arel::Nodes::SqlLiteral.new("project_registry.wiki_missing_on_primary IS NOT TRUE")
end end
end end
end end
...@@ -115,7 +115,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -115,7 +115,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
"#{type}_verification_checksum_sha" => nil, "#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil, "last_#{type}_verification_failure" => nil,
"resync_#{type}_was_scheduled_at" => scheduled_at) "#{type}_verification_retry_count" => nil,
"resync_#{type}_was_scheduled_at" => scheduled_at,
"#{type}_retry_count" => nil,
"#{type}_retry_at" => nil
)
end end
def repository_sync_due?(scheduled_time) def repository_sync_due?(scheduled_time)
...@@ -150,6 +154,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -150,6 +154,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
retry_count(type) > RETRIES_BEFORE_REDOWNLOAD retry_count(type) > RETRIES_BEFORE_REDOWNLOAD
end end
# Number of verification retries recorded for the given type.
#
# Reads the "<type>_verification_retry_count" attribute and coerces it with
# to_i, so an unset (nil) counter is reported as 0.
#
# @param type [Symbol, String] prefix of the counter attribute to read
# @return [Integer]
def verification_retry_count(type)
  counter_attribute = "#{type}_verification_retry_count"
  public_send(counter_attribute).to_i # rubocop:disable GitlabSecurity/PublicSend
end
private private
def fetches_since_gc_redis_key def fetches_since_gc_redis_key
...@@ -178,15 +186,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -178,15 +186,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
last_wiki_synced_at && timestamp > last_wiki_synced_at last_wiki_synced_at && timestamp > last_wiki_synced_at
end end
# To prevent the retry time from storing invalid dates in the database,
# cap the max time to a week plus some random jitter value.
def next_retry_time(retry_count)
proposed_time = Time.now + delay(retry_count).seconds
max_future_time = Time.now + 7.days + delay(1).seconds
[proposed_time, max_future_time].min
end
def retry_count(type) def retry_count(type)
public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend
end end
......
# frozen_string_literal: true
module Geo
  # Common base class for the Geo repository verification services.
  #
  # Mixes in Delay and Gitlab::Geo::ProjectLogHelpers and gives subclasses two
  # private helpers: one that computes a repository checksum and one that
  # works out the next retry attempt for a resource.
  class BaseRepositoryVerificationService
    include Delay
    include Gitlab::Geo::ProjectLogHelpers

    # Entry point of the service. The base implementation always raises, so
    # every subclass has to provide its own.
    #
    # @raise [NotImplementedError]
    def execute
      raise NotImplementedError
    end

    private

    # Checksum of the given repository.
    #
    # When the checksum call raises NoRepository or InvalidRepository, the
    # EMPTY_REPOSITORY_CHECKSUM constant is returned instead of an error.
    #
    # @param repository [#checksum]
    # @return the repository checksum, or the empty-repository checksum
    def calculate_checksum(repository)
      repository.checksum
    rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository
      Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM
    end

    # Works out when, and as which attempt number, the next retry should run.
    #
    # The current "<type>_retry_count" of the resource is read (nil counts as
    # 0) and incremented by one; the retry time is obtained from
    # next_retry_time for that incremented count.
    #
    # @param resource [#public_send] record exposing "<type>_retry_count"
    # @param type [Symbol, String] prefix of the retry counter attribute
    # @return [Array] two elements: the next retry time and the new retry count
    def calculate_next_retry_attempt(resource, type)
      previous_attempts = resource.public_send("#{type}_retry_count").to_i # rubocop:disable GitlabSecurity/PublicSend
      attempt = previous_attempts + 1

      [next_retry_time(attempt), attempt]
    end
  end
end
module Geo # frozen_string_literal: true
class RepositoryVerificationPrimaryService
include Delay
include Gitlab::Geo::ProjectLogHelpers
module Geo
class RepositoryVerificationPrimaryService < BaseRepositoryVerificationService
def initialize(project) def initialize(project)
@project = project @project = project
end end
def execute def execute
calculate_repository_checksum verify_checksum(:repository, project.repository)
calculate_wiki_checksum verify_checksum(:wiki, project.wiki.repository)
end end
private private
attr_reader :project attr_reader :project
def calculate_repository_checksum def verify_checksum(type, repository)
calculate_checksum(:repository, project.repository) checksum = calculate_checksum(repository)
end update_repository_state!(type, checksum: checksum)
def calculate_wiki_checksum
calculate_checksum(:wiki, project.wiki.repository)
end
def calculate_checksum(type, repository)
update_repository_state!(type, checksum: repository.checksum)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository
update_repository_state!(type, checksum: Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM)
rescue => e rescue => e
log_error('Error calculating the repository checksum', e, type: type) log_error("Error calculating the #{type} checksum", e, type: type)
update_repository_state!(type, failure: e.message) update_repository_state!(type, failure: e.message)
end end
def update_repository_state!(type, checksum: nil, failure: nil) def update_repository_state!(type, checksum: nil, failure: nil)
retry_at, retry_count = retry_at, retry_count =
if failure.present? if failure.present?
retry_count = repository_state.public_send("#{type}_retry_count").to_i + 1 # rubocop:disable GitlabSecurity/PublicSend calculate_next_retry_attempt(repository_state, type)
[next_retry_time(retry_count), retry_count]
end end
repository_state.update!( repository_state.update!(
...@@ -48,15 +37,6 @@ module Geo ...@@ -48,15 +37,6 @@ module Geo
) )
end end
# To prevent the retry time from storing invalid dates in the database,
# cap the max time to a week plus some random jitter value.
def next_retry_time(retry_count)
proposed_time = Time.now + delay(retry_count).seconds
max_future_time = Time.now + 7.days + delay(1).seconds
[proposed_time, max_future_time].min
end
def repository_state def repository_state
@repository_state ||= project.repository_state || project.build_repository_state @repository_state ||= project.repository_state || project.build_repository_state
end end
......
...@@ -47,7 +47,9 @@ module Geo ...@@ -47,7 +47,9 @@ module Geo
"resync_#{type}" => true, "resync_#{type}" => true,
"#{type}_verification_checksum_sha" => nil, "#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"#{type}_missing_on_primary" => nil
} }
end end
end end
......
module Geo # frozen_string_literal: true
class RepositoryVerificationSecondaryService
include Gitlab::Geo::ProjectLogHelpers
module Geo
class RepositoryVerificationSecondaryService < BaseRepositoryVerificationService
def initialize(registry, type) def initialize(registry, type)
@registry = registry @registry = registry
@type = type.to_sym @type = type.to_sym
...@@ -23,8 +23,9 @@ module Geo ...@@ -23,8 +23,9 @@ module Geo
def should_verify_checksum? def should_verify_checksum?
return false if resync? return false if resync?
return false unless primary_checksum.present?
primary_checksum.present? && primary_checksum != secondary_checksum mismatch?(secondary_checksum)
end end
def resync? def resync?
...@@ -39,44 +40,45 @@ module Geo ...@@ -39,44 +40,45 @@ module Geo
registry.public_send("#{type}_verification_checksum_sha") # rubocop:disable GitlabSecurity/PublicSend registry.public_send("#{type}_verification_checksum_sha") # rubocop:disable GitlabSecurity/PublicSend
end end
# Tells whether the given checksum differs from primary_checksum.
#
# @param checksum the checksum to compare against primary_checksum
# @return [Boolean] true when the two values are not equal
def mismatch?(checksum)
primary_checksum != checksum
end
def verify_checksum def verify_checksum
checksum = calculate_checksum checksum = calculate_checksum(repository)
if mismatch?(checksum) if mismatch?(checksum)
update_registry!(mismatch: true, failure: "#{type.to_s.capitalize} checksum mismatch: #{repository.disk_path}") update_registry!(mismatch: true, failure: "#{type.to_s.capitalize} checksum mismatch")
else else
update_registry!(checksum: checksum) update_registry!(checksum: checksum)
end end
rescue ::Gitlab::Git::Repository::ChecksumError, Timeout::Error => e rescue => e
update_registry!(failure: "Error verifying #{type.to_s.capitalize} checksum: #{repository.disk_path}", exception: e) update_registry!(failure: "Error calculating #{type} checksum", exception: e)
end end
def calculate_checksum def update_registry!(checksum: nil, mismatch: false, failure: nil, exception: nil)
repository.checksum reverify, verification_retry_count =
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository if mismatch || failure.present?
Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM log_error(failure, exception, type: type)
end [true, registry.verification_retry_count(type) + 1]
else
[false, nil]
end
def mismatch?(checksum) resync_retry_at, resync_retry_count =
primary_checksum != checksum if reverify
end [*calculate_next_retry_attempt(registry, type)]
end
def update_registry!(checksum: nil, mismatch: false, failure: nil, exception: nil, details: {}) registry.update!(
attrs = {
"#{type}_verification_checksum_sha" => checksum, "#{type}_verification_checksum_sha" => checksum,
"#{type}_checksum_mismatch" => mismatch, "#{type}_checksum_mismatch" => mismatch,
"last_#{type}_verification_failure" => failure "last_#{type}_verification_failure" => failure,
} "#{type}_verification_retry_count" => verification_retry_count,
"resync_#{type}" => reverify,
if failure "#{type}_retry_at" => resync_retry_at,
log_error(failure, exception, "#{type}_retry_count" => resync_retry_count
type: type, )
repository_shard: project.repository_storage,
repsitory_disk_path: repository.disk_path
)
end
registry.update!(attrs)
end end
def repository def repository
......
---
title: Geo - Actively try to correct verification failures on the secondary
merge_request: 6759
author:
type: added
# frozen_string_literal: true
# Adds two integer columns to the project_registry table, one verification
# retry counter for the repository and one for the wiki. Neither column gets
# a default or a NOT NULL constraint.
class AddRetryVerificationFieldsToProjectRegistry < ActiveRecord::Migration
  def change
    %i[repository_verification_retry_count wiki_verification_retry_count].each do |column_name|
      add_column :project_registry, column_name, :integer
    end
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180727221937) do ActiveRecord::Schema.define(version: 20180802215313) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -80,6 +80,8 @@ ActiveRecord::Schema.define(version: 20180727221937) do ...@@ -80,6 +80,8 @@ ActiveRecord::Schema.define(version: 20180727221937) do
t.datetime_with_timezone "resync_wiki_was_scheduled_at" t.datetime_with_timezone "resync_wiki_was_scheduled_at"
t.boolean "repository_missing_on_primary" t.boolean "repository_missing_on_primary"
t.boolean "wiki_missing_on_primary" t.boolean "wiki_missing_on_primary"
t.integer "repository_verification_retry_count"
t.integer "wiki_verification_retry_count"
end end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
......
...@@ -3,4 +3,13 @@ module Delay ...@@ -3,4 +3,13 @@ module Delay
def delay(retry_count = 0) def delay(retry_count = 0)
(retry_count**4) + 15 + (rand(30) * (retry_count + 1)) (retry_count**4) + 15 + (rand(30) * (retry_count + 1))
end end
# Time at which the next retry should happen.
#
# Two candidates are computed: the current time plus delay(retry_count)
# seconds, and a ceiling of the current time plus seven days plus delay(1)
# seconds of jitter. The earlier one is returned, which keeps the stored
# retry time from becoming an invalid date in the database.
#
# @param retry_count [Integer] attempt number passed on to delay
# @return the earlier of the proposed time and the capped time
def next_retry_time(retry_count)
  candidates = [
    Time.now + delay(retry_count).seconds,
    Time.now + 7.days + delay(1).seconds
  ]

  candidates.min
end
end end
...@@ -677,6 +677,20 @@ describe Geo::ProjectRegistryFinder, :geo do ...@@ -677,6 +677,20 @@ describe Geo::ProjectRegistryFinder, :geo do
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end end
it 'does not return registries when the repository is missing on primary' do
project_verified = create(:repository_state, :repository_verified).project
create(:geo_project_registry, :synced, project: project_verified, repository_missing_on_primary: true)
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end
it 'does not return registries when the wiki is missing on primary' do
project_verified = create(:repository_state, :wiki_verified).project
create(:geo_project_registry, :synced, project: project_verified, wiki_missing_on_primary: true)
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end
end end
end end
......
...@@ -388,11 +388,13 @@ describe Geo::ProjectRegistry do ...@@ -388,11 +388,13 @@ describe Geo::ProjectRegistry do
before do before do
subject.start_sync!(type) subject.start_sync!(type)
subject.update!(repository_retry_at: 1.day.from_now, subject.update!(repository_retry_at: 1.day.from_now,
repository_retry_count: 1,
force_to_redownload_repository: true, force_to_redownload_repository: true,
last_repository_sync_failure: 'foo', last_repository_sync_failure: 'foo',
repository_verification_checksum_sha: 'abc123', repository_verification_checksum_sha: 'abc123',
repository_checksum_mismatch: true, repository_checksum_mismatch: true,
last_repository_verification_failure: 'bar') last_repository_verification_failure: 'bar',
repository_verification_retry_count: 1)
end end
it 'sets last_repository_successful_sync_at to now' do it 'sets last_repository_successful_sync_at to now' do
...@@ -408,10 +410,10 @@ describe Geo::ProjectRegistry do ...@@ -408,10 +410,10 @@ describe Geo::ProjectRegistry do
expect(subject.reload).to have_attributes( expect(subject.reload).to have_attributes(
resync_repository: false, resync_repository: false,
repository_retry_count: be_nil, repository_retry_count: nil,
repository_retry_at: be_nil, repository_retry_at: nil,
force_to_redownload_repository: false, force_to_redownload_repository: false,
last_repository_sync_failure: be_nil, last_repository_sync_failure: nil,
repository_missing_on_primary: false repository_missing_on_primary: false
) )
end end
...@@ -419,9 +421,17 @@ describe Geo::ProjectRegistry do ...@@ -419,9 +421,17 @@ describe Geo::ProjectRegistry do
it 'resets verification state' do it 'resets verification state' do
subject.finish_sync!(type) subject.finish_sync!(type)
expect(subject.reload.repository_verification_checksum_sha).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.repository_checksum_mismatch).to be false repository_verification_checksum_sha: nil,
expect(subject.reload.last_repository_verification_failure).to be_nil repository_checksum_mismatch: false,
last_repository_verification_failure: nil
)
end
it 'does not reset repository_verification_retry_count' do
subject.finish_sync!(type)
expect(subject.reload.repository_verification_retry_count).to eq 1
end end
context 'when a repository was missing on primary' do context 'when a repository was missing on primary' do
...@@ -444,15 +454,25 @@ describe Geo::ProjectRegistry do ...@@ -444,15 +454,25 @@ describe Geo::ProjectRegistry do
end end
it 'resets the other sync state fields' do it 'resets the other sync state fields' do
expect(subject.reload.repository_retry_count).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.repository_retry_at).to be_nil repository_retry_count: nil,
expect(subject.reload.force_to_redownload_repository).to be false repository_retry_at: nil,
force_to_redownload_repository: false,
last_repository_sync_failure: nil,
repository_missing_on_primary: false
)
end end
it 'resets the verification state' do it 'resets the verification state' do
expect(subject.reload.repository_verification_checksum_sha).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.repository_checksum_mismatch).to be false repository_verification_checksum_sha: nil,
expect(subject.reload.last_repository_verification_failure).to be_nil repository_checksum_mismatch: false,
last_repository_verification_failure: nil
)
end
it 'does not reset repository_verification_retry_count' do
expect(subject.reload.repository_verification_retry_count).to eq 1
end end
end end
end end
...@@ -463,11 +483,13 @@ describe Geo::ProjectRegistry do ...@@ -463,11 +483,13 @@ describe Geo::ProjectRegistry do
before do before do
subject.start_sync!(type) subject.start_sync!(type)
subject.update!(wiki_retry_at: 1.day.from_now, subject.update!(wiki_retry_at: 1.day.from_now,
wiki_retry_count: 1,
force_to_redownload_wiki: true, force_to_redownload_wiki: true,
last_wiki_sync_failure: 'foo', last_wiki_sync_failure: 'foo',
wiki_verification_checksum_sha: 'abc123', wiki_verification_checksum_sha: 'abc123',
wiki_checksum_mismatch: true, wiki_checksum_mismatch: true,
last_wiki_verification_failure: 'bar') last_wiki_verification_failure: 'bar',
wiki_verification_retry_count: 1)
end end
it 'sets last_wiki_successful_sync_at to now' do it 'sets last_wiki_successful_sync_at to now' do
...@@ -483,10 +505,10 @@ describe Geo::ProjectRegistry do ...@@ -483,10 +505,10 @@ describe Geo::ProjectRegistry do
expect(subject.reload).to have_attributes( expect(subject.reload).to have_attributes(
resync_wiki: false, resync_wiki: false,
wiki_retry_count: be_nil, wiki_retry_count: nil,
wiki_retry_at: be_nil, wiki_retry_at: nil,
force_to_redownload_wiki: false, force_to_redownload_wiki: false,
last_wiki_sync_failure: be_nil, last_wiki_sync_failure: nil,
wiki_missing_on_primary: false wiki_missing_on_primary: false
) )
end end
...@@ -494,9 +516,17 @@ describe Geo::ProjectRegistry do ...@@ -494,9 +516,17 @@ describe Geo::ProjectRegistry do
it 'resets verification state' do it 'resets verification state' do
subject.finish_sync!(type) subject.finish_sync!(type)
expect(subject.reload.wiki_verification_checksum_sha).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.wiki_checksum_mismatch).to be false wiki_verification_checksum_sha: nil,
expect(subject.reload.last_wiki_verification_failure).to be_nil wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil
)
end
it 'does not reset wiki_verification_retry_count' do
subject.finish_sync!(type)
expect(subject.reload.wiki_verification_retry_count).to eq 1
end end
context 'when a wiki was missing on primary' do context 'when a wiki was missing on primary' do
...@@ -519,15 +549,25 @@ describe Geo::ProjectRegistry do ...@@ -519,15 +549,25 @@ describe Geo::ProjectRegistry do
end end
it 'resets the other sync state fields' do it 'resets the other sync state fields' do
expect(subject.reload.wiki_retry_count).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.wiki_retry_at).to be_nil wiki_retry_count: nil,
expect(subject.reload.force_to_redownload_wiki).to be false wiki_retry_at: nil,
force_to_redownload_wiki: false,
last_wiki_sync_failure: nil,
wiki_missing_on_primary: false
)
end end
it 'resets the verification state' do it 'resets the verification state' do
expect(subject.reload.wiki_verification_checksum_sha).to be_nil expect(subject.reload).to have_attributes(
expect(subject.reload.wiki_checksum_mismatch).to be false wiki_verification_checksum_sha: nil,
expect(subject.reload.last_wiki_verification_failure).to be_nil wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil
)
end
it 'does not reset wiki_verification_retry_count' do
expect(subject.reload.wiki_verification_retry_count).to eq 1
end end
end end
end end
...@@ -646,8 +686,6 @@ describe Geo::ProjectRegistry do ...@@ -646,8 +686,6 @@ describe Geo::ProjectRegistry do
end end
describe '#repository_updated!' do describe '#repository_updated!' do
let(:now) { Time.now }
context 'for a repository' do context 'for a repository' do
let(:event) { double(:event, source: 'repository') } let(:event) { double(:event, source: 'repository') }
...@@ -656,29 +694,33 @@ describe Geo::ProjectRegistry do ...@@ -656,29 +694,33 @@ describe Geo::ProjectRegistry do
repository_verification_checksum_sha: 'abc123', repository_verification_checksum_sha: 'abc123',
repository_checksum_mismatch: true, repository_checksum_mismatch: true,
last_repository_verification_failure: 'foo', last_repository_verification_failure: 'foo',
resync_repository_was_scheduled_at: nil) resync_repository_was_scheduled_at: nil,
repository_retry_at: 1.hour.from_now,
subject.repository_updated!(event, now) repository_retry_count: 1,
end repository_verification_retry_count: 1)
it 'sets resync_repository to true' do
expect(subject.resync_repository).to be true
end
it 'sets repository_verification_checksum_sha to nil' do
expect(subject.repository_verification_checksum_sha).to be_nil
end
it 'sets repository_checksum_mismatch to false' do subject.repository_updated!(event, Time.now)
expect(subject.repository_checksum_mismatch).to be false
end end
it 'sets last_repository_verification_failure to nil' do it 'resets sync state' do
expect(subject.last_repository_verification_failure).to be_nil expect(subject.reload).to have_attributes(
resync_repository: true,
repository_retry_count: nil,
repository_retry_at: nil,
force_to_redownload_repository: nil,
last_repository_sync_failure: nil,
repository_missing_on_primary: nil,
resync_repository_was_scheduled_at: be_within(1.minute).of(Time.now)
)
end end
it 'sets resync_repository_was_scheduled_at to scheduled_at' do it 'resets verification state' do
expect(subject.resync_repository_was_scheduled_at).to eq(now) expect(subject).to have_attributes(
repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false,
last_repository_verification_failure: nil,
repository_verification_retry_count: nil
)
end end
end end
...@@ -690,29 +732,33 @@ describe Geo::ProjectRegistry do ...@@ -690,29 +732,33 @@ describe Geo::ProjectRegistry do
wiki_verification_checksum_sha: 'abc123', wiki_verification_checksum_sha: 'abc123',
wiki_checksum_mismatch: true, wiki_checksum_mismatch: true,
last_wiki_verification_failure: 'foo', last_wiki_verification_failure: 'foo',
resync_wiki_was_scheduled_at: nil) resync_wiki_was_scheduled_at: nil,
wiki_retry_at: 1.hour.from_now,
wiki_retry_count: 1,
wiki_verification_retry_count: 1)
subject.repository_updated!(event, now) subject.repository_updated!(event, Time.now)
end end
it 'sets resync_wiki to true' do it 'resets sync state' do
expect(subject.resync_wiki).to be true expect(subject.reload).to have_attributes(
end resync_wiki: true,
wiki_retry_count: nil,
it 'sets wiki_verification_checksum_sha to nil' do wiki_retry_at: nil,
expect(subject.wiki_verification_checksum_sha).to be_nil force_to_redownload_wiki: nil,
end last_wiki_sync_failure: nil,
wiki_missing_on_primary: nil,
it 'sets wiki_checksum_mismatch to false' do resync_wiki_was_scheduled_at: be_within(1.minute).of(Time.now)
expect(subject.wiki_checksum_mismatch).to be false )
end
it 'sets last_wiki_verification_failure to nil' do
expect(subject.last_wiki_verification_failure).to be_nil
end end
it 'sets resync_wiki_was_scheduled_at to scheduled_at' do it 'resets verification state' do
expect(subject.resync_wiki_was_scheduled_at).to eq(now) expect(subject).to have_attributes(
wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil
)
end end
end end
end end
......
...@@ -53,7 +53,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -53,7 +53,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: true, resync_repository: true,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -67,7 +69,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -67,7 +69,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: true, resync_repository: true,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -81,7 +85,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -81,7 +85,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: false, resync_repository: false,
repository_verification_checksum_sha: be_present, repository_verification_checksum_sha: be_present,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -95,7 +101,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -95,7 +101,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: false, resync_repository: false,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
end end
...@@ -121,7 +129,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -121,7 +129,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: true, resync_wiki: true,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -135,7 +145,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -135,7 +145,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: true, resync_wiki: true,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -149,7 +161,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -149,7 +161,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: false, resync_wiki: false,
wiki_verification_checksum_sha: be_present, wiki_verification_checksum_sha: be_present,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -163,7 +177,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -163,7 +177,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: false, resync_wiki: false,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
end end
......
...@@ -3,19 +3,13 @@ require 'spec_helper' ...@@ -3,19 +3,13 @@ require 'spec_helper'
describe Geo::RepositoryVerificationSecondaryService, :geo do describe Geo::RepositoryVerificationSecondaryService, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
let(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
end
shared_examples 'verify checksums for repositories/wikis' do |type| shared_examples 'verify checksums for repositories/wikis' do |type|
let(:repository) { find_repository(type) } let(:repository) { find_repository(type) }
subject(:service) { described_class.new(registry, type) } subject(:service) { described_class.new(registry, type) }
it 'does not calculate the checksum when not running on a secondary' do it 'does not calculate the checksum when not running on a secondary' do
allow(Gitlab::Geo).to receive(:secondary?) { false } allow(Gitlab::Geo).to receive(:secondary?).and_return(false)
expect(repository).not_to receive(:checksum) expect(repository).not_to receive(:checksum)
...@@ -38,7 +32,7 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -38,7 +32,7 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
service.execute service.execute
end end
it 'does not verify the checksum if the checksums already match' do it 'does not verify the checksum if the current checksum matches' do
repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum') repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum')
registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum') registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum')
...@@ -48,14 +42,18 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -48,14 +42,18 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
end end
it 'sets checksum when the checksum matches' do it 'sets checksum when the checksum matches' do
expect(repository).to receive(:checksum).and_return('my_checksum') allow(repository).to receive(:checksum).and_return('my_checksum')
service.execute service.execute
expect(registry).to have_attributes( expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => 'my_checksum', "#{type}_verification_checksum_sha" => 'my_checksum',
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"resync_#{type}" => false,
"#{type}_retry_at" => nil,
"#{type}_retry_count" => nil
) )
end end
...@@ -69,20 +67,76 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -69,20 +67,76 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
expect(registry).to have_attributes( expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => '0000000000000000000000000000000000000000', "#{type}_verification_checksum_sha" => '0000000000000000000000000000000000000000',
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"resync_#{type}" => false,
"#{type}_retry_at" => nil,
"#{type}_retry_count" => nil
) )
end end
it 'keeps track of failure when the checksum mismatch' do context 'when the checksum mismatch' do
expect(repository).to receive(:checksum).and_return('other_checksum') before do
allow(repository).to receive(:checksum).and_return('other_checksum')
end
service.execute it 'keeps track of failures' do
service.execute
expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => true,
"last_#{type}_verification_failure" => "#{type.to_s.capitalize} checksum mismatch",
"#{type}_verification_retry_count" => 1,
"resync_#{type}" => true,
"#{type}_retry_at" => be_present,
"#{type}_retry_count" => 1
)
end
expect(registry).to have_attributes( it 'ensures the next retry time is capped properly' do
"#{type}_verification_checksum_sha" => nil, registry.update("#{type}_retry_count" => 30)
"#{type}_checksum_mismatch" => true,
"last_#{type}_verification_failure" => /#{Regexp.quote(type.to_s.capitalize)} checksum mismatch/ service.execute
)
expect(registry).to have_attributes(
"resync_#{type}" => true,
"#{type}_retry_at" => be_within(100.seconds).of(Time.now + 7.days),
"#{type}_retry_count" => 31
)
end
end
context 'when checksum calculation fails' do
before do
allow(repository).to receive(:checksum).and_raise("Something went wrong with #{type}")
end
it 'keeps track of failures' do
service.execute
expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => "Error calculating #{type} checksum",
"#{type}_verification_retry_count" => 1,
"resync_#{type}" => true,
"#{type}_retry_at" => be_present,
"#{type}_retry_count" => 1
)
end
it 'ensures the next retry time is capped properly' do
registry.update("#{type}_retry_count" => 30)
service.execute
expect(registry).to have_attributes(
"resync_#{type}" => true,
"#{type}_retry_at" => be_within(100.seconds).of(Time.now + 7.days),
"#{type}_retry_count" => 31
)
end
end end
def find_repository(type) def find_repository(type)
...@@ -93,16 +147,22 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -93,16 +147,22 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
end end
end end
let(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
end
describe '#execute' do describe '#execute' do
let(:project) { create(:project, :repository, :wiki_repo) } let(:project) { create(:project, :repository, :wiki_repo) }
let!(:repository_state) { create(:repository_state, project: project, repository_verification_checksum: 'my_checksum', wiki_verification_checksum: 'my_checksum') } let!(:repository_state) { create(:repository_state, project: project, repository_verification_checksum: 'my_checksum', wiki_verification_checksum: 'my_checksum') }
let(:registry) { create(:geo_project_registry, :synced, project: project) } let(:registry) { create(:geo_project_registry, :synced, project: project) }
context 'repository' do context 'for a repository' do
include_examples 'verify checksums for repositories/wikis', :repository include_examples 'verify checksums for repositories/wikis', :repository
end end
context 'wiki' do context 'for a wiki' do
include_examples 'verify checksums for repositories/wikis', :wiki include_examples 'verify checksums for repositories/wikis', :wiki
end end
end end
......
...@@ -52,6 +52,18 @@ describe Geo::RepositoryVerification::Secondary::ShardWorker, :postgresql, :clea ...@@ -52,6 +52,18 @@ describe Geo::RepositoryVerification::Secondary::ShardWorker, :postgresql, :clea
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'does not schedule jobs for projects missing repositories on primary' do
other_project = create(:project)
create(:repository_state, :repository_verified, project: project)
create(:repository_state, :wiki_verified, project: other_project)
create(:geo_project_registry, :synced, project: project, repository_missing_on_primary: true)
create(:geo_project_registry, :synced, project: other_project, wiki_missing_on_primary: true)
expect(secondary_singleworker).not_to receive(:perform_async)
subject.perform(shard_name)
end
# test that when jobs are always moving forward and we're not querying the same things # test that when jobs are always moving forward and we're not querying the same things
# over and over # over and over
describe 'resource loading' do describe 'resource loading' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment