Add query to return projects verified before minimum interval

To constantly re-verify repositories we should find projects
where the last verification attempt happened before the minimum
re-verification interval. This interval says after how many
days (at least) a repository should be re-verified.
parent 56a67316
......@@ -48,11 +48,18 @@ module Geo
.where(repository_never_verified)
.limit(batch_size)
relation = apply_shard_restriction(relation) if shard_name.present?
relation
apply_shard_restriction(relation)
end
# rubocop: enable CodeReuse/ActiveRecord
# Builds the query for projects whose repository is due for re-verification.
#
# interval   - value forwarded to the query builder, which compares it
#              against the last repository verification time
# batch_size - maximum number of projects to return
def find_reverifiable_repositories(interval:, batch_size:)
  build_query_to_find_reverifiable_projects(
    type: :repository,
    interval: interval,
    batch_size: batch_size
  )
end
# Builds the query for projects whose wiki is due for re-verification.
#
# interval   - value forwarded to the query builder, which compares it
#              against the last wiki verification time
# batch_size - maximum number of projects to return
def find_reverifiable_wikis(interval:, batch_size:)
  build_query_to_find_reverifiable_projects(
    type: :wiki,
    interval: interval,
    batch_size: batch_size
  )
end
# Returns the number of records in the Project.verified_repos scope.
def count_verified_repositories
  verified = Project.verified_repos
  verified.count
end
......@@ -84,8 +91,7 @@ module Geo
.and(repository_state_table["last_#{type}_verification_failure"].not_eq(nil))
).take(batch_size)
query = apply_shard_restriction(query) if shard_name.present?
query
apply_shard_restriction(query)
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -98,8 +104,29 @@ module Geo
.where(repository_outdated.or(wiki_outdated))
.take(batch_size)
query = apply_shard_restriction(query) if shard_name.present?
query
apply_shard_restriction(query)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
# Builds the relation of projects whose repository or wiki should be
# verified again.
#
# type       - :repository or :wiki; selects which "<type>_verification_*"
#              columns of the repository state table are inspected
# interval   - cutoff compared against "last_<type>_verification_ran_at";
#              rows verified at or before it (or with no recorded run) match
# batch_size - maximum number of projects to return
#
# Only rows that hold a checksum and no recorded failure are considered.
def build_query_to_find_reverifiable_projects(type:, interval:, batch_size:)
  checksum_column = repository_state_table["#{type}_verification_checksum"]
  failure_column = repository_state_table["last_#{type}_verification_failure"]
  ran_at_column = repository_state_table["last_#{type}_verification_ran_at"]

  previously_succeeded = checksum_column.not_eq(nil).and(failure_column.eq(nil))
  due_again = ran_at_column.eq(nil).or(ran_at_column.lteq(interval))

  # Less active projects go first: highly active projects already have their
  # repositories verified more frequently.
  candidates = Project
    .joins(:repository_state)
    .where(previously_succeeded.and(due_again))
    .order(last_repository_updated_at_asc)
    .limit(batch_size)

  apply_shard_restriction(candidates)
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -141,8 +168,10 @@ module Geo
end
# rubocop: disable CodeReuse/ActiveRecord
# Narrows +query+ to projects whose repository_storage equals the finder's
# shard name.
#
# Returns +query+ unchanged when no shard name is present, so callers can
# invoke this unconditionally instead of guarding each call site.
def apply_shard_restriction(query)
  return query unless shard_name.present?

  query.where(projects_table[:repository_storage].eq(shard_name))
end
# rubocop: enable CodeReuse/ActiveRecord
end
......
......@@ -4,6 +4,7 @@ FactoryBot.define do
trait :repository_failed do
repository_verification_checksum nil
last_repository_verification_ran_at { Time.now }
last_repository_verification_failure 'Could not calculate the checksum'
repository_retry_count 1
repository_retry_at { 5.minutes.ago }
......@@ -11,11 +12,13 @@ FactoryBot.define do
# Repository state with a recorded verification run (a day ago) but neither
# a checksum nor a failure message.
trait :repository_outdated do
  repository_verification_checksum { nil }
  last_repository_verification_ran_at { 1.day.ago }
  last_repository_verification_failure { nil }
end
trait :repository_verified do
repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee'
last_repository_verification_ran_at { 1.day.ago }
last_repository_verification_failure nil
repository_retry_count nil
repository_retry_at nil
......@@ -23,6 +26,7 @@ FactoryBot.define do
trait :wiki_failed do
wiki_verification_checksum nil
last_wiki_verification_ran_at { Time.now }
last_wiki_verification_failure 'Could not calculate the checksum'
wiki_retry_count 1
wiki_retry_at { 5.minutes.ago }
......@@ -30,11 +34,13 @@ FactoryBot.define do
# Wiki state with a recorded verification run (a day ago) but neither a
# checksum nor a failure message.
trait :wiki_outdated do
  wiki_verification_checksum { nil }
  last_wiki_verification_ran_at { 1.day.ago }
  last_wiki_verification_failure { nil }
end
trait :wiki_verified do
wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef'
last_wiki_verification_ran_at { 1.day.ago }
last_wiki_verification_failure nil
wiki_retry_count nil
wiki_retry_at nil
......
......@@ -188,4 +188,104 @@ describe Geo::RepositoryVerificationFinder, :postgresql do
end
end
end
# The finder compares `interval` against last_repository_verification_ran_at,
# so every example passes a cutoff timestamp (1.day.ago) and varies the
# recorded verification time around it.
describe '#find_reverifiable_repositories' do
  it 'returns projects where repository was verified before the minimum re-verification interval' do
    project_recently_verified = create(:project)

    create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
    create(:repository_state, :repository_verified, project: project_recently_verified, last_repository_verification_ran_at: Time.now)

    # contain_exactly takes the expected elements directly; match_array
    # expects a collection, not a single record.
    expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
      .to contain_exactly(project)
  end

  it 'does not return projects where repository verification is outdated' do
    # :repository_outdated leaves the checksum nil.
    create(:repository_state, :repository_outdated, project: project, last_repository_verification_ran_at: 2.days.ago)

    expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
      .to be_empty
  end

  it 'does not return projects where repository verification failed' do
    # :repository_failed records a failure message and no checksum.
    create(:repository_state, :repository_failed, project: project, last_repository_verification_ran_at: 2.days.ago)

    expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
      .to be_empty
  end

  it 'returns less active projects first' do
    less_active_project = create(:project)

    create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
    create(:repository_state, :repository_verified, project: less_active_project, last_repository_verification_ran_at: 2.days.ago)

    # Results are ordered by last_repository_updated_at ascending.
    project.update_column(:last_repository_updated_at, 30.minutes.ago)
    less_active_project.update_column(:last_repository_updated_at, 2.days.ago)

    expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
      .to eq [less_active_project, project]
  end

  context 'with shard restriction' do
    subject { described_class.new(shard_name: project.repository_storage) }

    it 'does not return projects on other shards' do
      project_other_shard = create(:project)
      project_other_shard.update_column(:repository_storage, 'other')

      create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
      create(:repository_state, :repository_verified, project: project_other_shard, last_repository_verification_ran_at: 2.days.ago)

      expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
        .to contain_exactly(project)
    end
  end
end
# The finder compares `interval` against last_wiki_verification_ran_at, so
# every example passes a cutoff timestamp (1.day.ago) and varies the recorded
# verification time around it.
describe '#find_reverifiable_wikis' do
  it 'returns projects where wiki was verified before the minimum re-verification interval' do
    project_recently_verified = create(:project)

    create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
    create(:repository_state, :wiki_verified, project: project_recently_verified, last_wiki_verification_ran_at: Time.now)

    # contain_exactly takes the expected elements directly; match_array
    # expects a collection, not a single record.
    expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
      .to contain_exactly(project)
  end

  it 'does not return projects where wiki verification is outdated' do
    # :wiki_outdated leaves the checksum nil.
    create(:repository_state, :wiki_outdated, project: project, last_wiki_verification_ran_at: 2.days.ago)

    expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
      .to be_empty
  end

  it 'does not return projects where wiki verification failed' do
    # :wiki_failed records a failure message and no checksum.
    create(:repository_state, :wiki_failed, project: project, last_wiki_verification_ran_at: 2.days.ago)

    expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
      .to be_empty
  end

  it 'returns less active projects first' do
    less_active_project = create(:project)

    create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
    create(:repository_state, :wiki_verified, project: less_active_project, last_wiki_verification_ran_at: 2.days.ago)

    # Results are ordered by last_repository_updated_at ascending.
    project.update_column(:last_repository_updated_at, 30.minutes.ago)
    less_active_project.update_column(:last_repository_updated_at, 2.days.ago)

    expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
      .to eq [less_active_project, project]
  end

  context 'with shard restriction' do
    subject { described_class.new(shard_name: project.repository_storage) }

    it 'does not return projects on other shards' do
      project_other_shard = create(:project)
      project_other_shard.update_column(:repository_storage, 'other')

      create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
      create(:repository_state, :wiki_verified, project: project_other_shard, last_wiki_verification_ran_at: 2.days.ago)

      expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
        .to contain_exactly(project)
    end
  end
end
end
......@@ -4,10 +4,42 @@ require 'spec_helper'
describe Gitlab::BackgroundMigration::ResetChecksumFromProjectRepositoryStates, :migration, schema: 20180914195058 do
describe '#perform' do
let(:users) { table(:users) }
let(:projects) { table(:projects) }
let(:repository_states) { table(:project_repository_states) }
# Inserts a project_repository_states row through the migration table helper.
#
# By default both the repository and the wiki carry a checksum with no
# failure and no retry data; entries in +params+ override or extend those
# defaults (e.g. project_id).
def create_repository_state(params = {})
  defaults = {
    repository_verification_checksum: 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee',
    last_repository_verification_failure: nil,
    repository_retry_count: nil,
    repository_retry_at: nil,
    wiki_verification_checksum: 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef',
    last_wiki_verification_failure: nil,
    wiki_retry_count: nil,
    wiki_retry_at: nil
  }

  repository_states.create!(defaults.merge(params))
end
it 'processes all repository states in batch' do
repository_state_1 = create(:repository_state, :repository_verified, :wiki_verified)
repository_state_2 = create(:repository_state, :repository_failed, :wiki_failed)
repository_state_3 = create(:repository_state, :repository_verified, :wiki_verified)
users.create!(email: 'test@example.com', projects_limit: 100, username: 'test')
projects.create!(id: 1, name: 'project-1', path: 'project-1', visibility_level: 0, namespace_id: 1)
projects.create!(id: 2, name: 'project-2', path: 'project-2', visibility_level: 0, namespace_id: 1)
projects.create!(id: 3, name: 'project-3', path: 'project-3', visibility_level: 0, namespace_id: 1)
repository_state_1 = create_repository_state(project_id: 1)
repository_state_2 = create_repository_state(
project_id: 2,
wiki_verification_checksum: nil,
last_wiki_verification_failure: 'Could not calculate the checksum',
wiki_retry_count: 1,
wiki_retry_at: Time.now + 5.minutes
)
repository_state_3 = create_repository_state(project_id: 3)
subject.perform(repository_state_1.project_id, repository_state_2.project_id)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment