Add finder to find projects updated recently using FDW queries

parent 86c40199
# frozen_string_literal: true
# Finder for retrieving projects updated recently that belong to a specific
# shard using FDW queries.
#
# Basic usage:
#
# Geo::ProjectUpdatedRecentlyFinder
# .new(current_node: Gitlab::Geo.current_node, shard_name: 'default', batch_size: 1000)
# .execute.
module Geo
  class ProjectUpdatedRecentlyFinder
    # current_node - object responding to `id`; the matching Geo::Fdw::GeoNode
    #                record is looked up and used for every query.
    # shard_name   - shard name handed to `within_shards` and checked against
    #                the node's selective sync shards.
    # batch_size   - value handed to `limit`.
    def initialize(current_node:, shard_name:, batch_size:)
      @shard_name = shard_name
      @batch_size = batch_size
      @current_node = Geo::Fdw::GeoNode.find(current_node.id)
    end

    # Returns the node's recently updated projects within `shard_name`, limited
    # to `batch_size`, or an empty relation when the shard is not valid for
    # the node.
    #
    # rubocop:disable CodeReuse/ActiveRecord
    def execute
      if valid_shard?
        current_node.projects.recently_updated.within_shards(shard_name).limit(batch_size)
      else
        Geo::Fdw::Project.none
      end
    end
    # rubocop:enable CodeReuse/ActiveRecord

    private

    attr_reader :current_node, :shard_name, :batch_size

    # Every shard is valid unless the node selectively syncs by shards; in that
    # case `shard_name` must be one of the node's selected shards.
    def valid_shard?
      !current_node.selective_sync_by_shards? ||
        current_node.selective_sync_shards.include?(shard_name)
    end
  end
end
...@@ -13,6 +13,12 @@ module Geo ...@@ -13,6 +13,12 @@ module Geo
.where(Geo::ProjectRegistry.arel_table[:project_id].eq(nil)) .where(Geo::ProjectRegistry.arel_table[:project_id].eq(nil))
end end
# Inner-joins the project registry and merges in its `dirty` and `retry_due`
# scopes, in that order.
def recently_updated
  registry = Geo::ProjectRegistry

  inner_join_project_registry.merge(registry.dirty).merge(registry.retry_due)
end
# Searches for a list of projects based on the query given in `query`. # Searches for a list of projects based on the query given in `query`.
# #
# On PostgreSQL this method uses "ILIKE" to perform a case-insensitive # On PostgreSQL this method uses "ILIKE" to perform a case-insensitive
...@@ -33,6 +39,15 @@ module Geo ...@@ -33,6 +39,15 @@ module Geo
private private
# Builds an INNER JOIN from this table to the project registry table on
# `id = project_id` and applies it through `joins`.
def inner_join_project_registry
  projects = arel_table
  registry = Geo::ProjectRegistry.arel_table

  inner_join = projects
    .join(registry, Arel::Nodes::InnerJoin)
    .on(projects[:id].eq(registry[:project_id]))

  joins(inner_join.join_sources)
end
def left_outer_join_project_registry def left_outer_join_project_registry
join_statement = join_statement =
arel_table arel_table
......
# frozen_string_literal: true
require 'spec_helper'
describe Geo::ProjectUpdatedRecentlyFinder, :geo do
  # The :delete strategy is used instead of transactions: a foreign table is
  # read through a different connection, which cannot see changes made inside
  # an open transaction.
  describe '#execute', :delete do
    let(:node) { create(:geo_node) }
    let(:group_1) { create(:group) }
    let(:group_2) { create(:group) }
    let(:nested_group_1) { create(:group, parent: group_1) }

    let!(:project_1) { create(:project, group: group_1) }
    let!(:project_2) { create(:project, group: nested_group_1) }
    let!(:project_3) { create(:project, group: group_2) }
    let!(:project_4) { create(:project, group: group_1) }

    subject { described_class.new(current_node: node, shard_name: 'default', batch_size: 100) }

    before do
      skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled?

      # project_4 is the only project stored on the 'foo' shard.
      project_4.update_column(:repository_storage, 'foo')

      # project_1 and project_2 get a dirty repository, project_3 stays clean
      # and project_4 gets a dirty wiki.
      [project_1, project_2].each do |project|
        create(:geo_project_registry, :synced, :repository_dirty, project: project)
      end
      create(:geo_project_registry, :synced, project: project_3)
      create(:geo_project_registry, :synced, :wiki_dirty, project: project_4)
    end

    context 'without selective sync' do
      it 'returns projects with a dirty entry on the tracking database' do
        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by namespace' do
      before do
        node.update!(selective_sync_type: 'namespaces', namespaces: [group_1])
      end

      it 'returns projects that belong to the namespaces with a dirty entry on the tracking database' do
        expect(subject.execute).to match_ids(project_1, project_2)
      end
    end

    context 'with selective sync by shard' do
      before do
        node.update!(selective_sync_type: 'shards', selective_sync_shards: ['foo'])
      end

      it 'does not return registries when selected shards to sync does not include the shard_name' do
        finder = described_class.new(current_node: node, shard_name: 'default', batch_size: 100)

        expect(finder.execute).to be_empty
      end

      it 'returns projects that belong to the shards with a dirty entry on the tracking database' do
        # A second project on the 'foo' shard whose registry is synced and not
        # dirty; it must not be returned.
        clean_project_on_foo = create(:project, group: group_1)
        clean_project_on_foo.update_column(:repository_storage, 'foo')
        create(:geo_project_registry, :synced, project: clean_project_on_foo)

        finder = described_class.new(current_node: node, shard_name: 'foo', batch_size: 100)

        expect(finder.execute).to match_ids(project_4)
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment