Commit c92f37e8 authored by Douglas Barbosa Alexandre

Merge branch...

Merge branch '11504-improve-geo-selective-sync-worker-to-avoid-repeated-individual-selects-on-project-registry-geo' into 'master'

Geo: Improve performance of selective sync worker

Closes #11504

See merge request gitlab-org/gitlab-ee!13222
parents cdecd7b1 b83f297f
......@@ -14,6 +14,18 @@ module Geo
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links
# Projects that fall outside this node's selective sync settings, joined
# against the project registry via inner_join_project_registry.
#
# The base scope is chosen by the selective sync type:
# - by namespaces: projects_outside_selected_namespaces
# - by shards:     projects_outside_selected_shards
# - otherwise:     Geo::Fdw::Project.none
#
# @return the chosen scope after inner_join_project_registry is applied
def projects_outside_selective_sync
  return projects_outside_selected_namespaces.inner_join_project_registry if selective_sync_by_namespaces?
  return projects_outside_selected_shards.inner_join_project_registry if selective_sync_by_shards?

  Geo::Fdw::Project.none.inner_join_project_registry
end
def job_artifacts
Geo::Fdw::Ci::JobArtifact.all unless selective_sync?
......
......@@ -3,12 +3,15 @@
module Geo
module Fdw
# Model backed by the table name that
# Gitlab::Geo::Fdw.foreign_table_name('namespaces') returns.
class Namespace < ::Geo::BaseFdw
include Routable
self.primary_key = :id
# A nil inheritance column turns off ActiveRecord single-table inheritance
# for this model.
self.inheritance_column = nil
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('namespaces')
# Geo nodes are reached through the geo_node_namespace_links join model.
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :geo_nodes, class_name: 'Geo::Fdw::GeoNode', through: :geo_node_namespace_links
# NOTE(review): class_name is unqualified here, unlike the other associations
# in this class, which spell out 'Geo::Fdw::...'. Confirm it resolves to the
# intended class.
belongs_to :parent, class_name: "Namespace"
end
end
end
......@@ -4,6 +4,7 @@ module Geo
module Fdw
class Project < ::Geo::BaseFdw
include Gitlab::SQL::Pattern
include Routable
self.primary_key = :id
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('projects')
......@@ -11,6 +12,32 @@ module Geo
has_many :job_artifacts, class_name: 'Geo::Fdw::Ci::JobArtifact'
has_many :lfs_objects_projects, class_name: 'Geo::Fdw::LfsObjectsProject'
has_many :lfs_objects, class_name: 'Geo::Fdw::LfsObject', through: :lfs_objects_projects
belongs_to :namespace, class_name: 'Geo::Fdw::Namespace'
scope :outside_shards, -> (shard_names) { where.not(repository_storage: Array(shard_names)) }
alias_method :parent, :namespace
delegate :disk_path, to: :storage
# Whether this project's storage_version is at least the version listed for
# +feature+ in ::Project::HASHED_STORAGE_FEATURES.
#
# @param feature a key of ::Project::HASHED_STORAGE_FEATURES (e.g. :repository)
# @return true/false from the version comparison, or the falsy
#   storage_version itself when it is not set
# @raise [ArgumentError] when +feature+ is not a key of the features table
def hashed_storage?(feature)
  required_versions = ::Project::HASHED_STORAGE_FEATURES
  raise ArgumentError, _("Invalid feature") unless required_versions.include?(feature)

  current_version = storage_version
  return current_version unless current_version

  current_version >= required_versions[feature]
end
# Memoized Repository built from this record's full_path and disk_path,
# with the record itself passed as the second argument.
def repository
  return @repository if @repository

  @repository = Repository.new(full_path, self, disk_path: disk_path)
end
# Memoized storage object for this record: Storage::HashedProject when
# hashed_storage?(:repository) is truthy, Storage::LegacyProject otherwise.
def storage
  @storage ||= begin
    storage_class = hashed_storage?(:repository) ? Storage::HashedProject : Storage::LegacyProject
    storage_class.new(self)
  end
end
class << self
def missing_project_registry
......@@ -42,8 +69,6 @@ module Geo
where(repository_storage: Array(shard_names))
end
private
def inner_join_project_registry
join_statement =
arel_table
......@@ -53,6 +78,8 @@ module Geo
joins(join_statement.join_sources)
end
private
def left_outer_join_project_registry
join_statement =
arel_table
......
......@@ -14,10 +14,10 @@ module Geo
# rubocop:disable CodeReuse/ActiveRecord
def perform(geo_node_id)
try_obtain_lease do
node = GeoNode.find(geo_node_id)
node = Geo::Fdw::GeoNode.find(geo_node_id)
break unless node.selective_sync?
projects_to_clean_up(node).find_in_batches(batch_size: BATCH_SIZE) do |batch|
node.projects_outside_selective_sync.find_in_batches(batch_size: BATCH_SIZE) do |batch|
batch.each do |project|
clean_up_repositories(project)
end
......@@ -30,20 +30,7 @@ module Geo
private
# Picks the scope of projects to clean up for +node+ based on its
# selective sync type; falls back to Project.none when the node is neither
# namespace- nor shard-scoped.
#
# @param node an object answering the selective_sync_by_* predicates and
#   the projects_outside_selected_* readers
def projects_to_clean_up(node)
  return node.projects_outside_selected_namespaces if node.selective_sync_by_namespaces?
  return node.projects_outside_selected_shards if node.selective_sync_by_shards?

  Project.none
end
# rubocop:disable CodeReuse/ActiveRecord
def clean_up_repositories(project)
return unless Geo::ProjectRegistry.exists?(project_id: project.id)
job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage)
if job_id
......@@ -52,7 +39,6 @@ module Geo
log_error('Could not schedule a repository clean up', project_id: project.id, shard: project.repository.storage, disk_path: project.disk_path)
end
end
# rubocop:enable CodeReuse/ActiveRecord
def lease_timeout
LEASE_TIMEOUT
......
---
title: 'Geo: Improve performance of clean up worker for selective sync'
merge_request:
author:
type: performance
......@@ -3,20 +3,20 @@
require 'spec_helper'
RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
# Checks the association declarations (target class names and the
# `through` join) on the described model.
context 'relationships' do
it { is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink') }
it { is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links) }
end
describe '#projects', :geo_fdw do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
subject { described_class.find(node.id) }
it 'returns all registries without selective sync' do
......@@ -45,13 +45,6 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe '#project_registries', :geo_fdw do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
let!(:registry_1) { create(:geo_project_registry, project: project_1) }
let!(:registry_2) { create(:geo_project_registry, project: project_2) }
let!(:registry_3) { create(:geo_project_registry, project: project_3) }
......@@ -80,4 +73,62 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
expect(subject.project_registries).to be_empty
end
end
# Exercises #projects_outside_selective_sync for both selective sync types.
#
# Every project except project_3 gets a registry entry in the `before` block.
# project_3 is created eagerly (let!) without one and is absent from every
# expected list below.
describe '#projects_outside_selective_sync', :geo_fdw do
  subject { described_class.find(node.id) }

  let(:synced_group) { create(:group) }
  let(:synced_subgroup) { create(:group, parent: synced_group) }
  let(:unsynced_group) { create(:group) }
  let(:project_1) { create(:project, group: synced_group) }
  let(:project_2) { create(:project, group: synced_group) }
  let!(:project_3) { create(:project, :repository, group: unsynced_group) }
  let(:project_4) { create(:project, :repository, group: unsynced_group) }
  let(:project_5) { create(:project, group: synced_subgroup) }
  let(:project_6) { create(:project, group: synced_subgroup) }
  let(:project_7) { create(:project) }
  let(:project_8) { create(:project) }

  before do
    create(:geo_project_registry, project: project_1)
    create(:geo_project_registry, project: project_2)
    create(:geo_project_registry, project: project_4)
    create(:geo_project_registry, project: project_5)
    create(:geo_project_registry, project: project_6)
    create(:geo_project_registry, project: project_7)
    create(:geo_project_registry, project: project_8)
  end

  # Re-load the given projects through the FDW model so they can be compared
  # with the records returned by the method under test.
  def projects_to_fdw(projects)
    projects.map { |project| Geo::Fdw::Project.find(project.id) }
  end

  context 'with selective sync by namespace' do
    before do
      node.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
    end

    it 'returns projects that does not belong to the selected namespaces' do
      expected_projects = projects_to_fdw([project_4, project_7, project_8])

      # projects_outside_selective_sync applies no ordering itself, so the
      # comparison must not depend on the order rows come back in.
      expect(subject.projects_outside_selective_sync).to match_array(expected_projects)
    end
  end

  context 'with selective sync by shard' do
    before do
      project_7.update_column(:repository_storage, 'broken')
      project_8.update_column(:repository_storage, 'broken')

      node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
    end

    it 'returns synced projects that does not belong to the selected shards' do
      expected_projects = projects_to_fdw([project_1, project_2, project_4, project_5, project_6])

      # Order-insensitive for the same reason as the namespace example.
      expect(subject.projects_outside_selective_sync).to match_array(expected_projects)
    end
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment