Commit 9910e714 authored by James Fargher's avatar James Fargher

Allow setting max concurrency for gitaly-backup

Uses the existing env var GITLAB_BACKUP_MAX_CONCURRENCY to control
concurrency in gitaly-backup. gitaly-backup is behind a feature flag.
parent 669b4712
......@@ -3,8 +3,9 @@
module Backup
# Backup and restores repositories using gitaly-backup
class GitalyBackup
def initialize(progress)
def initialize(progress, parallel: nil)
@progress = progress
@parallel = parallel
end
def start(type)
......@@ -19,8 +20,11 @@ module Backup
raise Error, "unknown backup type: #{type}"
end
args = []
args += ['-parallel', @parallel.to_s] if type == :create && @parallel
@read_io, @write_io = IO.pipe
@pid = Process.spawn(bin_path, command, '-path', backup_repos_path, in: @read_io, out: progress)
@pid = Process.spawn(bin_path, command, '-path', backup_repos_path, *args, in: @read_io, out: @progress)
end
def wait
......@@ -48,9 +52,11 @@ module Backup
}.merge(Gitlab::GitalyClient.connection_data(repository.storage)).to_json)
end
private
def parallel_enqueue?
false
end
attr_reader :progress
private
def started?
@pid.present?
......
......@@ -44,6 +44,10 @@ module Backup
end
end
def parallel_enqueue?
true
end
private
attr_reader :progress
......
......@@ -12,7 +12,10 @@ module Backup
def dump(max_concurrency:, max_storage_concurrency:)
strategy.start(:create)
if max_concurrency <= 1 && max_storage_concurrency <= 1
# gitaly-backup is designed to handle concurrency on its own. So we want
# to avoid entering the buggy concurrency code here when gitaly-backup
# is enabled.
if (max_concurrency <= 1 && max_storage_concurrency <= 1) || !strategy.parallel_enqueue?
return enqueue_consecutive
end
......
......@@ -297,7 +297,8 @@ namespace :gitlab do
def repository_backup_strategy
if Feature.enabled?(:gitaly_backup)
Backup::GitalyBackup.new(progress)
max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence
Backup::GitalyBackup.new(progress, parallel: max_concurrency)
else
Backup::GitalyRpcBackup.new(progress)
end
......
......@@ -3,6 +3,7 @@
require 'spec_helper'
RSpec.describe Backup::GitalyBackup do
let(:parallel) { nil }
let(:progress) do
Tempfile.new('progress').tap do |progress|
progress.unlink
......@@ -13,7 +14,7 @@ RSpec.describe Backup::GitalyBackup do
progress.close
end
subject { described_class.new(progress) }
subject { described_class.new(progress, parallel: parallel) }
context 'unknown' do
it 'fails to start unknown' do
......@@ -30,6 +31,8 @@ RSpec.describe Backup::GitalyBackup do
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
expect(Process).to receive(:spawn).with(anything, 'create', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
......@@ -45,6 +48,17 @@ RSpec.describe Backup::GitalyBackup do
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end
context 'parallel option set' do
let(:parallel) { 3 }
it 'passes parallel option through' do
expect(Process).to receive(:spawn).with(anything, 'create', '-path', anything, '-parallel', '3', { in: anything, out: progress }).and_call_original
subject.start(:create)
subject.wait
end
end
it 'raises when the exit code not zero' do
expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false'))
......@@ -83,6 +97,8 @@ RSpec.describe Backup::GitalyBackup do
copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')
expect(Process).to receive(:spawn).with(anything, 'restore', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI)
......@@ -100,6 +116,17 @@ RSpec.describe Backup::GitalyBackup do
expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1'])
end
context 'parallel option set' do
let(:parallel) { 3 }
it 'does not pass parallel option through' do
expect(Process).to receive(:spawn).with(anything, 'restore', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:restore)
subject.wait
end
end
it 'raises when the exit code not zero' do
expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false'))
......
......@@ -4,7 +4,8 @@ require 'spec_helper'
RSpec.describe Backup::Repositories do
let(:progress) { spy(:stdout) }
let(:strategy) { spy(:strategy) }
let(:parallel_enqueue) { true }
let(:strategy) { spy(:strategy, parallel_enqueue?: parallel_enqueue) }
subject { described_class.new(progress, strategy: strategy) }
......@@ -80,6 +81,22 @@ RSpec.describe Backup::Repositories do
end
end
context 'concurrency with a strategy without parallel enqueueing support' do
let(:parallel_enqueue) { false }
it 'enqueues all projects sequentially' do
expect(Thread).not_to receive(:new)
expect(strategy).to receive(:start).with(:create)
projects.each do |project|
expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 2, max_storage_concurrency: 2)
end
end
[4, 10].each do |max_storage_concurrency|
context "max_storage_concurrency #{max_storage_concurrency}", quarantine: 'https://gitlab.com/gitlab-org/gitlab/-/issues/241701' do
let(:storage_keys) { %w[default test_second_storage] }
......
......@@ -377,6 +377,11 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end
it 'passes through concurrency environment variables' do
# The way concurrency is handled will change with the `gitaly_backup`
# feature flag. For now we need to check that both ways continue to
# work. This will be cleaned up in the rollout issue.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/333034
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5)
stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2)
......@@ -385,6 +390,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
.with(max_concurrency: 5, max_storage_concurrency: 2)
.and_call_original
end
expect(::Backup::GitalyBackup).to receive(:new).with(anything, parallel: 5).and_call_original
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment