Commit cdc479e7 authored by Max Woolf's avatar Max Woolf

Merge branch 'gitaly-backup-parallel' into 'master'

Allow setting max concurrency for gitaly-backup

See merge request gitlab-org/gitlab!64466
parents 09439a4a 9910e714
...@@ -3,8 +3,9 @@ ...@@ -3,8 +3,9 @@
module Backup module Backup
# Backup and restores repositories using gitaly-backup # Backup and restores repositories using gitaly-backup
class GitalyBackup class GitalyBackup
def initialize(progress) def initialize(progress, parallel: nil)
@progress = progress @progress = progress
@parallel = parallel
end end
def start(type) def start(type)
...@@ -19,8 +20,11 @@ module Backup ...@@ -19,8 +20,11 @@ module Backup
raise Error, "unknown backup type: #{type}" raise Error, "unknown backup type: #{type}"
end end
args = []
args += ['-parallel', @parallel.to_s] if type == :create && @parallel
@read_io, @write_io = IO.pipe @read_io, @write_io = IO.pipe
@pid = Process.spawn(bin_path, command, '-path', backup_repos_path, in: @read_io, out: progress) @pid = Process.spawn(bin_path, command, '-path', backup_repos_path, *args, in: @read_io, out: @progress)
end end
def wait def wait
...@@ -48,9 +52,11 @@ module Backup ...@@ -48,9 +52,11 @@ module Backup
}.merge(Gitlab::GitalyClient.connection_data(repository.storage)).to_json) }.merge(Gitlab::GitalyClient.connection_data(repository.storage)).to_json)
end end
private def parallel_enqueue?
false
end
attr_reader :progress private
def started? def started?
@pid.present? @pid.present?
......
...@@ -44,6 +44,10 @@ module Backup ...@@ -44,6 +44,10 @@ module Backup
end end
end end
def parallel_enqueue?
true
end
private private
attr_reader :progress attr_reader :progress
......
...@@ -12,7 +12,10 @@ module Backup ...@@ -12,7 +12,10 @@ module Backup
def dump(max_concurrency:, max_storage_concurrency:) def dump(max_concurrency:, max_storage_concurrency:)
strategy.start(:create) strategy.start(:create)
if max_concurrency <= 1 && max_storage_concurrency <= 1 # gitaly-backup is designed to handle concurrency on its own. So we want
# to avoid entering the buggy concurrency code here when gitaly-backup
# is enabled.
if (max_concurrency <= 1 && max_storage_concurrency <= 1) || !strategy.parallel_enqueue?
return enqueue_consecutive return enqueue_consecutive
end end
......
...@@ -298,7 +298,8 @@ namespace :gitlab do ...@@ -298,7 +298,8 @@ namespace :gitlab do
def repository_backup_strategy def repository_backup_strategy
if Feature.enabled?(:gitaly_backup) if Feature.enabled?(:gitaly_backup)
Backup::GitalyBackup.new(progress) max_concurrency = ENV['GITLAB_BACKUP_MAX_CONCURRENCY'].presence
Backup::GitalyBackup.new(progress, parallel: max_concurrency)
else else
Backup::GitalyRpcBackup.new(progress) Backup::GitalyRpcBackup.new(progress)
end end
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Backup::GitalyBackup do RSpec.describe Backup::GitalyBackup do
let(:parallel) { nil }
let(:progress) do let(:progress) do
Tempfile.new('progress').tap do |progress| Tempfile.new('progress').tap do |progress|
progress.unlink progress.unlink
...@@ -13,7 +14,7 @@ RSpec.describe Backup::GitalyBackup do ...@@ -13,7 +14,7 @@ RSpec.describe Backup::GitalyBackup do
progress.close progress.close
end end
subject { described_class.new(progress) } subject { described_class.new(progress, parallel: parallel) }
context 'unknown' do context 'unknown' do
it 'fails to start unknown' do it 'fails to start unknown' do
...@@ -30,6 +31,8 @@ RSpec.describe Backup::GitalyBackup do ...@@ -30,6 +31,8 @@ RSpec.describe Backup::GitalyBackup do
project_snippet = create(:project_snippet, :repository, project: project) project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner) personal_snippet = create(:personal_snippet, :repository, author: project.owner)
expect(Process).to receive(:spawn).with(anything, 'create', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:create) subject.start(:create)
subject.enqueue(project, Gitlab::GlRepository::PROJECT) subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI) subject.enqueue(project, Gitlab::GlRepository::WIKI)
...@@ -45,6 +48,17 @@ RSpec.describe Backup::GitalyBackup do ...@@ -45,6 +48,17 @@ RSpec.describe Backup::GitalyBackup do
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end end
context 'parallel option set' do
let(:parallel) { 3 }
it 'passes parallel option through' do
expect(Process).to receive(:spawn).with(anything, 'create', '-path', anything, '-parallel', '3', { in: anything, out: progress }).and_call_original
subject.start(:create)
subject.wait
end
end
it 'raises when the exit code not zero' do it 'raises when the exit code not zero' do
expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false')) expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false'))
...@@ -83,6 +97,8 @@ RSpec.describe Backup::GitalyBackup do ...@@ -83,6 +97,8 @@ RSpec.describe Backup::GitalyBackup do
copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle') copy_bundle_to_backup_path('personal_snippet_repo.bundle', personal_snippet.disk_path + '.bundle')
copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle') copy_bundle_to_backup_path('project_snippet_repo.bundle', project_snippet.disk_path + '.bundle')
expect(Process).to receive(:spawn).with(anything, 'restore', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:restore) subject.start(:restore)
subject.enqueue(project, Gitlab::GlRepository::PROJECT) subject.enqueue(project, Gitlab::GlRepository::PROJECT)
subject.enqueue(project, Gitlab::GlRepository::WIKI) subject.enqueue(project, Gitlab::GlRepository::WIKI)
...@@ -100,6 +116,17 @@ RSpec.describe Backup::GitalyBackup do ...@@ -100,6 +116,17 @@ RSpec.describe Backup::GitalyBackup do
expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1']) expect(collect_commit_shas.call(project_snippet.repository)).to eq(['6e44ba56a4748be361a841e759c20e421a1651a1'])
end end
context 'parallel option set' do
let(:parallel) { 3 }
it 'does not pass parallel option through' do
expect(Process).to receive(:spawn).with(anything, 'restore', '-path', anything, { in: anything, out: progress }).and_call_original
subject.start(:restore)
subject.wait
end
end
it 'raises when the exit code not zero' do it 'raises when the exit code not zero' do
expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false')) expect(subject).to receive(:bin_path).and_return(Gitlab::Utils.which('false'))
......
...@@ -4,7 +4,8 @@ require 'spec_helper' ...@@ -4,7 +4,8 @@ require 'spec_helper'
RSpec.describe Backup::Repositories do RSpec.describe Backup::Repositories do
let(:progress) { spy(:stdout) } let(:progress) { spy(:stdout) }
let(:strategy) { spy(:strategy) } let(:parallel_enqueue) { true }
let(:strategy) { spy(:strategy, parallel_enqueue?: parallel_enqueue) }
subject { described_class.new(progress, strategy: strategy) } subject { described_class.new(progress, strategy: strategy) }
...@@ -80,6 +81,22 @@ RSpec.describe Backup::Repositories do ...@@ -80,6 +81,22 @@ RSpec.describe Backup::Repositories do
end end
end end
context 'concurrency with a strategy without parallel enqueueing support' do
let(:parallel_enqueue) { false }
it 'enqueues all projects sequentially' do
expect(Thread).not_to receive(:new)
expect(strategy).to receive(:start).with(:create)
projects.each do |project|
expect(strategy).to receive(:enqueue).with(project, Gitlab::GlRepository::PROJECT)
end
expect(strategy).to receive(:wait)
subject.dump(max_concurrency: 2, max_storage_concurrency: 2)
end
end
[4, 10].each do |max_storage_concurrency| [4, 10].each do |max_storage_concurrency|
context "max_storage_concurrency #{max_storage_concurrency}", quarantine: 'https://gitlab.com/gitlab-org/gitlab/-/issues/241701' do context "max_storage_concurrency #{max_storage_concurrency}", quarantine: 'https://gitlab.com/gitlab-org/gitlab/-/issues/241701' do
let(:storage_keys) { %w[default test_second_storage] } let(:storage_keys) { %w[default test_second_storage] }
......
...@@ -394,6 +394,11 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -394,6 +394,11 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end end
it 'passes through concurrency environment variables' do it 'passes through concurrency environment variables' do
# The way concurrency is handled will change with the `gitaly_backup`
# feature flag. For now we need to check that both ways continue to
# work. This will be cleaned up in the rollout issue.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/333034
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5) stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5)
stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2)
...@@ -402,6 +407,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -402,6 +407,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
.with(max_concurrency: 5, max_storage_concurrency: 2) .with(max_concurrency: 5, max_storage_concurrency: 2)
.and_call_original .and_call_original
end end
expect(::Backup::GitalyBackup).to receive(:new).with(anything, parallel: 5).and_call_original
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout_from_any_process
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment