Commit bab05d82 authored by Francisco Javier López's avatar Francisco Javier López Committed by Jan Provaznik

Add snippets to gitlab backups

In this commit, we add snippets to GitLab backups. Well, not
exactly snippets but the snippet repositories which weren't
added before.
parent 3dc10a64
......@@ -5,6 +5,10 @@ module Shardable
included do
belongs_to :shard
scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) }
scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) }
validates :shard, presence: true
end
......
......@@ -676,8 +676,6 @@ class Project < ApplicationRecord
scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :for_group, -> (group) { where(group: group) }
scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) }
scope :for_repository_storage, -> (repository_storage) { where(repository_storage: repository_storage) }
scope :excluding_repository_storage, -> (repository_storage) { where.not(repository_storage: repository_storage) }
class << self
# Searches for a list of projects based on the query given in `query`.
......
---
title: Add snippets to GitLab backups
merge_request: 43694
author:
type: changed
......@@ -54,6 +54,7 @@ including:
- LFS objects
- Container Registry images
- GitLab Pages content
- Snippets
CAUTION: **Warning:**
GitLab does not back up any configuration files, SSL certificates, or system
......@@ -1081,7 +1082,7 @@ For more information, see:
- PostgreSQL issue tracker:
- [Not being a superuser](https://www.postgresql.org/message-id/201110220712.30886.adrian.klaver@gmail.com).
- [Having different owners](https://www.postgresql.org/message-id/2039.1177339749@sss.pgh.pa.us).
- Stack Overflow: [Resulting errors](https://stackoverflow.com/questions/4368789/error-must-be-owner-of-language-plpgsql).
### When the secrets file is lost
......
......@@ -17,9 +17,7 @@ module Backup
return dump_consecutive
end
if Project.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
raise Error, 'repositories.storages in gitlab.yml is misconfigured'
end
check_valid_storages!
semaphore = Concurrent::Semaphore.new(max_concurrency)
errors = Queue.new
......@@ -53,16 +51,16 @@ module Backup
private
def restore_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
def check_valid_storages!
[ProjectRepository, SnippetRepository].each do |klass|
if klass.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{klass}"
end
end
end
def backup_repos_path
File.join(Gitlab.config.backup.path, 'repositories')
@backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end
def prepare
......@@ -72,11 +70,20 @@ module Backup
end
def dump_consecutive
Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project|
dump_consecutive_projects
dump_consecutive_snippets
end
def dump_consecutive_projects
project_relation.find_each(batch_size: 1000) do |project|
dump_project(project)
end
end
def dump_consecutive_snippets
Snippet.find_each(batch_size: 1000) { |snippet| dump_snippet(snippet) }
end
def dump_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new
queue = InterlockSizedQueue.new(1)
......@@ -84,13 +91,18 @@ module Backup
threads = Array.new(max_storage_concurrency) do
Thread.new do
Rails.application.executor.wrap do
while project = queue.pop
while container = queue.pop
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
semaphore.acquire
end
begin
dump_project(project)
case container
when Project
dump_project(container)
when Snippet
dump_snippet(container)
end
rescue => e
errors << e
break
......@@ -102,11 +114,7 @@ module Backup
end
end
Project.for_repository_storage(storage).includes(:route, :group, namespace: :owner).find_each(batch_size: 100) do |project|
break unless errors.empty?
queue.push(project)
end
enqueue_records_for_storage(storage, queue, errors)
raise errors.pop unless errors.empty?
ensure
......@@ -122,6 +130,36 @@ module Backup
backup_repository(project, Gitlab::GlRepository::DESIGN)
end
def dump_snippet(snippet)
backup_repository(snippet, Gitlab::GlRepository::SNIPPET)
end
def enqueue_records_for_storage(storage, queue, errors)
records_to_enqueue(storage).each do |relation|
relation.find_each(batch_size: 100) do |project|
break unless errors.empty?
queue.push(project)
end
end
end
def records_to_enqueue(storage)
[projects_in_storage(storage), snippets_in_storage(storage)]
end
def projects_in_storage(storage)
project_relation.id_in(ProjectRepository.for_repository_storage(storage).select(:project_id))
end
def project_relation
Project.includes(:route, :group, namespace: :owner)
end
def snippets_in_storage(storage)
Snippet.id_in(SnippetRepository.for_repository_storage(storage).select(:snippet_id))
end
def backup_repository(container, type)
BackupRestore.new(
progress,
......@@ -130,6 +168,14 @@ module Backup
).backup
end
def restore_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
end
def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..."
......
......@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do
RSpec.shared_examples 'creates repository bundles' do
specify :aggregate_failures do
# Add data to the wiki and design repositories, so they will be included in the dump.
# Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end
end
......
......@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do
it { is_expected.to belong_to(:project) }
end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:project_repository) }
let_it_be(:record_2, reload: true) { create(:project_repository) }
end
describe '.find_project' do
it 'finds project by disk path' do
project = create(:project)
......
......@@ -5567,32 +5567,6 @@ RSpec.describe Project do
end
end
describe '.for_repository_storage' do
it 'returns the projects for a given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'default')
create(:project, repository_storage: 'test_second_storage')
expect(described_class.for_repository_storage('default')).to eq([expected_project])
end
end
describe '.excluding_repository_storage' do
it 'returns the projects excluding the given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'test_second_storage')
create(:project, repository_storage: 'default')
expect(described_class.excluding_repository_storage('default')).to eq([expected_project])
end
end
describe '.deployments' do
subject { project.deployments }
......
......@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do
it { is_expected.to belong_to(:snippet) }
end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:snippet_repository) }
let_it_be(:record_2, reload: true) { create(:snippet_repository) }
end
describe '.find_snippet' do
it 'finds snippet by disk path' do
snippet = create(:snippet, author: user)
......
# frozen_string_literal: true
RSpec.shared_examples 'shardable scopes' do
let_it_be(:secondary_shard) { create(:shard, name: 'test_second_storage') }
before do
record_2.update!(shard: secondary_shard)
end
describe '.for_repository_storage' do
it 'returns the objects for a given repository storage' do
expect(described_class.for_repository_storage('default')).to eq([record_1])
end
end
describe '.excluding_repository_storage' do
it 'returns the objects excluding the given repository storage' do
expect(described_class.excluding_repository_storage('default')).to eq([record_2])
end
end
end
......@@ -284,67 +284,74 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end
context 'multiple repository storages' do
let_it_be(:default_storage_hash) { Gitlab.config.repositories.storages.default.to_h }
include StubConfiguration
let(:default_storage_name) { 'default' }
let(:second_storage_name) { 'test_second_storage' }
before do
# We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry')
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
# Avoid asking gitaly about the root ref (which will fail because of the
# mocked storages)
allow_any_instance_of(Repository).to receive(:empty?).and_return(false)
FileUtils.mkdir_p(b_storage_dir)
# Even when overriding the storage, we have to move it there, so it exists
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
FileUtils.mv(
File.join(Settings.absolute(storages['default'].legacy_disk_path), project_b.repository.disk_path + '.git'),
Rails.root.join(storages['test_second_storage'].legacy_disk_path, project_b.repository.disk_path + '.git')
)
end
stub_storage_settings( second_storage_name => {
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address,
'path' => TestEnv::SECOND_STORAGE_PATH
})
end
after do
FileUtils.rm_rf(test_second_storage_dir)
end
let(:test_second_storage_dir) { Dir.mktmpdir }
let(:test_second_storage) do
Gitlab::GitalyClient::StorageSettings.new(default_storage_hash.merge('path' => test_second_storage_dir))
end
let(:storages) do
{
'default' => Gitlab.config.repositories.storages.default,
'test_second_storage' => test_second_storage
}
end
let!(:project_a) { create(:project, :repository) }
let!(:project_a_wiki_page) { create(:wiki_page, container: project_a) }
let!(:project_a_design) { create(:design, :with_file, issue: create(:issue, project: project_a)) }
let!(:project_b) { create(:project, :repository, repository_storage: 'test_second_storage') }
let!(:b_storage_dir) { File.join(test_second_storage_dir, File.dirname(project_b.disk_path)) }
shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do
project_a = create(:project, :repository)
project_a.track_project_repository
project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_b.track_project_repository
project_snippet_b = create(:project_snippet, :repository, project: project_b, author: project_b.owner)
project_snippet_b.snippet_repository.update!(shard: project_b.project_repository.shard)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
move_repository_to_secondary(project_b)
move_repository_to_secondary(project_snippet_b)
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout
tar_contents, exit_status = Gitlab::Popen.popen(
%W{tar -tvf #{backup_tar} repositories}
)
tar_lines = tar_contents.lines.grep(/\.bundle/)
expect(exit_status).to eq(0)
expect(tar_contents).to include(
"repositories/#{project_a.disk_path}.bundle",
"repositories/#{project_a.disk_path}.wiki.bundle",
"repositories/#{project_a.disk_path}.design.bundle",
"repositories/#{project_b.disk_path}.bundle"
)
[
"#{project_a.disk_path}.bundle",
"#{project_a.disk_path}.wiki.bundle",
"#{project_a.disk_path}.design.bundle",
"#{project_b.disk_path}.bundle",
"#{project_snippet_a.disk_path}.bundle",
"#{project_snippet_b.disk_path}.bundle"
].each do |repo_name|
repo_lines = tar_lines.grep(/#{repo_name}/)
expect(repo_lines.size).to eq 1
# Checking that the size of the bundle is bigger than 0
expect(repo_lines.first.split[4].to_i > 0).to be true
end
end
def move_repository_to_secondary(record)
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
default_shard_legacy_path = Gitlab.config.repositories.storages.default.legacy_disk_path
secondary_legacy_path = Gitlab.config.repositories.storages[second_storage_name].legacy_disk_path
dst_dir = File.join(secondary_legacy_path, File.dirname(record.disk_path))
FileUtils.mkdir_p(dst_dir) unless Dir.exist?(dst_dir)
FileUtils.mv(
File.join(default_shard_legacy_path, record.disk_path + '.git'),
File.join(secondary_legacy_path, record.disk_path + '.git')
)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment