Commit edab429c authored by Kamil Trzciński

Merge branch 'backup-improvements' into 'master'

Reduce disk IO and space usage during backups

This is based on improvements made to the GitLab CI 8.0 backup script.

- Avoid creating many small intermediate files while backing up builds and uploads by using tar and light gzip compression (sketched below)
- Use the same backup/restore code for uploads and builds
- Only store a compressed intermediate DB dump

See merge request !1520
parents 329e067f 58260a03
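
The first and third bullets above come down to streaming an archive through light compression instead of copying files one by one. A minimal sketch of that idea, assuming hypothetical source and tarball paths (the real code is the new Backup::Files class in the diff below):

require 'open3'

# Paths are placeholders for illustration; the actual implementation is the
# Backup::Files class introduced in this merge request.
source_dir   = '/var/opt/gitlab/builds'
tarball_path = '/var/opt/gitlab/backups/builds.tar.gz'

# Stream tar straight into `gzip -1` (fast, light compression) and write the
# result with owner-only permissions; no per-file intermediate copies are made.
statuses = Open3.pipeline(
  %W(tar -C #{source_dir} -cf - .),
  %w(gzip -c -1),
  out: [tarball_path, 'w', 0600]
)
abort 'pipeline failed' unless statuses.all?(&:success?)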
 module Backup
-  class Builds
-    attr_reader :app_builds_dir, :backup_builds_dir, :backup_dir
-
+  class Builds < Files
     def initialize
-      @app_builds_dir = Settings.gitlab_ci.builds_path
-      @backup_dir = Gitlab.config.backup.path
-      @backup_builds_dir = File.join(Gitlab.config.backup.path, 'builds')
-    end
-
-    # Copy builds from builds directory to backup/builds
-    def dump
-      FileUtils.rm_rf(backup_builds_dir)
-      # Ensure the parent dir of backup_builds_dir exists
-      FileUtils.mkdir_p(Gitlab.config.backup.path)
-      # Fail if somebody raced to create backup_builds_dir before us
-      FileUtils.mkdir(backup_builds_dir, mode: 0700)
-      FileUtils.cp_r(app_builds_dir, backup_dir)
+      super('builds', Settings.gitlab_ci.builds_path)
     end
 
-    def restore
-      backup_existing_builds_dir
-      FileUtils.cp_r(backup_builds_dir, app_builds_dir)
-    end
-
-    def backup_existing_builds_dir
-      timestamped_builds_path = File.join(app_builds_dir, '..', "builds.#{Time.now.to_i}")
-      if File.exists?(app_builds_dir)
-        FileUtils.mv(app_builds_dir, File.expand_path(timestamped_builds_path))
-      end
+    def create_files_dir
+      Dir.mkdir(app_files_dir, 0700)
     end
   end
 end
@@ -2,26 +2,26 @@ require 'yaml'
 module Backup
   class Database
-    attr_reader :config, :db_dir
+    attr_reader :config, :db_file_name
 
     def initialize
       @config = YAML.load_file(File.join(Rails.root,'config','database.yml'))[Rails.env]
-      @db_dir = File.join(Gitlab.config.backup.path, 'db')
+      @db_file_name = File.join(Gitlab.config.backup.path, 'db', 'database.sql.gz')
     end
 
     def dump
-      FileUtils.rm_rf(@db_dir)
-      # Ensure the parent dir of @db_dir exists
-      FileUtils.mkdir_p(Gitlab.config.backup.path)
-      # Fail if somebody raced to create @db_dir before us
-      FileUtils.mkdir(@db_dir, mode: 0700)
+      FileUtils.mkdir_p(File.dirname(db_file_name))
+      FileUtils.rm_f(db_file_name)
+      compress_rd, compress_wr = IO.pipe
+      compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
+      compress_rd.close
 
-      success = case config["adapter"]
+      dump_pid = case config["adapter"]
       when /^mysql/ then
         $progress.print "Dumping MySQL database #{config['database']} ... "
         # Workaround warnings from MySQL 5.6 about passwords on cmd line
         ENV['MYSQL_PWD'] = config["password"].to_s if config["password"]
-        system('mysqldump', *mysql_args, config['database'], out: db_file_name)
+        spawn('mysqldump', *mysql_args, config['database'], out: compress_wr)
       when "postgresql" then
         $progress.print "Dumping PostgreSQL database #{config['database']} ... "
         pg_env
@@ -30,48 +30,42 @@ module Backup
           pgsql_args << "-n"
           pgsql_args << Gitlab.config.backup.pg_schema
         end
-        system('pg_dump', *pgsql_args, config['database'], out: db_file_name)
+        spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr)
       end
-      report_success(success)
-      abort 'Backup failed' unless success
+      compress_wr.close
 
-      $progress.print 'Compressing database ... '
-      success = system('gzip', db_file_name)
+      success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? }
+
       report_success(success)
-      abort 'Backup failed: compress error' unless success
+      abort 'Backup failed' unless success
     end
 
     def restore
-      $progress.print 'Decompressing database ... '
-      success = system('gzip', '-d', db_file_name_gz)
-      report_success(success)
-      abort 'Restore failed: decompress error' unless success
+      decompress_rd, decompress_wr = IO.pipe
+      decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name)
+      decompress_wr.close
 
-      success = case config["adapter"]
+      restore_pid = case config["adapter"]
       when /^mysql/ then
         $progress.print "Restoring MySQL database #{config['database']} ... "
         # Workaround warnings from MySQL 5.6 about passwords on cmd line
         ENV['MYSQL_PWD'] = config["password"].to_s if config["password"]
-        system('mysql', *mysql_args, config['database'], in: db_file_name)
+        spawn('mysql', *mysql_args, config['database'], in: decompress_rd)
       when "postgresql" then
         $progress.print "Restoring PostgreSQL database #{config['database']} ... "
         pg_env
-        system('psql', config['database'], '-f', db_file_name)
+        spawn('psql', config['database'], in: decompress_rd)
       end
+      decompress_rd.close
+
+      success = [decompress_pid, restore_pid].all? { |pid| Process.waitpid(pid); $?.success? }
+
       report_success(success)
       abort 'Restore failed' unless success
     end
 
     protected
 
-    def db_file_name
-      File.join(db_dir, 'database.sql')
-    end
-
-    def db_file_name_gz
-      File.join(db_dir, 'database.sql.gz')
-    end
-
     def mysql_args
       args = {
         'host' => '--host',
...
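
The dump above now runs the database client and gzip as two concurrent processes joined by a pipe, instead of writing an uncompressed dump to disk first. A self-contained sketch of the same pattern, with a placeholder command and file name standing in for mysqldump/pg_dump and the real dump path:

# A stripped-down version of the pipe-and-spawn pattern used for the DB dump.
# `echo` stands in for mysqldump/pg_dump; file name and text are placeholders.
compress_rd, compress_wr = IO.pipe
compress_pid = spawn('gzip', '-1', '-c', in: compress_rd, out: ['dump.sql.gz', 'w', 0600])
compress_rd.close # the parent no longer needs the read end

dump_pid = spawn('echo', 'pretend this is the SQL dump', out: compress_wr)
compress_wr.close # close the write end so gzip sees EOF once the dump exits

# Reap both children; $? holds the status of the process waited on last,
# so check it inside the block after each waitpid.
success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? }
puts(success ? 'dump ok' : 'dump failed')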
require 'open3'

module Backup
  class Files
    attr_reader :name, :app_files_dir, :backup_tarball, :files_parent_dir

    def initialize(name, app_files_dir)
      @name = name
      @app_files_dir = File.realpath(app_files_dir)
      @files_parent_dir = File.realpath(File.join(@app_files_dir, '..'))
      @backup_tarball = File.join(Gitlab.config.backup.path, name + '.tar.gz')
    end

    # Copy files from public/files to backup/files
    def dump
      FileUtils.mkdir_p(Gitlab.config.backup.path)
      FileUtils.rm_f(backup_tarball)
      run_pipeline!([%W(tar -C #{app_files_dir} -cf - .), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600])
    end

    def restore
      backup_existing_files_dir
      create_files_dir

      run_pipeline!([%W(gzip -cd), %W(tar -C #{app_files_dir} -xf -)], in: backup_tarball)
    end

    def backup_existing_files_dir
      timestamped_files_path = File.join(files_parent_dir, "#{name}.#{Time.now.to_i}")
      if File.exists?(app_files_dir)
        FileUtils.mv(app_files_dir, File.expand_path(timestamped_files_path))
      end
    end

    def run_pipeline!(cmd_list, options={})
      status_list = Open3.pipeline(*cmd_list, options)
      abort 'Backup failed' unless status_list.compact.all?(&:success?)
    end
  end
end
@@ -150,11 +150,11 @@ module Backup
     private
 
     def backup_contents
-      folders_to_backup + ["backup_information.yml"]
+      folders_to_backup + ["uploads.tar.gz", "builds.tar.gz", "backup_information.yml"]
     end
 
     def folders_to_backup
-      folders = %w{repositories db uploads builds}
+      folders = %w{repositories db}
 
       if ENV["SKIP"]
         return folders.reject{ |folder| ENV["SKIP"].include?(folder) }
...
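
The SKIP handling above is a plain substring check against ENV["SKIP"]; uploads and builds are no longer folders inside the backup tar, so only repositories and db remain in the list. A quick illustration with hypothetical values:

# Hypothetical values, mirroring folders_to_backup above.
ENV['SKIP'] = 'db,builds'

folders = %w(repositories db)
folders.reject { |folder| ENV['SKIP'].include?(folder) }
# => ["repositories"]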
 module Backup
-  class Uploads
-    attr_reader :app_uploads_dir, :backup_uploads_dir, :backup_dir
-
+  class Uploads < Files
     def initialize
-      @app_uploads_dir = File.realpath(Rails.root.join('public', 'uploads'))
-      @backup_dir = Gitlab.config.backup.path
-      @backup_uploads_dir = File.join(Gitlab.config.backup.path, 'uploads')
+      super('uploads', Rails.root.join('public/uploads'))
     end
 
-    # Copy uploads from public/uploads to backup/uploads
-    def dump
-      FileUtils.rm_rf(backup_uploads_dir)
-      # Ensure the parent dir of backup_uploads_dir exists
-      FileUtils.mkdir_p(Gitlab.config.backup.path)
-      # Fail if somebody raced to create backup_uploads_dir before us
-      FileUtils.mkdir(backup_uploads_dir, mode: 0700)
-      FileUtils.cp_r(app_uploads_dir, backup_dir)
-    end
-
-    def restore
-      backup_existing_uploads_dir
-      FileUtils.cp_r(backup_uploads_dir, app_uploads_dir)
-    end
-
-    def backup_existing_uploads_dir
-      timestamped_uploads_path = File.join(app_uploads_dir, '..', "uploads.#{Time.now.to_i}")
-      if File.exists?(app_uploads_dir)
-        FileUtils.mv(app_uploads_dir, File.expand_path(timestamped_uploads_path))
-      end
+    def create_files_dir
+      Dir.mkdir(app_files_dir)
     end
   end
 end
@@ -55,6 +55,7 @@ describe 'gitlab:app namespace rake task' do
       expect(Rake::Task["gitlab:backup:db:restore"]).to receive(:invoke)
       expect(Rake::Task["gitlab:backup:repo:restore"]).to receive(:invoke)
       expect(Rake::Task["gitlab:backup:builds:restore"]).to receive(:invoke)
+      expect(Rake::Task["gitlab:backup:uploads:restore"]).to receive(:invoke)
       expect(Rake::Task["gitlab:shell:setup"]).to receive(:invoke)
       expect { run_rake_task('gitlab:backup:restore') }.not_to raise_error
     end
@@ -112,14 +113,14 @@ describe 'gitlab:app namespace rake task' do
     it 'should set correct permissions on the tar contents' do
       tar_contents, exit_status = Gitlab::Popen.popen(
-        %W{tar -tvf #{@backup_tar} db uploads repositories builds}
+        %W{tar -tvf #{@backup_tar} db uploads.tar.gz repositories builds.tar.gz}
       )
       expect(exit_status).to eq(0)
       expect(tar_contents).to match('db/')
-      expect(tar_contents).to match('uploads/')
+      expect(tar_contents).to match('uploads.tar.gz')
       expect(tar_contents).to match('repositories/')
-      expect(tar_contents).to match('builds/')
-      expect(tar_contents).not_to match(/^.{4,9}[rwx].* (db|uploads|repositories|builds)\/$/)
+      expect(tar_contents).to match('builds.tar.gz')
+      expect(tar_contents).not_to match(/^.{4,9}[rwx].* (database.sql.gz|uploads.tar.gz|repositories|builds.tar.gz)\/$/)
     end
 
     it 'should delete temp directories' do
@@ -160,12 +161,12 @@ describe 'gitlab:app namespace rake task' do
     it "does not contain skipped item" do
       tar_contents, _exit_status = Gitlab::Popen.popen(
-        %W{tar -tvf #{@backup_tar} db uploads repositories builds}
+        %W{tar -tvf #{@backup_tar} db uploads.tar.gz repositories builds.tar.gz}
       )
       expect(tar_contents).to match('db/')
-      expect(tar_contents).to match('uploads/')
-      expect(tar_contents).to match('builds/')
+      expect(tar_contents).to match('uploads.tar.gz')
+      expect(tar_contents).to match('builds.tar.gz')
       expect(tar_contents).not_to match('repositories/')
     end
...