Commit 81f4dc05 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'zj-cleanup-port-gitaly' into 'master'

Port cleanup tasks to use Gitaly

Closes #40529 and gitaly#954

See merge request gitlab-org/gitlab-ce!21588
parents 30311a8b 3aedccb1
---
title: Administrative cleanup rake tasks now leverage Gitaly
merge_request: 21588
author:
type: changed
......@@ -5,6 +5,14 @@ module Gitlab
@storage = storage
end
# Returns all directories in the git storage directory, lexically ordered
def list_directories(depth: 1)
request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
GitalyClient.call(@storage, :storage_service, :list_directories, request)
.flat_map(&:paths)
end
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
......
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954
#
# frozen_string_literal: true
require 'set'
namespace :gitlab do
namespace :cleanup do
HASHED_REPOSITORY_NAME = '@hashed'.freeze
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
warn_user_is_not_gitlab
namespaces = Set.new(Namespace.pluck(:path))
namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
namespaces = Namespace.pluck(:path)
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
Gitlab.config.repositories.storages.each do |name, repository_storage|
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
all_dirs = Dir.glob(git_base_path + '/*')
Gitaly::Server.all.each do |server|
all_dirs = Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories(depth: 0)
.reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
puts git_base_path.color(:yellow)
puts "Looking for directories to remove... "
all_dirs.reject! do |dir|
# skip if git repo
dir =~ /.git$/
end
all_dirs.reject! do |dir|
dir_name = File.basename dir
# skip if namespace present
namespaces.include?(dir_name)
end
all_dirs.each do |dir_path|
if remove?
if FileUtils.rm_rf dir_path
puts "Removed...#{dir_path}".color(:red)
else
puts "Cannot remove #{dir_path}".color(:red)
begin
Gitlab::GitalyClient::NamespaceService.new(server.storage)
.remove(dir_path)
puts "Removed...#{dir_path}"
rescue StandardError => e
puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end
else
puts "Can be removed: #{dir_path}".color(:red)
......@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do
warn_user_is_not_gitlab
move_suffix = "+orphaned+#{Time.now.to_i}"
Gitlab.config.repositories.storages.each do |name, repository_storage|
repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
# Look for global repos (legacy, depth 1) and normal repos (depth 2)
IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find|
find.each_line do |path|
path.chomp!
repo_with_namespace = path
.sub(repo_root, '')
.sub(%r{^/*}, '')
.chomp('.git')
.chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
File.rename(path, new_path)
Gitaly::Server.all.each do |server|
Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories
.each do |path|
repo_with_namespace = path.chomp('.git').chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
next if Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
begin
Gitlab::GitalyClient::NamespaceService
.new(server.storage)
.rename(path, new_path)
rescue StandardError => e
puts "Error occured while moving the repository: #{e.message}".color(:red)
end
end
end
......
......@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end
describe 'cleanup namespaces and repos' do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:storage) { storages.keys.first }
let(:storages) do
{
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
......@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end
before do
FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end
after do
FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage'))
Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end
describe 'cleanup:repos' do
before do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git'))
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, 'broken/project.git')
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end
it 'moves it to an orphaned path' do
run_rake_task('gitlab:cleanup:repos')
repo_list = Dir['tmp/tests/default_storage/broken/*']
now = Time.now
Timecop.freeze(now) do
run_rake_task('gitlab:cleanup:repos')
repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
expect(repo_list.first).to include('+orphaned+')
expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
end
end
it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
describe 'cleanup:dirs' do
it 'removes missing namespaces' do
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git"))
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git"))
allow(Namespace).to receive(:pluck).and_return('namespace_1')
gitlab_shell.add_namespace(storage, "namespace_1/project.git")
gitlab_shell.add_namespace(storage, "namespace_2/project.git")
allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey
expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end
it 'ignores @hashed directory' do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment