Commit ed000b5f authored by Nick Thomas's avatar Nick Thomas

Merge branch 'feature/hashed-storage-repo-import' into 'master'

Improve GitLab Import rake task to work with Hashed Storage and Subgroups

Closes #36509

See merge request gitlab-org/gitlab-ce!15157
parents 3a8cf276 1c8af321
---
title: Improve GitLab Import rake task to work with Hashed Storage and Subgroups
merge_request:
author:
type: changed
......@@ -3,49 +3,47 @@
## Notes
- The owner of the project will be the first admin
- The groups will be created as needed
- The groups will be created as needed, including subgroups
- The owner of the group will be the first admin
- Existing projects will be skipped
- The existing Git repos will be moved from disk (removed from the original path)
## How to use
### Create a new folder inside the git repositories path. This will be the name of the new group.
### Create a new folder to import your Git repositories from.
- For omnibus-gitlab, it is located at: `/var/opt/gitlab/git-data/repositories` by default, unless you changed
it in the `/etc/gitlab/gitlab.rb` file.
- For installations from source, it is usually located at: `/home/git/repositories` or you can see where
your repositories are located by looking at `config/gitlab.yml` under the `repositories => storages` entries
(you'll usually use the `default` storage path to start).
New folder needs to have git user ownership and read/write/execute access for git user and its group:
The new folder needs to have git user ownership and read/write/execute access for git user and its group:
```
sudo -u git mkdir /var/opt/gitlab/git-data/repositories/new_group
sudo -u git mkdir /var/opt/gitlab/git-data/repository-import-<date>/new_group
```
If you are using an installation from source, replace `/var/opt/gitlab/git-data`
with `/home/git`.
### Copy your bare repositories inside this newly created folder:
- Any .git repositories found on any of the subfolders will be imported as projects
- Groups will be created as needed; these can be nested folders. For example:
If we copy the repos to `/var/opt/gitlab/git-data/repository-import-<date>`, and repo A needs to be under the groups G1 and G2, it will
have to be created under those folders: `/var/opt/gitlab/git-data/repository-import-<date>/G1/G2/A.git`.
```
sudo cp -r /old/git/foo.git /var/opt/gitlab/git-data/repositories/new_group/
sudo cp -r /old/git/foo.git /var/opt/gitlab/git-data/repository-import-<date>/new_group/
# Do this once when you are done copying git repositories
sudo chown -R git:git /var/opt/gitlab/git-data/repositories/new_group/
sudo chown -R git:git /var/opt/gitlab/git-data/repository-import-<date>
```
`foo.git` needs to be owned by the git user and the git user's group.
If you are using an installation from source, replace `/var/opt/gitlab/git-data`
with `/home/git`.
If you are using an installation from source, replace `/var/opt/gitlab/` with `/home/git`.
### Run the command below depending on your type of installation:
#### Omnibus Installation
```
$ sudo gitlab-rake gitlab:import:repos
$ sudo gitlab-rake gitlab:import:repos['/var/opt/gitlab/git-data/repository-import-<date>']
```
#### Installation from source
......@@ -54,16 +52,21 @@ Before running this command you need to change the directory to where your GitLa
```
$ cd /home/git/gitlab
$ sudo -u git -H bundle exec rake gitlab:import:repos RAILS_ENV=production
$ sudo -u git -H bundle exec rake gitlab:import:repos['/var/opt/gitlab/git-data/repository-import-<date>'] RAILS_ENV=production
```
#### Example output
```
Processing abcd.git
Processing /var/opt/gitlab/git-data/repository-import-1/a/b/c/blah.git
* Using namespace: a/b/c
* Created blah (a/b/c/blah)
* Skipping repo /var/opt/gitlab/git-data/repository-import-1/a/b/c/blah.wiki.git
Processing /var/opt/gitlab/git-data/repository-import-1/abcd.git
* Created abcd (abcd.git)
Processing group/xyz.git
* Created Group group (2)
Processing /var/opt/gitlab/git-data/repository-import-1/group/xyz.git
* Using namespace: group (2)
* Created xyz (group/xyz.git)
* Skipping repo /var/opt/gitlab/git-data/repository-import-1/@shared/a/b/abcd.git
[...]
```
module Gitlab
  module BareRepositoryImport
    # Imports bare Git repositories found under a directory as projects.
    #
    # For each repository that is not skipped, a project is created through
    # Projects::CreateService (unless one already exists at the same full
    # path) and the repository directory is moved from the import directory
    # to the project's location on disk.
    class Importer
      NoAdminError = Class.new(StandardError)

      # Scans +import_path+ recursively for `*.git` directories and imports
      # each of them on behalf of the first admin user.
      #
      # Repositories for which Repository#hashed? or Repository#wiki? is true
      # are skipped (wiki repositories are moved together with their project
      # in #mv_repo).
      #
      # @param import_path [String] directory to scan; a trailing `/` is
      #   optional. The string is not modified.
      # @raise [NoAdminError] when there is no admin user to own the projects
      def self.execute(import_path)
        # Work on a normalized copy: appending to the argument in place would
        # leak a side effect to the caller and fail on frozen strings.
        root_path = import_path.end_with?('/') ? import_path : "#{import_path}/"
        repos_to_import = Dir.glob(root_path + '**/*.git')

        user = User.admins.order_id_asc.first
        raise NoAdminError, 'No admin user found to import repositories' unless user

        repos_to_import.each do |repo_path|
          bare_repo = Gitlab::BareRepositoryImport::Repository.new(root_path, repo_path)

          if bare_repo.hashed? || bare_repo.wiki?
            log " * Skipping repo #{bare_repo.repo_path}".color(:yellow)
            next
          end

          log "Processing #{repo_path}".color(:yellow)

          new(user, bare_repo).create_project_if_needed
        end
      end

      # This is called from within a rake task only used by Admins, so allow writing
      # to STDOUT
      def self.log(message)
        puts message # rubocop:disable Rails/Output
      end

      # `project_name` is provided by the delegation below, so it is not
      # declared as an attr_reader as well.
      attr_reader :user, :bare_repo

      delegate :log, to: :class
      delegate :project_name, :project_full_path, :group_path, :repo_path, :wiki_path, to: :bare_repo

      # @param user the user passed to the project and group creation services
      # @param bare_repo [Gitlab::BareRepositoryImport::Repository] the
      #   repository to import
      def initialize(user, bare_repo)
        @user = user
        @bare_repo = bare_repo
      end

      # Returns the project already registered at the repository's full path,
      # or creates it.
      #
      # @return the existing project, or the result of #create_project
      def create_project_if_needed
        if project = Project.find_by_full_path(project_full_path)
          log " * #{project.name} (#{project_full_path}) exists"
          return project
        end

        create_project
      end

      private

      # Creates the project, then moves the repository into place.
      #
      # NOTE(review): when the project is persisted but the move fails, only
      # the failure is logged; the project record is left as created.
      #
      # @return the object returned by Projects::CreateService#execute
      def create_project
        group = find_or_create_groups

        project = Projects::CreateService.new(user,
                                              name: project_name,
                                              path: project_name,
                                              skip_disk_validation: true,
                                              namespace_id: group&.id).execute

        if project.persisted? && mv_repo(project)
          log " * Created #{project.name} (#{project_full_path})".color(:green)

          ProjectCacheWorker.perform_async(project.id)
        else
          log " * Failed trying to create #{project.name} (#{project_full_path})".color(:red)
          log " Errors: #{project.errors.messages}".color(:red) if project.errors.any?
        end

        project
      end

      # Moves the repository (and its wiki repository, when one exists next
      # to it) from the import directory to the project's path on disk.
      #
      # @return [Boolean] true on success, false when moving raised an error
      def mv_repo(project)
        FileUtils.mv(repo_path, File.join(project.repository_storage_path, project.disk_path + '.git'))

        if bare_repo.wiki_exists?
          FileUtils.mv(wiki_path, File.join(project.repository_storage_path, project.disk_path + '.wiki.git'))
        end

        true
      rescue => e
        log " * Failed to move repo: #{e.message}".color(:red)

        false
      end

      # @return the result of Groups::NestedCreateService for the repository's
      #   group path, or nil when the repository has no group path
      def find_or_create_groups
        return nil unless group_path.present?

        log " * Using namespace: #{group_path}"

        Groups::NestedCreateService.new(user, group_path: group_path).execute
      end
    end
  end
end
module Gitlab
  module BareRepositoryImport
    # Describes one bare repository located under an import root directory
    # and derives the group path and project name from its relative path.
    class Repository
      attr_reader :group_path, :project_name, :repo_path

      # @param root_path [String] import root directory; a trailing `/` is
      #   optional and the string is not modified
      # @param repo_path [String] path of the `.git` directory, expected to be
      #   located under +root_path+
      def initialize(root_path, repo_path)
        # Normalize so the relative path never starts with `/`, whichever
        # form of root path the caller passes.
        @root_path = root_path.end_with?('/') ? root_path : "#{root_path}/"
        @repo_path = repo_path

        # Split path into 'all/the/namespaces' and 'project_name'
        @group_path, _, @project_name = repo_relative_path.rpartition('/')
      end

      # @return [Boolean] whether a sibling `<name>.wiki.git` exists on disk
      def wiki_exists?
        File.exist?(wiki_path)
      end

      # @return [Boolean] whether this repository path itself ends in `.wiki.git`
      def wiki?
        repo_path.end_with?('.wiki.git')
      end

      # @return [String] the repository path with the trailing `.git`
      #   replaced by `.wiki.git`
      def wiki_path
        @wiki_path ||= repo_path.sub(/\.git\z/, '.wiki.git')
      end

      # @return [Boolean] whether the group path starts with `@hashed`
      def hashed?
        group_path.start_with?('@hashed')
      end

      # @return [String] `group_path/project_name`, or just the project name
      #   when the repository sits directly under the root (no leading `/`)
      def project_full_path
        @project_full_path ||=
          group_path.empty? ? project_name : "#{group_path}/#{project_name}"
      end

      private

      def repo_relative_path
        # Remove root path and `.git` at the end
        repo_path[@root_path.size...-4]
      end
    end
  end
end
module Gitlab
  # Registers bare repositories that already sit inside the configured
  # repository storages as projects, one project per `*.git` directory.
  class BareRepositoryImporter
    NoAdminError = Class.new(StandardError)

    # Walks every configured storage, looks for `*.git` directories below its
    # path and runs #create_project_if_needed for each one that is not a wiki
    # repository.
    def self.execute
      Gitlab.config.repositories.storages.each do |storage_name, repository_storage|
        git_base_path = repository_storage['path']

        Dir.glob(git_base_path + '/**/*.git').each do |repo_path|
          if repo_path.end_with?('.wiki.git')
            log " * Skipping wiki repo"
          else
            log "Processing #{repo_path}".color(:yellow)

            # Path relative to the storage root, without the leading `/`
            # and without the `.git` suffix.
            path_inside_storage = repo_path[git_base_path.length..-1]
            without_leading_slash = path_inside_storage.sub(/^\//, '')
            repo_relative_path = without_leading_slash.sub(/\.git$/, '')

            new(storage_name, repo_relative_path).create_project_if_needed
          end
        end
      end
    end

    # This is called from within a rake task only used by Admins, so allow writing
    # to STDOUT
    def self.log(message)
      puts message # rubocop:disable Rails/Output
    end

    attr_reader :storage_name, :full_path, :group_path, :project_path, :user

    delegate :log, to: :class

    # @param storage_name [String] key of the repository storage
    # @param repo_path [String] repository path relative to the storage root,
    #   without the `.git` suffix
    # @raise [NoAdminError] when there is no admin user
    def initialize(storage_name, repo_path)
      @storage_name = storage_name
      @full_path = repo_path

      @user = User.admins.order_id_asc.first
      raise NoAdminError, 'No admin user found to import repositories' unless @user

      directory, @project_path = File.split(repo_path)
      # File.split reports '.' for a path without any directory part.
      @group_path = directory == '.' ? nil : directory
    end

    # @return the project found at #full_path, or the result of #create_project
    def create_project_if_needed
      existing = Project.find_by_full_path(full_path)
      return create_project unless existing

      log " * #{existing.name} (#{full_path}) exists"
      existing
    end

    private

    # Creates the project through Projects::CreateService and logs the outcome.
    def create_project
      group = find_or_create_group

      project = Projects::CreateService.new(
        user,
        name: project_path,
        path: project_path,
        repository_storage: storage_name,
        namespace_id: group&.id,
        skip_disk_validation: true
      ).execute

      unless project.persisted?
        log " * Failed trying to create #{project.name} (#{full_path})".color(:red)
        log " Errors: #{project.errors.messages}".color(:red)
        return project
      end

      log " * Created #{project.name} (#{full_path})".color(:green)
      ProjectCacheWorker.perform_async(project.id)

      project
    end

    # @return the namespace found at #group_path, the result of
    #   Groups::NestedCreateService when none exists, or nil without a group path
    def find_or_create_group
      return nil unless group_path

      namespace = Namespace.find_by_full_path(group_path)

      if namespace
        log " * Namespace #{group_path} exists.".color(:green)
        namespace
      else
        log " * Creating Group: #{group_path}"
        Groups::NestedCreateService.new(user, group_path: group_path).execute
      end
    end
  end
end
......@@ -2,23 +2,21 @@ namespace :gitlab do
namespace :import do
# How to use:
#
# 1. copy the bare repos under the repository storage paths (commonly the default path is /home/git/repositories)
# 2. run: bundle exec rake gitlab:import:repos RAILS_ENV=production
# 1. copy the bare repos to a specific path that contains the group or subgroup structure as folders
# 2. run: bundle exec rake gitlab:import:repos[/path/to/repos] RAILS_ENV=production
#
# Notes:
# * The project owner will be set to the first administrator of the system
# * Existing projects will be skipped
#
#
desc "GitLab | Import bare repositories from repositories -> storages into GitLab project instance"
task repos: :environment do
if Project.current_application_settings.hashed_storage_enabled
puts 'Cannot import repositories when Hashed Storage is enabled'.color(:red)
task :repos, [:import_path] => :environment do |_t, args|
unless args.import_path
puts 'Please specify an import path that contains the repositories'.color(:red)
exit 1
end
Gitlab::BareRepositoryImporter.execute
Gitlab::BareRepositoryImport::Importer.execute(args.import_path)
end
end
end
require 'spec_helper'
describe Gitlab::BareRepositoryImporter, repository: true do
subject(:importer) { described_class.new('default', project_path) }
describe Gitlab::BareRepositoryImport::Importer, repository: true do
let!(:admin) { create(:admin) }
let!(:base_dir) { Dir.mktmpdir + '/' }
let(:bare_repository) { Gitlab::BareRepositoryImport::Repository.new(base_dir, File.join(base_dir, "#{project_path}.git")) }
subject(:importer) { described_class.new(admin, bare_repository) }
before do
allow(described_class).to receive(:log)
end
after do
FileUtils.rm_rf(base_dir)
end
shared_examples 'importing a repository' do
describe '.execute' do
it 'creates a project for a repository in storage' do
FileUtils.mkdir_p(File.join(TestEnv.repos_path, "#{project_path}.git"))
FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git"))
fake_importer = double
expect(described_class).to receive(:new).with('default', project_path)
.and_return(fake_importer)
expect(described_class).to receive(:new).and_return(fake_importer)
expect(fake_importer).to receive(:create_project_if_needed)
described_class.execute
described_class.execute(base_dir)
end
it 'skips wiki repos' do
FileUtils.mkdir_p(File.join(TestEnv.repos_path, 'the-group', 'the-project.wiki.git'))
repo_dir = File.join(base_dir, 'the-group', 'the-project.wiki.git')
FileUtils.mkdir_p(File.join(repo_dir))
expect(described_class).to receive(:log).with(' * Skipping wiki repo')
expect(described_class).to receive(:log).with(" * Skipping repo #{repo_dir}")
expect(described_class).not_to receive(:new)
described_class.execute
end
described_class.execute(base_dir)
end
describe '#initialize' do
context 'without admin users' do
let(:admin) { nil }
it 'raises an error' do
expect { importer }.to raise_error(Gitlab::BareRepositoryImporter::NoAdminError)
expect { described_class.execute(base_dir) }.to raise_error(Gitlab::BareRepositoryImport::Importer::NoAdminError)
end
end
end
......@@ -63,6 +67,26 @@ describe Gitlab::BareRepositoryImporter, repository: true do
expect(Project.find_by_full_path(project_path)).not_to be_nil
end
it 'creates the Git repo in disk' do
FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git"))
importer.create_project_if_needed
project = Project.find_by_full_path(project_path)
expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.git'))
end
context 'hashed storage enabled' do
it 'creates a project with the correct path in the database' do
stub_application_setting(hashed_storage_enabled: true)
importer.create_project_if_needed
expect(Project.find_by_full_path(project_path)).not_to be_nil
end
end
end
end
......@@ -84,6 +108,50 @@ describe Gitlab::BareRepositoryImporter, repository: true do
it_behaves_like 'importing a repository'
end
context 'without groups' do
let(:project_path) { 'a-project' }
it 'starts an import for a project that did not exist' do
expect(importer).to receive(:create_project)
importer.create_project_if_needed
end
it 'creates a project with the correct path in the database' do
importer.create_project_if_needed
expect(Project.find_by_full_path("#{admin.full_path}/#{project_path}")).not_to be_nil
end
it 'creates the Git repo in disk' do
FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git"))
importer.create_project_if_needed
project = Project.find_by_full_path("#{admin.full_path}/#{project_path}")
expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.git'))
end
end
context 'with Wiki' do
let(:project_path) { 'a-group/a-project' }
let(:existing_group) { create(:group, path: 'a-group') }
it_behaves_like 'importing a repository'
it 'creates the Wiki git repo in disk' do
FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.git"))
FileUtils.mkdir_p(File.join(base_dir, "#{project_path}.wiki.git"))
importer.create_project_if_needed
project = Project.find_by_full_path(project_path)
expect(File).to exist(File.join(project.repository_storage_path, project.disk_path + '.wiki.git'))
end
end
context 'when subgroups are not available' do
let(:project_path) { 'a-group/a-sub-group/a-project' }
......
require 'spec_helper'
# Unit specs for the path handling in Gitlab::BareRepositoryImport::Repository.
# The subject is built from plain path strings.
describe ::Gitlab::BareRepositoryImport::Repository do
  let(:project_repo_path) { described_class.new('/full/path/', '/full/path/to/repo.git') }

  it 'stores the repo path' do
    expect(project_repo_path.repo_path).to eq('/full/path/to/repo.git')
  end

  it 'stores the group path' do
    expect(project_repo_path.group_path).to eq('to')
  end

  it 'stores the project name' do
    expect(project_repo_path.project_name).to eq('repo')
  end

  it 'stores the wiki path' do
    expect(project_repo_path.wiki_path).to eq('/full/path/to/repo.wiki.git')
  end

  describe '#wiki?' do
    it 'returns true if it is a wiki' do
      wiki_path = described_class.new('/full/path/', '/full/path/to/a/b/my.wiki.git')

      expect(wiki_path.wiki?).to eq(true)
    end

    it 'returns false if it is not a wiki' do
      expect(project_repo_path.wiki?).to eq(false)
    end
  end

  describe '#hashed?' do
    it 'returns true if it is a hashed folder' do
      path = described_class.new('/full/path/', '/full/path/@hashed/my.repo.git')

      expect(path.hashed?).to eq(true)
    end

    it 'returns false if it is not a hashed folder' do
      expect(project_repo_path.hashed?).to eq(false)
    end
  end

  describe '#project_full_path' do
    it 'returns the project full path' do
      # Assert on the method under test; checking `repo_path` here would
      # leave #project_full_path without any coverage.
      expect(project_repo_path.project_full_path).to eq('to/repo')
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment