Commit 385e8f71 authored by Stan Hu

Merge branch 'tc-backfill-full-path-config' into 'master'

Migration to write fullpath in all repository configs

Closes #41776

See merge request gitlab-org/gitlab-ce!22322
parents 979bd916 f35ff1ea
---
title: Migration to write fullpath in all repository configs
merge_request: 22322
author:
type: other
# frozen_string_literal: true
# Migration that queues background jobs over the projects table so that the
# full path of every project ends up in (or is removed from) its repository
# config. The actual work happens in the job classes named by UP_MIGRATION
# and DOWN_MIGRATION.
#
# NOTE(review): BATCH_SIZE is declared but never handed to the queueing
# helper, so the helper's own default batch size applies - confirm that is
# the intended value.
class BackfillStoreProjectFullPathInRepo < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  BATCH_SIZE = 1_000
  DELAY_INTERVAL = 5.minutes
  UP_MIGRATION = 'BackfillProjectFullpathInRepoConfig::Up'
  DOWN_MIGRATION = 'BackfillProjectFullpathInRepoConfig::Down'

  disable_ddl_transaction!

  # Migration-local model bound to the projects table.
  class Project < ActiveRecord::Base
    self.table_name = 'projects'

    include EachBatch
  end

  # Queues the Up job class for the projects table.
  def up
    queue_jobs_for_projects(UP_MIGRATION)
  end

  # Queues the Down job class for the projects table.
  def down
    queue_jobs_for_projects(DOWN_MIGRATION)
  end

  private

  # Queues +job_class_name+ for Project via the migration helper, passing
  # DELAY_INTERVAL as the delay interval.
  def queue_jobs_for_projects(job_class_name)
    queue_background_migration_jobs_by_range_at_intervals(Project, job_class_name, DELAY_INTERVAL)
  end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# This module is used to write the full path of all projects to
# the git repository config file.
# Storing the full project path in the git config allows admins to
# easily identify a project when it is using hashed storage.
module BackfillProjectFullpathInRepoConfig
OrphanedNamespaceError = Class.new(StandardError)
module Storage
  # Resolves the disk path of a project that lives in hashed storage.
  class HashedProject
    ROOT_PATH_PREFIX = '@hashed'

    attr_accessor :project

    def initialize(project)
      @project = project
    end

    # Path of the form "@hashed/<hash chars 0-1>/<hash chars 2-3>/<hash>".
    def disk_path
      digest = disk_hash
      [ROOT_PATH_PREFIX, digest[0..1], digest[2..3], digest].join('/')
    end

    # SHA2 hex digest of the project id (as a string), memoized.
    # Returns nil when the project has no id.
    def disk_hash
      return unless project.id

      @disk_hash ||= Digest::SHA2.hexdigest(project.id.to_s)
    end
  end

  # Resolves the disk path of a project that lives in legacy storage,
  # where the path on disk is simply the project's full path.
  class LegacyProject
    attr_accessor :project

    def initialize(project)
      @project = project
    end

    def disk_path
      project.full_path
    end
  end
end
# Mixin for Project and Namespace that derives a record's full route by
# prefixing its own path with the full path of its parent.
module Routable
  extend ActiveSupport::Concern

  # Full path of the record, computed once and cached on the instance.
  def full_path
    return @full_path if @full_path

    @full_path = build_full_path
  end

  # Returns the bare path for a record without a parent key; otherwise the
  # parent's full path joined to this record's path with a slash.
  # Raises OrphanedNamespaceError when the parent key is set but no parent
  # record can be loaded.
  def build_full_path
    if has_parent?
      owner = parent
      raise OrphanedNamespaceError if owner.nil?

      owner.full_path + '/' + path
    else
      path
    end
  end

  # Value of the foreign key column backing the :parent association
  # (falsy when no parent is referenced).
  def has_parent?
    parent_key = association(:parent).reflection.foreign_key
    read_attribute(parent_key)
  end
end
# Lightweight repository handle: pairs a storage name with a relative path
# and can express that pair as a Gitaly::Repository message.
class Repository
  attr_reader :storage

  def initialize(storage, relative_path)
    @storage = storage
    @relative_path = relative_path
  end

  # Builds the Gitaly::Repository for this storage and relative path.
  def gitaly_repository
    Gitaly::Repository.new(
      storage_name: storage,
      relative_path: @relative_path
    )
  end
end
# Namespace can be a user or group. It can be the root or a
# child of another namespace.
class Namespace < ActiveRecord::Base
  self.table_name = 'namespaces'
  # No inheritance column: rows are always instantiated as this class.
  self.inheritance_column = nil

  include Routable

  belongs_to :parent, class_name: 'Namespace', inverse_of: 'namespaces'
  has_many :projects, inverse_of: :parent
  # Child namespaces reference their parent through parent_id, the same key
  # belongs_to :parent uses. Without an explicit foreign_key, has_many would
  # derive "namespace_id" from the class name and query the wrong column.
  has_many :namespaces, inverse_of: :parent, foreign_key: :parent_id
end
# Migration-local model for the projects table. It knows where the project's
# repository lives on disk and can write or remove the gitlab.fullpath entry
# in that repository's config through the Gitaly repository service.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  include Routable
  include EachBatch

  FULLPATH_CONFIG_KEY = 'gitlab.fullpath'

  belongs_to :parent, class_name: 'Namespace', foreign_key: :namespace_id, inverse_of: 'projects'
  delegate :disk_path, to: :storage

  # Stores the project's full path under FULLPATH_CONFIG_KEY.
  def add_fullpath_config
    # The full path is resolved before the repository service is touched.
    config = { FULLPATH_CONFIG_KEY => full_path }
    repository_service.set_config(config)
  end

  # Deletes the FULLPATH_CONFIG_KEY entry.
  def remove_fullpath_config
    keys = [FULLPATH_CONFIG_KEY]
    repository_service.delete_config(keys)
  end

  # Asks the repository service to clean up the repository.
  def cleanup_repository
    repository_service.cleanup
  end

  # Storage strategy (hashed or legacy) used to compute disk_path; memoized.
  def storage
    @storage ||= (hashed_storage? ? Storage::HashedProject : Storage::LegacyProject).new(self)
  end

  # Truthy when storage_version is set and is at least 1.
  def hashed_storage?
    version = storage_version
    version && version >= 1
  end

  # Repository handle for "<disk_path>.git" on this project's storage; memoized.
  def repository
    @repository ||= begin
      storage_name = repository_storage
      relative_path = disk_path + '.git'
      Repository.new(storage_name, relative_path)
    end
  end

  # Gitaly repository service client for this project's repository; memoized.
  def repository_service
    @repository_service ||= Gitlab::GitalyClient::RepositoryService.new(repository)
  end
end
# Base class for Up and Down migration classes.
#
# Subclasses provide #perform_one(project) and a nested RetryOne class
# (derived from BaseRetryOne) whose #migration_class names the subclass.
class BackfillFullpathMigration
  RETRY_DELAY = 15.minutes
  MAX_RETRIES = 2

  # Base class for retrying one project
  class BaseRetryOne
    # Re-runs the migration for a single project.
    #
    # project_id  - id of the project to retry
    # retry_count - number of retries already scheduled for this project
    def perform(project_id, retry_count)
      # find_by returns nil for a missing row, whereas find would raise
      # ActiveRecord::RecordNotFound and make the guard below unreachable.
      # A project deleted while its retry was waiting is simply skipped.
      project = Project.find_by(id: project_id)
      return unless project

      migration_class.new.safe_perform_one(project, retry_count)
    end
  end

  # Processes every project whose id lies in start_id..end_id (inclusive),
  # preloading the parent namespace of each.
  def perform(start_id, end_id)
    Project.includes(:parent).where(id: start_id..end_id).each do |project|
      safe_perform_one(project)
    end
  end

  # Runs perform_one for the project without letting Gitaly errors escape.
  #
  # GRPC::NotFound, GRPC::InvalidArgument and OrphanedNamespaceError are
  # swallowed (the project is skipped). Any other GRPC::BadStatus schedules
  # a retry as long as retry_count is below MAX_RETRIES.
  def safe_perform_one(project, retry_count = 0)
    perform_one(project)
  rescue GRPC::NotFound, GRPC::InvalidArgument, OrphanedNamespaceError
    nil
  rescue GRPC::BadStatus
    schedule_retry(project, retry_count + 1) if retry_count < MAX_RETRIES
  end

  # Enqueues the subclass' RetryOne job for the project after RETRY_DELAY.
  def schedule_retry(project, retry_count)
    BackgroundMigrationWorker.perform_in(RETRY_DELAY, self.class::RetryOne.name, [project.id, retry_count])
  end
end
# Forward direction: stores the fullpath entry in each project's git repo config.
class Up < BackfillFullpathMigration
  # Retry job that re-runs the Up migration for one project.
  class RetryOne < BaseRetryOne
    def migration_class
      Up
    end
  end

  # Cleans up the repository first, then writes the fullpath config entry.
  def perform_one(project)
    project.tap(&:cleanup_repository).add_fullpath_config
  end
end
# Reverse direction: removes the fullpath entry from each project's git repo config.
class Down < BackfillFullpathMigration
  # Retry job that re-runs the Down migration for one project.
  class RetryOne < BaseRetryOne
    def migration_class
      Down
    end
  end

  # Cleans up the repository first, then deletes the fullpath config entry.
  def perform_one(project)
    project.tap(&:cleanup_repository).remove_fullpath_config
  end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::BackfillProjectFullpathInRepoConfig, :migration, schema: 20181010133639 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
let(:subgroup) { namespaces.create!(name: 'bar', path: 'bar', parent_id: group.id) }
# Hashed storage: the disk path is derived from the digest of the project id.
describe described_class::Storage::HashedProject do
  subject(:project_storage) { described_class.new(project) }

  let(:project) { double(id: 555) }

  it 'has the correct disk_path' do
    expected_disk_path = '@hashed/91/a7/91a73fd806ab2c005c13b4dc19130a884e909dea3f72d46e30266fe1a1f588d8'

    expect(project_storage.disk_path).to eq(expected_disk_path)
  end
end
# Legacy storage: the disk path is the project's full path, unchanged.
describe described_class::Storage::LegacyProject do
  subject(:project_storage) { described_class.new(project) }

  let(:project) { double(full_path: 'this/is/the/full/path') }

  it 'has the correct disk_path' do
    expected_disk_path = 'this/is/the/full/path'

    expect(project_storage.disk_path).to eq(expected_disk_path)
  end
end
# Exercises the migration-local Project model against real table rows.
describe described_class::Project do
  subject(:project) { described_class.find(project_record.id) }

  let(:project_record) { projects.create!(namespace_id: subgroup.id, name: 'baz', path: 'baz') }

  describe '#full_path' do
    it 'returns path containing all parent namespaces' do
      expect(project.full_path).to eq('foo/bar/baz')
    end

    it 'raises OrphanedNamespaceError when any parent namespace does not exist' do
      # `subgroup` is the receiver, so it (and its group) is created before
      # maximum(:id) is read; the successor id then points at no namespace.
      subgroup.update_attribute(:parent_id, namespaces.maximum(:id).succ)

      expect { project.full_path }
        .to raise_error(Gitlab::BackgroundMigration::BackfillProjectFullpathInRepoConfig::OrphanedNamespaceError)
    end
  end
end
# Up#perform should write gitlab.fullpath for every project in the id range.
describe described_class::Up do
describe '#perform' do
# Runs the migration over the full range of project ids currently in the table.
subject(:migrate) { described_class.new.perform(projects.minimum(:id), projects.maximum(:id)) }
it 'asks the gitaly client to set config' do
# Two projects in the same subgroup: one without a storage_version and one
# with storage_version 1.
projects.create!(namespace_id: subgroup.id, name: 'baz', path: 'baz')
projects.create!(namespace_id: subgroup.id, name: 'buzz', path: 'buzz', storage_version: 1)
# The two expectations are registered in order: first for 'baz', then for
# 'buzz'. cleanup is stubbed so only set_config is asserted.
# NOTE(review): presumably expect_next_instance_of hooks the next
# RepositoryService that gets instantiated - confirm against the spec helper.
expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |repository_service|
allow(repository_service).to receive(:cleanup)
expect(repository_service).to receive(:set_config).with('gitlab.fullpath' => 'foo/bar/baz')
end
expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |repository_service|
allow(repository_service).to receive(:cleanup)
expect(repository_service).to receive(:set_config).with('gitlab.fullpath' => 'foo/bar/buzz')
end
migrate
end
end
end
# Down#perform should remove gitlab.fullpath for every project in the id range.
describe described_class::Down do
  describe '#perform' do
    # Runs the rollback over the full range of project ids currently in the table.
    subject(:migrate) { described_class.new.perform(projects.minimum(:id), projects.maximum(:id)) }

    # The description previously read "set config", copied from the Up spec,
    # although this example asserts delete_config.
    it 'asks the gitaly client to delete config' do
      projects.create!(namespace_id: subgroup.id, name: 'baz', path: 'baz')

      expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |repository_service|
        # cleanup is stubbed so that only the config removal is asserted.
        allow(repository_service).to receive(:cleanup)
        expect(repository_service).to receive(:delete_config).with(['gitlab.fullpath'])
      end

      migrate
    end
  end
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20181010133639_backfill_store_project_full_path_in_repo.rb')
# End-to-end spec for the scheduling migration: jobs queued by #up / #down are
# executed inline, so the expectations below observe the background migration's
# calls to the Gitaly repository service.
describe BackfillStoreProjectFullPathInRepo, :migration do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
# Namespace hierarchy used by the examples: group 'foo' with subgroup 'bar'.
let(:group) { namespaces.create!(name: 'foo', path: 'foo') }
let(:subgroup) { namespaces.create!(name: 'bar', path: 'bar', parent_id: group.id) }
subject(:migration) { described_class.new }
# Run every example with Sidekiq in inline mode so queued jobs execute
# within the example itself.
around do |example|
Sidekiq::Testing.inline! do
example.run
end
end
describe '#up' do
# Shared examples; each including context must define `project` and
# `expected_path`.
shared_examples_for 'writes the full path to git config' do
it 'writes the git config' do
expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |repository_service|
allow(repository_service).to receive(:cleanup)
expect(repository_service).to receive(:set_config).with('gitlab.fullpath' => expected_path)
end
migration.up
end
it 'retries in case of failure' do
# Every RepositoryService.new returns the same spy, so calls made by the
# initial job and by its retries are counted on one object.
repository_service = spy(:repository_service)
allow(Gitlab::GitalyClient::RepositoryService).to receive(:new).and_return(repository_service)
allow(repository_service).to receive(:set_config).and_raise(GRPC::BadStatus, 'Retry me')
# 3 calls = the initial attempt plus MAX_RETRIES (2) retries.
expect(repository_service).to receive(:set_config).exactly(3).times
migration.up
end
it 'cleans up repository before writing the config' do
expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |repository_service|
# .ordered asserts cleanup is received before set_config.
expect(repository_service).to receive(:cleanup).ordered
expect(repository_service).to receive(:set_config).ordered
end
migration.up
end
context 'legacy storage' do
it 'finds the repository at the correct location' do
# Uses the application's Project model to create the repository, then
# checks the migration runs without raising.
Project.find(project.id).create_repository
expect { migration.up }.not_to raise_error
end
end
context 'hashed storage' do
it 'finds the repository at the correct location' do
# storage_version 1 makes the background migration treat the project as
# using hashed storage.
project.update_attribute(:storage_version, 1)
Project.find(project.id).create_repository
expect { migration.up }.not_to raise_error
end
end
end
context 'project in group' do
let!(:project) { projects.create!(namespace_id: group.id, name: 'baz', path: 'baz') }
let(:expected_path) { 'foo/baz' }
it_behaves_like 'writes the full path to git config'
end
context 'project in subgroup' do
let!(:project) { projects.create!(namespace_id: subgroup.id, name: 'baz', path: 'baz') }
let(:expected_path) { 'foo/bar/baz' }
it_behaves_like 'writes the full path to git config'
end
end
describe '#down' do
context 'project in group' do
let!(:project) { projects.create!(namespace_id: group.id, name: 'baz', path: 'baz') }
it 'deletes the gitlab full config value' do
# Any RepositoryService instance must receive delete_config for the
# gitlab.fullpath key.
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService)
.to receive(:delete_config).with(['gitlab.fullpath'])
migration.down
end
end
end
end
...@@ -18,33 +18,33 @@ describe MigrateIssuesToGhostUser, :migration do ...@@ -18,33 +18,33 @@ describe MigrateIssuesToGhostUser, :migration do
let!(:ghost) { users.create(ghost: true, email: 'ghost@example.com') } let!(:ghost) { users.create(ghost: true, email: 'ghost@example.com') }
it 'does not create a new user' do it 'does not create a new user' do
expect { schema_migrate_up! }.not_to change { User.count } expect { migrate! }.not_to change { User.count }
end end
it 'migrates issues where author = nil to the ghost user' do it 'migrates issues where author = nil to the ghost user' do
schema_migrate_up! migrate!
expect(issues.first.reload.author_id).to eq(ghost.id) expect(issues.first.reload.author_id).to eq(ghost.id)
end end
it 'does not change issues authored by an existing user' do it 'does not change issues authored by an existing user' do
expect { schema_migrate_up! }.not_to change { issues.second.reload.author_id} expect { migrate! }.not_to change { issues.second.reload.author_id}
end end
end end
context 'when ghost user does not exist' do context 'when ghost user does not exist' do
it 'creates a new user' do it 'creates a new user' do
expect { schema_migrate_up! }.to change { User.count }.by(1) expect { migrate! }.to change { User.count }.by(1)
end end
it 'migrates issues where author = nil to the ghost user' do it 'migrates issues where author = nil to the ghost user' do
schema_migrate_up! migrate!
expect(issues.first.reload.author_id).to eq(User.ghost.id) expect(issues.first.reload.author_id).to eq(User.ghost.id)
end end
it 'does not change issues authored by an existing user' do it 'does not change issues authored by an existing user' do
expect { schema_migrate_up! }.not_to change { issues.second.reload.author_id} expect { migrate! }.not_to change { issues.second.reload.author_id}
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment