Commit 582c1225 authored by Nick Thomas's avatar Nick Thomas

Merge branch 'bulk_project_move_api' into 'master'

Adds bulk project repository storage move API

See merge request gitlab-org/gitlab!47142
parents 4186e412 4322a686
...@@ -8,6 +8,7 @@ module Shardable ...@@ -8,6 +8,7 @@ module Shardable
scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) } scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) }
scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) } scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) }
# Restricts the relation to records whose `shard_id` matches the given shard.
# The argument is passed straight to `where(shard_id: ...)`.
scope :for_shard, -> (shard) { where(shard_id: shard) }
validates :shard, presence: true validates :shard, presence: true
end end
......
...@@ -2099,10 +2099,10 @@ class Project < ApplicationRecord ...@@ -2099,10 +2099,10 @@ class Project < ApplicationRecord
# already in that state. # already in that state.
# #
# @return nil. Failures will raise an exception # @return nil. Failures will raise an exception
def set_repository_read_only! def set_repository_read_only!(skip_git_transfer_check: false)
with_lock do with_lock do
raise RepositoryReadOnlyError, _('Git transfer in progress') if raise RepositoryReadOnlyError, _('Git transfer in progress') if
git_transfer_in_progress? !skip_git_transfer_check && git_transfer_in_progress?
raise RepositoryReadOnlyError, _('Repository already read-only') if raise RepositoryReadOnlyError, _('Repository already read-only') if
self.class.where(id: id).pick(:repository_read_only) self.class.where(id: id).pick(:repository_read_only)
......
# frozen_string_literal: true # frozen_string_literal: true
class ProjectRepository < ApplicationRecord class ProjectRepository < ApplicationRecord
include EachBatch
include Shardable include Shardable
belongs_to :project, inverse_of: :project_repository belongs_to :project, inverse_of: :project_repository
......
...@@ -50,7 +50,7 @@ class ProjectRepositoryStorageMove < ApplicationRecord ...@@ -50,7 +50,7 @@ class ProjectRepositoryStorageMove < ApplicationRecord
block.call block.call
begin begin
storage_move.project.set_repository_read_only! storage_move.project.set_repository_read_only!(skip_git_transfer_check: true)
rescue => err rescue => err
errors.add(:project, err.message) errors.add(:project, err.message)
next false next false
......
# frozen_string_literal: true
module Projects
  # Attempts to schedule a repository storage move for every project that has
  # a tracked project repository on the source shard.
  class ScheduleBulkRepositoryShardMovesService
    include BaseServiceUtility

    # Defers the bulk scheduling to the background worker.
    #
    # @param source_storage_name [String] storage shard to move projects off
    # @param destination_storage_name [String, nil] target shard, passed through as-is
    def self.enqueue(source_storage_name, destination_storage_name = nil)
      ::ProjectScheduleBulkRepositoryShardMovesWorker.perform_async(source_storage_name, destination_storage_name)
    end

    # Iterates over the source shard's project repositories in batches and
    # tries to schedule a move for each owning project.
    #
    # @param source_storage_name [String] storage shard to move projects off
    # @param destination_storage_name [String, nil] target shard, passed through as-is
    # @return the value of `success` (from BaseServiceUtility)
    # @raise whatever `Shard.find_by_name!` raises when no shard matches
    def execute(source_storage_name, destination_storage_name = nil)
      source_shard = Shard.find_by_name!(source_storage_name)

      ProjectRepository.for_shard(source_shard).each_batch(column: :project_id) do |batch|
        Project.id_in(batch.select(:project_id)).each do |project|
          schedule_move(project, source_storage_name, destination_storage_name)
        end
      end

      success
    end

    private

    # Schedules a single move while holding the project's lock; logs the
    # validation errors when the move could not be scheduled.
    def schedule_move(project, source_storage_name, destination_storage_name)
      project.with_lock do
        # Re-check under the lock: skip projects that no longer live on the source storage.
        next unless project.repository_storage == source_storage_name

        storage_move = project.repository_storage_moves.build(
          source_storage_name: source_storage_name,
          destination_storage_name: destination_storage_name
        )
        next if storage_move.schedule

        log_info("Project #{project.full_path} (#{project.id}) was skipped: #{storage_move.errors.full_messages.to_sentence}")
      end
    end
  end
end
...@@ -1847,6 +1847,14 @@ ...@@ -1847,6 +1847,14 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: project_schedule_bulk_repository_shard_moves
:feature_category: :gitaly
:has_external_dependencies:
:urgency: :throttled
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: project_service - :name: project_service
:feature_category: :integrations :feature_category: :integrations
:has_external_dependencies: true :has_external_dependencies: true
......
# frozen_string_literal: true
# Background worker that schedules repository storage moves in bulk by
# delegating to Projects::ScheduleBulkRepositoryShardMovesService#execute.
class ProjectScheduleBulkRepositoryShardMovesWorker
include ApplicationWorker
# Worker attributes declared through the ApplicationWorker DSL.
idempotent!
feature_category :gitaly
urgency :throttled
# @param source_storage_name [String] name of the storage shard to move projects off
# @param destination_storage_name [String, nil] name of the target shard; forwarded unchanged
def perform(source_storage_name, destination_storage_name = nil)
Projects::ScheduleBulkRepositoryShardMovesService.new.execute(source_storage_name, destination_storage_name)
end
end
---
title: Adds bulk project repository storage move API
merge_request: 47142
author:
type: added
...@@ -250,6 +250,8 @@ ...@@ -250,6 +250,8 @@
- 1 - 1
- - project_import_schedule - - project_import_schedule
- 1 - 1
- - project_schedule_bulk_repository_shard_moves
- 1
- - project_service - - project_service
- 1 - 1
- - project_template_export - - project_template_export
......
# frozen_string_literal: true
# Adds a composite index on project_repositories (shard_id, project_id).
class AddIndexToProjectRepositoriesShardIdProjectId < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Single source of truth for the index name so `up` and `down` cannot drift apart.
  INDEX_NAME = 'index_project_repositories_on_shard_id_and_project_id'

  # NOTE(review): presumably required because the concurrent index helpers
  # cannot run inside a DDL transaction - confirm against the helper docs.
  disable_ddl_transaction!

  # Creates the index under an explicit name.
  def up
    add_concurrent_index :project_repositories, [:shard_id, :project_id], name: INDEX_NAME
  end

  # Removes the index created by `up`.
  def down
    remove_concurrent_index :project_repositories, [:shard_id, :project_id], name: INDEX_NAME
  end
end
7988d01be5fac0f2a28cc97e309bfa16450d2e376888401fc2ad521aa0082020
\ No newline at end of file
...@@ -21771,6 +21771,8 @@ CREATE UNIQUE INDEX index_project_repositories_on_project_id ON project_reposito ...@@ -21771,6 +21771,8 @@ CREATE UNIQUE INDEX index_project_repositories_on_project_id ON project_reposito
CREATE INDEX index_project_repositories_on_shard_id ON project_repositories USING btree (shard_id); CREATE INDEX index_project_repositories_on_shard_id ON project_repositories USING btree (shard_id);
CREATE INDEX index_project_repositories_on_shard_id_and_project_id ON project_repositories USING btree (shard_id, project_id);
CREATE UNIQUE INDEX index_project_repository_states_on_project_id ON project_repository_states USING btree (project_id); CREATE UNIQUE INDEX index_project_repository_states_on_project_id ON project_repository_states USING btree (project_id);
CREATE INDEX index_project_repository_storage_moves_on_project_id ON project_repository_storage_moves USING btree (project_id); CREATE INDEX index_project_repository_storage_moves_on_project_id ON project_repository_storage_moves USING btree (project_id);
......
...@@ -1255,23 +1255,27 @@ Gitaly Cluster automatically. ...@@ -1255,23 +1255,27 @@ Gitaly Cluster automatically.
Repositories may be moved from one storage location using the [Project repository storage moves API](../../api/project_repository_storage_moves.md): Repositories may be moved from one storage location using the [Project repository storage moves API](../../api/project_repository_storage_moves.md):
NOTE: **Note:**
The Project repository storage moves API [cannot move all repository types](../../api/project_repository_storage_moves.md#limitations).
To move repositories to Gitaly Cluster: To move repositories to Gitaly Cluster:
1. [Schedule a move](../../api/project_repository_storage_moves.md#schedule-a-repository-storage-move-for-a-project) 1. [Schedule repository storage moves for all projects on a storage shard](../../api/project_repository_storage_moves.md#schedule-repository-storage-moves-for-all-projects-on-a-storage-shard) using the API. For example:
for the first repository using the API. For example:
```shell ```shell
curl --request POST --header "Private-Token: <your_access_token>" --header "Content-Type: application/json" \ curl --request POST --header "Private-Token: <your_access_token>" --header "Content-Type: application/json" \
--data '{"destination_storage_name":"praefect"}' "https://gitlab.example.com/api/v4/projects/123/repository_storage_moves" --data '{"source_storage_name":"gitaly","destination_storage_name":"praefect"}' "https://gitlab.example.com/api/v4/project_repository_storage_moves"
``` ```
1. Using the ID that is returned, [query the repository move](../../api/project_repository_storage_moves.md#get-a-single-repository-storage-move-for-a-project) 1. [Query the most recent repository moves](../../api/project_repository_storage_moves.md#retrieve-all-project-repository-storage-moves)
using the API. The query indicates either: using the API. The query indicates either:
- The move has completed successfully. The `state` field is `finished`. - The moves have completed successfully. The `state` field is `finished`.
- The move is in progress. Re-query the repository move until it completes successfully. - The moves are in progress. Re-query the repository move until it completes successfully.
- The move has failed. Most failures are temporary and are solved by rescheduling the move. - The moves have failed. Most failures are temporary and are solved by rescheduling the move.
1. Once the move is successful, repeat these steps for all repositories for your projects. 1. Once the moves are complete, [query projects](../../api/projects.md#list-all-projects)
using the API to confirm that all projects have moved. No projects should be returned
with the `repository_storage` field set to the old storage.
## Debugging Praefect ## Debugging Praefect
......
...@@ -239,3 +239,35 @@ Example response: ...@@ -239,3 +239,35 @@ Example response:
"created_at": "2020-05-07T04:27:17.016Z" "created_at": "2020-05-07T04:27:17.016Z"
} }
``` ```
## Schedule repository storage moves for all projects on a storage shard
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/47142) in GitLab 13.7.
Schedules repository storage moves for each project repository stored on the source storage shard.
```plaintext
POST /project_repository_storage_moves
```
Parameters:
| Attribute | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `source_storage_name` | string | yes | Name of the source storage shard. |
| `destination_storage_name` | string | no | Name of the destination storage shard. The storage is selected automatically if not provided. |
Example request:
```shell
curl --request POST --header "PRIVATE-TOKEN: <your_access_token>" --header "Content-Type: application/json" \
--data '{"source_storage_name":"default"}' "https://gitlab.example.com/api/v4/project_repository_storage_moves"
```
Example response:
```json
{
"message": "202 Accepted"
}
```
...@@ -34,6 +34,22 @@ module API ...@@ -34,6 +34,22 @@ module API
present storage_move, with: Entities::ProjectRepositoryStorageMove, current_user: current_user present storage_move, with: Entities::ProjectRepositoryStorageMove, current_user: current_user
end end
# Bulk endpoint: schedules repository storage moves for projects on a source storage shard.
desc 'Schedule bulk project repository storage moves' do
detail 'This feature was introduced in GitLab 13.7.'
end
params do
# Both storage names are checked against the configured repository storage names.
requires :source_storage_name, type: String, desc: 'The source storage shard', values: -> { Gitlab.config.repositories.storages.keys }
optional :destination_storage_name, type: String, desc: 'The destination storage shard', values: -> { Gitlab.config.repositories.storages.keys }
end
post do
# The moves themselves are not created here; the work is handed to the
# service's `.enqueue` and the request is answered with `accepted!`.
::Projects::ScheduleBulkRepositoryShardMovesService.enqueue(
declared_params[:source_storage_name],
declared_params[:destination_storage_name]
)
accepted!
end
end end
params do params do
......
...@@ -3024,6 +3024,17 @@ RSpec.describe Project, factory_default: :keep do ...@@ -3024,6 +3024,17 @@ RSpec.describe Project, factory_default: :keep do
expect { project.set_repository_read_only! }.to raise_error(described_class::RepositoryReadOnlyError, /in progress/) expect { project.set_repository_read_only! }.to raise_error(described_class::RepositoryReadOnlyError, /in progress/)
end end
context 'skip_git_transfer_check is true' do
  # With the check skipped, an in-progress git transfer must not block the
  # switch to read-only.
  it 'makes the project read-only when git transfers are in progress' do
    allow(project).to receive(:git_transfer_in_progress?).and_return(true)

    expect { project.set_repository_read_only!(skip_git_transfer_check: true) }
      .to change(project, :repository_read_only?).from(false).to(true)
  end
end
end end
describe '#set_repository_writable!' do describe '#set_repository_writable!' do
......
...@@ -6,7 +6,7 @@ RSpec.describe API::ProjectRepositoryStorageMoves do ...@@ -6,7 +6,7 @@ RSpec.describe API::ProjectRepositoryStorageMoves do
include AccessMatchersForRequest include AccessMatchersForRequest
let_it_be(:user) { create(:admin) } let_it_be(:user) { create(:admin) }
let_it_be(:project) { create(:project) } let_it_be(:project) { create(:project, :repository).tap { |project| project.track_project_repository } }
let_it_be(:storage_move) { create(:project_repository_storage_move, :scheduled, project: project) } let_it_be(:storage_move) { create(:project_repository_storage_move, :scheduled, project: project) }
shared_examples 'get single project repository storage move' do shared_examples 'get single project repository storage move' do
...@@ -159,4 +159,64 @@ RSpec.describe API::ProjectRepositoryStorageMoves do ...@@ -159,4 +159,64 @@ RSpec.describe API::ProjectRepositoryStorageMoves do
end end
end end
end end
# Request specs for the bulk scheduling endpoint. The endpoint only enqueues
# the worker, so the examples assert on the worker call and the HTTP status.
describe 'POST /project_repository_storage_moves' do
  let(:source_storage_name) { 'default' }
  let(:destination_storage_name) { 'test_second_storage' }

  # Issues the bulk scheduling request as `user` with the current storage names.
  def create_project_repository_storage_moves
    post api('/project_repository_storage_moves', user), params: {
      source_storage_name: source_storage_name,
      destination_storage_name: destination_storage_name
    }
  end

  before do
    stub_storage_settings('test_second_storage' => { 'path' => 'tmp/tests/extra_storage' })
  end

  it 'schedules the worker' do
    expect(ProjectScheduleBulkRepositoryShardMovesWorker).to receive(:perform_async).with(source_storage_name, destination_storage_name)

    create_project_repository_storage_moves

    expect(response).to have_gitlab_http_status(:accepted)
  end

  context 'source_storage_name is invalid' do
    # Override the *source* name here; overriding the destination would only
    # repeat the destination-invalid case below and leave source validation untested.
    let(:source_storage_name) { 'not-a-real-storage' }

    it 'gives an error' do
      create_project_repository_storage_moves

      expect(response).to have_gitlab_http_status(:bad_request)
    end
  end

  context 'destination_storage_name is missing' do
    let(:destination_storage_name) { nil }

    it 'schedules the worker' do
      expect(ProjectScheduleBulkRepositoryShardMovesWorker).to receive(:perform_async).with(source_storage_name, destination_storage_name)

      create_project_repository_storage_moves

      expect(response).to have_gitlab_http_status(:accepted)
    end
  end

  context 'destination_storage_name is invalid' do
    let(:destination_storage_name) { 'not-a-real-storage' }

    it 'gives an error' do
      create_project_repository_storage_moves

      expect(response).to have_gitlab_http_status(:bad_request)
    end
  end

  describe 'normal user' do
    it { expect { create_project_repository_storage_moves }.to be_denied_for(:user) }
  end
end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Projects::ScheduleBulkRepositoryShardMovesService do
  # A second storage must exist so a destination shard can be named.
  before do
    stub_storage_settings('test_second_storage' => { 'path' => 'tmp/tests/extra_storage' })
  end

  let!(:project) { create(:project, :repository).tap(&:track_project_repository) }
  let(:source_storage_name) { 'default' }
  let(:destination_storage_name) { 'test_second_storage' }

  describe '#execute' do
    it 'schedules project repository storage moves' do
      expect { subject.execute(source_storage_name, destination_storage_name) }
        .to change(ProjectRepositoryStorageMove, :count).by(1)

      expect(project.repository_storage_moves.last!).to have_attributes(
        source_storage_name: source_storage_name,
        destination_storage_name: destination_storage_name,
        state_name: :scheduled
      )
    end

    context 'read-only repository' do
      let!(:project) { create(:project, :repository, :read_only).tap(&:track_project_repository) }

      # The skipped project is reported through log_info and no move record is created.
      it 'does not get scheduled' do
        expect(subject).to receive(:log_info)
          .with("Project #{project.full_path} (#{project.id}) was skipped: Project is read only")

        expect { subject.execute(source_storage_name, destination_storage_name) }
          .not_to change(ProjectRepositoryStorageMove, :count)
      end
    end
  end

  describe '.enqueue' do
    it 'defers to the worker' do
      expect(::ProjectScheduleBulkRepositoryShardMovesWorker).to receive(:perform_async).with(source_storage_name, destination_storage_name)

      described_class.enqueue(source_storage_name, destination_storage_name)
    end
  end
end
...@@ -18,4 +18,10 @@ RSpec.shared_examples 'shardable scopes' do ...@@ -18,4 +18,10 @@ RSpec.shared_examples 'shardable scopes' do
expect(described_class.excluding_repository_storage('default')).to eq([record_2]) expect(described_class.excluding_repository_storage('default')).to eq([record_2])
end end
end end
# Exercises the `for_shard` scope with a shard object taken from record_1.
# NOTE(review): record_1 is defined outside this span; the expectation
# assumes it is the only record on its shard - confirm against the setup.
describe '.for_shard' do
it 'returns the objects for a given shard' do
expect(described_class.for_shard(record_1.shard)).to eq([record_1])
end
end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ProjectScheduleBulkRepositoryShardMovesWorker do
  describe "#perform" do
    before do
      stub_storage_settings('test_second_storage' => { 'path' => 'tmp/tests/extra_storage' })

      # Stub the per-project storage update worker so it is never actually enqueued.
      allow(ProjectUpdateRepositoryStorageWorker).to receive(:perform_async)
    end

    let!(:project) { create(:project, :repository).tap(&:track_project_repository) }
    let(:source_storage_name) { 'default' }
    let(:destination_storage_name) { 'test_second_storage' }

    include_examples 'an idempotent worker' do
      let(:job_args) { [source_storage_name, destination_storage_name] }

      it 'schedules project repository storage moves' do
        expect { subject }.to change(ProjectRepositoryStorageMove, :count).by(1)

        expect(project.repository_storage_moves.last!).to have_attributes(
          source_storage_name: source_storage_name,
          destination_storage_name: destination_storage_name,
          state_name: :scheduled
        )
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment