Commit 0db58ea8 authored by Nick Thomas's avatar Nick Thomas

Merge branch '37256-bump-wh-version' into 'master'

Enable workhorse upload acceleration for Project Import uploads via API

See merge request gitlab-org/gitlab!26914
parents 26e13666 1a1c954a
---
title: Enable Workhorse upload acceleration for Project Import uploads via API
merge_request: 26914
author:
type: performance
...@@ -4,41 +4,40 @@ require 'spec_helper' ...@@ -4,41 +4,40 @@ require 'spec_helper'
describe API::ProjectImport do describe API::ProjectImport do
include ExternalAuthorizationServiceHelpers include ExternalAuthorizationServiceHelpers
include WorkhorseHelpers
let(:export_path) { "#{Dir.tmpdir}/project_export_spec" }
let(:user) { create(:user) } let(:user) { create(:user) }
let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') }
let(:namespace) { create(:group) } let(:namespace) { create(:group) }
let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') }
let(:file_name) { 'project_export.tar.gz' }
let(:workhorse_token) { JWT.encode({ 'iss' => 'gitlab-workhorse' }, Gitlab::Workhorse.secret, 'HS256') }
let(:workhorse_headers) { { 'GitLab-Workhorse' => '1.0', Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER => workhorse_token } }
let(:file_upload) { fixture_file_upload(file) }
before do before do
allow_next_instance_of(Gitlab::ImportExport) do |instance| enable_external_authorization_service_check
allow(instance).to receive(:storage_path).and_return(export_path) stub_licensed_features(external_authorization_service_api_management: true)
end
namespace.add_owner(user) namespace.add_owner(user)
end end
after do
FileUtils.rm_rf(export_path, secure: true)
end
describe 'POST /projects/import' do describe 'POST /projects/import' do
let(:override_params) { { 'external_authorization_classification_label' => 'Hello world' } } let(:params) do
{
before do path: 'test-import',
enable_external_authorization_service_check namespace: namespace.id,
stub_licensed_features(external_authorization_service_api_management: true) override_params: override_params
}
end end
let(:override_params) { { 'external_authorization_classification_label' => 'Hello world' } }
subject do subject do
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
post api('/projects/import', user), upload_archive(file_upload, workhorse_headers, params)
params: {
path: 'test-import',
file: fixture_file_upload(file),
namespace: namespace.id,
override_params: override_params
}
end end
end end
...@@ -62,4 +61,15 @@ describe API::ProjectImport do ...@@ -62,4 +61,15 @@ describe API::ProjectImport do
end end
end end
end end
def upload_archive(file, headers = {}, params = {})
workhorse_finalize(
api("/projects/import", user),
method: :post,
file_key: :file,
params: params.merge(file: file_upload),
headers: headers,
send_rewritten_field: true
)
end
end end
...@@ -4,11 +4,12 @@ module API ...@@ -4,11 +4,12 @@ module API
module Helpers module Helpers
module FileUploadHelpers module FileUploadHelpers
def file_is_valid? def file_is_valid?
params[:file] && params[:file]['tempfile'].respond_to?(:read) filename = params[:file]&.original_filename
filename && ImportExportUploader::EXTENSION_WHITELIST.include?(File.extname(filename).delete('.'))
end end
def validate_file! def validate_file!
render_api_error!('Uploaded file is invalid', 400) unless file_is_valid? render_api_error!({ error: _('You need to upload a GitLab project export archive (ending in .gz).') }, 422) unless file_is_valid?
end end
end end
end end
......
...@@ -21,10 +21,6 @@ module API ...@@ -21,10 +21,6 @@ module API
def rate_limiter def rate_limiter
::Gitlab::ApplicationRateLimiter ::Gitlab::ApplicationRateLimiter
end end
def with_workhorse_upload_acceleration?
request.headers[Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER].present?
end
end end
before do before do
...@@ -46,11 +42,7 @@ module API ...@@ -46,11 +42,7 @@ module API
params do params do
requires :path, type: String, desc: 'The new project path and name' requires :path, type: String, desc: 'The new project path and name'
# TODO: remove rubocop disable - https://gitlab.com/gitlab-org/gitlab/issues/14960 requires :file, type: ::API::Validations::Types::WorkhorseFile, desc: 'The project export file to be imported'
# and mark WH fields as required (instead of optional) after the WH version including
# https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/459
# is deployed and GITLAB_WORKHORSE_VERSION is updated accordingly.
requires :file, types: [::API::Validations::Types::WorkhorseFile, File], desc: 'The project export file to be imported' # rubocop:disable Scalability/FileUploads
optional :name, type: String, desc: 'The name of the project to be imported. Defaults to the path of the project if not provided.' optional :name, type: String, desc: 'The name of the project to be imported. Defaults to the path of the project if not provided.'
optional :namespace, type: String, desc: "The ID or name of the namespace that the project will be imported into. Defaults to the current user's namespace." optional :namespace, type: String, desc: "The ID or name of the namespace that the project will be imported into. Defaults to the current user's namespace."
optional :overwrite, type: Boolean, default: false, desc: 'If there is a project in the same namespace and with the same name overwrite it' optional :overwrite, type: Boolean, default: false, desc: 'If there is a project in the same namespace and with the same name overwrite it'
...@@ -75,7 +67,7 @@ module API ...@@ -75,7 +67,7 @@ module API
success Entities::ProjectImportStatus success Entities::ProjectImportStatus
end end
post 'import' do post 'import' do
require_gitlab_workhorse! if with_workhorse_upload_acceleration? require_gitlab_workhorse!
key = "project_import".to_sym key = "project_import".to_sym
...@@ -87,27 +79,19 @@ module API ...@@ -87,27 +79,19 @@ module API
Gitlab::QueryLimiting.whitelist('https://gitlab.com/gitlab-org/gitlab-foss/issues/42437') Gitlab::QueryLimiting.whitelist('https://gitlab.com/gitlab-org/gitlab-foss/issues/42437')
validate_file!
namespace = if import_params[:namespace] namespace = if import_params[:namespace]
find_namespace!(import_params[:namespace]) find_namespace!(import_params[:namespace])
else else
current_user.namespace current_user.namespace
end end
# TODO: remove the condition after the WH version including
# https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/459
# is deployed and GITLAB_WORKHORSE_VERSION is updated accordingly.
file = if with_workhorse_upload_acceleration?
import_params[:file] || bad_request!('Unable to process project import file')
else
validate_file!
import_params[:file]['tempfile']
end
project_params = { project_params = {
path: import_params[:path], path: import_params[:path],
namespace_id: namespace.id, namespace_id: namespace.id,
name: import_params[:name], name: import_params[:name],
file: file, file: import_params[:file],
overwrite: import_params[:overwrite] overwrite: import_params[:overwrite]
} }
......
...@@ -5,7 +5,6 @@ require 'spec_helper' ...@@ -5,7 +5,6 @@ require 'spec_helper'
describe API::ProjectImport do describe API::ProjectImport do
include WorkhorseHelpers include WorkhorseHelpers
let(:export_path) { "#{Dir.tmpdir}/project_export_spec" }
let(:user) { create(:user) } let(:user) { create(:user) }
let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') } let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') }
let(:namespace) { create(:group) } let(:namespace) { create(:group) }
...@@ -14,29 +13,39 @@ describe API::ProjectImport do ...@@ -14,29 +13,39 @@ describe API::ProjectImport do
let(:workhorse_headers) { { 'GitLab-Workhorse' => '1.0', Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER => workhorse_token } } let(:workhorse_headers) { { 'GitLab-Workhorse' => '1.0', Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER => workhorse_token } }
before do before do
allow_any_instance_of(Gitlab::ImportExport).to receive(:storage_path).and_return(export_path)
stub_uploads_object_storage(FileUploader)
namespace.add_owner(user) namespace.add_owner(user)
end end
after do
FileUtils.rm_rf(export_path, secure: true)
end
describe 'POST /projects/import' do describe 'POST /projects/import' do
subject { upload_archive(file_upload, workhorse_headers, params) }
let(:file_upload) { fixture_file_upload(file) }
let(:params) do
{
path: 'test-import',
'file.size' => file_upload.size
}
end
before do
allow(ImportExportUploader).to receive(:workhorse_upload_path).and_return('/')
end
it 'schedules an import using a namespace' do it 'schedules an import using a namespace' do
stub_import(namespace) stub_import(namespace)
params[:namespace] = namespace.id
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.id } subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
it 'schedules an import using the namespace path' do it 'schedules an import using the namespace path' do
stub_import(namespace) stub_import(namespace)
params[:namespace] = namespace.full_path
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.full_path } subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
...@@ -46,24 +55,30 @@ describe API::ProjectImport do ...@@ -46,24 +55,30 @@ describe API::ProjectImport do
it 'schedules an import using a namespace and a different name' do it 'schedules an import using a namespace and a different name' do
stub_import(namespace) stub_import(namespace)
params[:name] = expected_name
params[:namespace] = namespace.id
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.id, name: expected_name } subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
it 'schedules an import using the namespace path and a different name' do it 'schedules an import using the namespace path and a different name' do
stub_import(namespace) stub_import(namespace)
params[:name] = expected_name
params[:namespace] = namespace.full_path
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.full_path, name: expected_name } subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
it 'sets name correctly' do it 'sets name correctly' do
stub_import(namespace) stub_import(namespace)
params[:name] = expected_name
params[:namespace] = namespace.full_path
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.full_path, name: expected_name } subject
project = Project.find(json_response['id']) project = Project.find(json_response['id'])
expect(project.name).to eq(expected_name) expect(project.name).to eq(expected_name)
...@@ -71,8 +86,11 @@ describe API::ProjectImport do ...@@ -71,8 +86,11 @@ describe API::ProjectImport do
it 'sets name correctly with an overwrite' do it 'sets name correctly with an overwrite' do
stub_import(namespace) stub_import(namespace)
params[:name] = 'new project name'
params[:namespace] = namespace.full_path
params[:overwrite] = true
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.full_path, name: 'new project name', overwrite: true } subject
project = Project.find(json_response['id']) project = Project.find(json_response['id'])
expect(project.name).to eq('new project name') expect(project.name).to eq('new project name')
...@@ -80,8 +98,10 @@ describe API::ProjectImport do ...@@ -80,8 +98,10 @@ describe API::ProjectImport do
it 'schedules an import using the path and name explicitly set to nil' do it 'schedules an import using the path and name explicitly set to nil' do
stub_import(namespace) stub_import(namespace)
params[:name] = nil
params[:namespace] = namespace.full_path
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.full_path, name: nil } subject
project = Project.find(json_response['id']) project = Project.find(json_response['id'])
expect(project.name).to eq('test-import') expect(project.name).to eq('test-import')
...@@ -90,8 +110,9 @@ describe API::ProjectImport do ...@@ -90,8 +110,9 @@ describe API::ProjectImport do
it 'schedules an import at the user namespace level' do it 'schedules an import at the user namespace level' do
stub_import(user.namespace) stub_import(user.namespace)
params[:path] = 'test-import2'
post api('/projects/import', user), params: { path: 'test-import2', file: fixture_file_upload(file) } subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
...@@ -100,7 +121,10 @@ describe API::ProjectImport do ...@@ -100,7 +121,10 @@ describe API::ProjectImport do
expect_any_instance_of(ProjectImportState).not_to receive(:schedule) expect_any_instance_of(ProjectImportState).not_to receive(:schedule)
expect(::Projects::CreateService).not_to receive(:new) expect(::Projects::CreateService).not_to receive(:new)
post api('/projects/import', user), params: { namespace: 'nonexistent', path: 'test-import2', file: fixture_file_upload(file) } params[:namespace] = 'nonexistent'
params[:path] = 'test-import2'
subject
expect(response).to have_gitlab_http_status(:not_found) expect(response).to have_gitlab_http_status(:not_found)
expect(json_response['message']).to eq('404 Namespace Not Found') expect(json_response['message']).to eq('404 Namespace Not Found')
...@@ -109,37 +133,40 @@ describe API::ProjectImport do ...@@ -109,37 +133,40 @@ describe API::ProjectImport do
it 'does not schedule an import if the user has no permission to the namespace' do it 'does not schedule an import if the user has no permission to the namespace' do
expect_any_instance_of(ProjectImportState).not_to receive(:schedule) expect_any_instance_of(ProjectImportState).not_to receive(:schedule)
post(api('/projects/import', create(:user)), new_namespace = create(:group)
params: { params[:path] = 'test-import3'
path: 'test-import3', params[:namespace] = new_namespace.full_path
file: fixture_file_upload(file),
namespace: namespace.full_path subject
})
expect(response).to have_gitlab_http_status(:not_found) expect(response).to have_gitlab_http_status(:not_found)
expect(json_response['message']).to eq('404 Namespace Not Found') expect(json_response['message']).to eq('404 Namespace Not Found')
end end
it 'does not schedule an import if the user uploads no valid file' do context 'if user uploads no valid file' do
expect_any_instance_of(ProjectImportState).not_to receive(:schedule) let(:file) { 'README.md' }
post api('/projects/import', user), params: { path: 'test-import3', file: './random/test' } it 'does not schedule an import if the user uploads no valid file' do
expect_any_instance_of(ProjectImportState).not_to receive(:schedule)
params[:path] = 'test-import3'
subject
expect(response).to have_gitlab_http_status(:bad_request) expect(response).to have_gitlab_http_status(:unprocessable_entity)
expect(json_response['error']).to eq('file is invalid') expect(json_response['message']['error']).to eq('You need to upload a GitLab project export archive (ending in .gz).')
end
end end
it 'stores params that can be overridden' do it 'stores params that can be overridden' do
stub_import(namespace) stub_import(namespace)
override_params = { 'description' => 'Hello world' } override_params = { 'description' => 'Hello world' }
post api('/projects/import', user), params[:namespace] = namespace.id
params: { params[:override_params] = override_params
path: 'test-import',
file: fixture_file_upload(file), subject
namespace: namespace.id,
override_params: override_params
}
import_project = Project.find(json_response['id']) import_project = Project.find(json_response['id'])
expect(import_project.import_data.data['override_params']).to eq(override_params) expect(import_project.import_data.data['override_params']).to eq(override_params)
...@@ -149,33 +176,14 @@ describe API::ProjectImport do ...@@ -149,33 +176,14 @@ describe API::ProjectImport do
stub_import(namespace) stub_import(namespace)
override_params = { 'not_allowed' => 'Hello world' } override_params = { 'not_allowed' => 'Hello world' }
post api('/projects/import', user), params[:namespace] = namespace.id
params: { params[:override_params] = override_params
path: 'test-import',
file: fixture_file_upload(file),
namespace: namespace.id,
override_params: override_params
}
import_project = Project.find(json_response['id'])
expect(import_project.import_data.data['override_params']).to be_empty
end
it 'correctly overrides params during the import', :sidekiq_might_not_need_inline do subject
override_params = { 'description' => 'Hello world' }
perform_enqueued_jobs do
post api('/projects/import', user),
params: {
path: 'test-import',
file: fixture_file_upload(file),
namespace: namespace.id,
override_params: override_params
}
end
import_project = Project.find(json_response['id']) import_project = Project.find(json_response['id'])
expect(import_project.description).to eq('Hello world') expect(import_project.import_data.data['override_params']).to be_empty
end end
context 'when target path already exists in namespace' do context 'when target path already exists in namespace' do
...@@ -184,7 +192,9 @@ describe API::ProjectImport do ...@@ -184,7 +192,9 @@ describe API::ProjectImport do
it 'does not schedule an import' do it 'does not schedule an import' do
expect_any_instance_of(ProjectImportState).not_to receive(:schedule) expect_any_instance_of(ProjectImportState).not_to receive(:schedule)
post api('/projects/import', user), params: { path: existing_project.path, file: fixture_file_upload(file) } params[:path] = existing_project.path
subject
expect(response).to have_gitlab_http_status(:bad_request) expect(response).to have_gitlab_http_status(:bad_request)
expect(json_response['message']).to eq('Name has already been taken') expect(json_response['message']).to eq('Name has already been taken')
...@@ -194,7 +204,10 @@ describe API::ProjectImport do ...@@ -194,7 +204,10 @@ describe API::ProjectImport do
it 'schedules an import' do it 'schedules an import' do
stub_import(user.namespace) stub_import(user.namespace)
post api('/projects/import', user), params: { path: existing_project.path, file: fixture_file_upload(file), overwrite: true } params[:path] = existing_project.path
params[:overwrite] = true
subject
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
...@@ -207,16 +220,16 @@ describe API::ProjectImport do ...@@ -207,16 +220,16 @@ describe API::ProjectImport do
end end
it 'prevents users from importing projects' do it 'prevents users from importing projects' do
post api('/projects/import', user), params: { path: 'test-import', file: fixture_file_upload(file), namespace: namespace.id } params[:namespace] = namespace.id
subject
expect(response).to have_gitlab_http_status(:too_many_requests) expect(response).to have_gitlab_http_status(:too_many_requests)
expect(json_response['message']['error']).to eq('This endpoint has been requested too many times. Try again later.') expect(json_response['message']['error']).to eq('This endpoint has been requested too many times. Try again later.')
end end
end end
context 'with direct upload enabled' do context 'when using remote storage' do
subject { upload_archive(file_upload, workhorse_headers, params) }
let(:file_name) { 'project_export.tar.gz' } let(:file_name) { 'project_export.tar.gz' }
let!(:fog_connection) do let!(:fog_connection) do
...@@ -232,21 +245,11 @@ describe API::ProjectImport do ...@@ -232,21 +245,11 @@ describe API::ProjectImport do
let(:file_upload) { fog_to_uploaded_file(tmp_object) } let(:file_upload) { fog_to_uploaded_file(tmp_object) }
let(:params) do it 'schedules an import' do
{ stub_import(namespace)
path: 'test-import-project', params[:namespace] = namespace.id
namespace: namespace.id,
'file.remote_id' => file_name,
'file.size' => file_upload.size
}
end
before do
allow(ImportExportUploader).to receive(:workhorse_upload_path).and_return('/')
end
it 'accepts the request and stores the file' do subject
expect { subject }.to change { Project.count }.by(1)
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
end end
...@@ -257,7 +260,7 @@ describe API::ProjectImport do ...@@ -257,7 +260,7 @@ describe API::ProjectImport do
api("/projects/import", user), api("/projects/import", user),
method: :post, method: :post,
file_key: :file, file_key: :file,
params: params.merge(file: file_upload), params: params.merge(file: file),
headers: headers, headers: headers,
send_rewritten_field: true send_rewritten_field: true
) )
...@@ -301,6 +304,7 @@ describe API::ProjectImport do ...@@ -301,6 +304,7 @@ describe API::ProjectImport do
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE) expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
expect(json_response['TempPath']).to eq(ImportExportUploader.workhorse_local_upload_path)
end end
it 'rejects requests that bypassed gitlab-workhorse' do it 'rejects requests that bypassed gitlab-workhorse' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment