Commit 2072d057 authored by Heinrich Lee Yu

Merge branch 'kassio/project-import-from-s3' into 'master'

ProjectImport: Import remote file from AWS S3

See merge request gitlab-org/gitlab!77259
parents b87fe509 920812c9
# frozen_string_literal: true
module Import
module GitlabProjects
class CreateProjectFromRemoteFileService < CreateProjectFromUploadedFileService
FILE_SIZE_LIMIT = 10.gigabytes
ALLOWED_CONTENT_TYPES = [
'application/gzip', # most common content-type when fetching a tar.gz
'application/x-tar' # aws-s3 uses x-tar for tar.gz files
].freeze
validate :valid_remote_import_url?
validate :validate_file_size
validate :validate_content_type
private
def required_params
[:path, :namespace, :remote_import_url]
end
def project_params
super
.except(:file)
.merge(import_export_upload: ::ImportExportUpload.new(
remote_import_url: params[:remote_import_url]
))
end
def valid_remote_import_url?
::Gitlab::UrlBlocker.validate!(
params[:remote_import_url],
allow_localhost: allow_local_requests?,
allow_local_network: allow_local_requests?,
schemes: %w(http https)
)
true
rescue ::Gitlab::UrlBlocker::BlockedUrlError => e
errors.add(:base, e.message)
false
end
def allow_local_requests?
::Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
def validate_content_type
# AWS-S3 presigned URLs don't respond to HTTP HEAD requests,
# so file type cannot be validated
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103
return if amazon_s3?
if headers['content-type'].blank?
errors.add(:base, "Missing 'ContentType' header")
elsif !ALLOWED_CONTENT_TYPES.include?(headers['content-type'])
errors.add(:base, "Remote file content type '%{content_type}' not allowed. (Allowed content types: %{allowed})" % {
content_type: headers['content-type'],
allowed: ALLOWED_CONTENT_TYPES.join(', ')
})
end
end
def validate_file_size
# AWS-S3 presigned URLs don't respond to HTTP HEAD requests,
# so file size cannot be validated
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103
return if amazon_s3?
if headers['content-length'].to_i == 0
errors.add(:base, "Missing 'ContentLength' header")
elsif headers['content-length'].to_i > FILE_SIZE_LIMIT
errors.add(:base, 'Remote file larger than limit. (limit %{limit})' % {
limit: ActiveSupport::NumberHelper.number_to_human_size(FILE_SIZE_LIMIT)
})
end
end
def amazon_s3?
headers['Server'] == 'AmazonS3' && headers['x-amz-request-id'].present?
end
def headers
return {} if params[:remote_import_url].blank? || !valid_remote_import_url?
@headers ||= Gitlab::HTTP.head(params[:remote_import_url]).headers
end
end
end
end
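For reference, a minimal sketch of how this pre-refactor service was invoked; the `user` variable and the URL are illustrative, and the positional-argument signature matches the spec for this service further down:

```ruby
params = {
  path: 'sample-project',
  namespace: user.namespace,
  remote_import_url: 'https://example.com/export.tar.gz'
}

response = ::Import::GitlabProjects::CreateProjectFromRemoteFileService.new(user, params).execute
response.success? # => true when the import was scheduled
```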
# frozen_string_literal: true

+# Creates a new project with an associated project export file to be imported.
+# The project export file can be acquired through different strategies; the
+# default file_acquisition_strategy is uploading a file
+# (Import::GitlabProjects::FileAcquisitionStrategies::FileUpload).
module Import
  module GitlabProjects
-   class CreateProjectFromUploadedFileService
+   class CreateProjectService
      include ActiveModel::Validations
      include ::Services::ReturnServiceResponses

-     validate :required_params_presence
+     validates_presence_of :path, :namespace

+     # Creates a new CreateProjectService.
+     #
+     # @param [User] current_user
+     # @param [Hash] :params
+     # @param [Import::GitlabProjects::FileAcquisitionStrategies::*] :file_acquisition_strategy
-     def initialize(current_user, params = {})
+     def initialize(current_user, params:, file_acquisition_strategy: FileAcquisitionStrategies::FileUpload)
        @current_user = current_user
        @params = params.dup
+       @strategy = file_acquisition_strategy.new(current_user: current_user, params: params)
      end

+     # Creates a project with the strategy parameters
+     #
+     # @return [Services::ServiceResponse]
      def execute
-       return error(errors.full_messages.first) unless valid?
-       return error(project.errors.full_messages&.first) unless project.saved?
+       return error(errors.full_messages) unless valid?
+       return error(project.errors.full_messages) unless project.saved?

        success(project)
      rescue StandardError => e
        error(e.message)
      end

+     # Cascade the validation to the strategy
+     def valid?
+       super && strategy.valid?
+     end
+
+     # Merge with the strategy's errors
+     def errors
+       super.tap { _1.merge!(strategy.errors) }
+     end
+
+     def read_attribute_for_validation(key)
+       params[key]
+     end

      private

-     attr_reader :current_user, :params
+     attr_reader :current_user, :params, :strategy

-     def error(message)
-       super(message, :bad_request)
+     def error(messages)
+       messages = Array.wrap(messages)
+       message = messages.shift
+
+       super(message, :bad_request, pass_back: { other_errors: messages })
      end

      def project
@@ -43,22 +72,9 @@ module Import
          name: params[:name],
          path: params[:path],
          namespace_id: params[:namespace].id,
-         file: params[:file],
          overwrite: params[:overwrite],
          import_type: 'gitlab_project'
-       }
+       }.merge(strategy.project_params)
      end
-
-     def required_params
-       [:path, :namespace, :file]
-     end
-
-     def required_params_presence
-       required_params
-         .select { |key| params[key].blank? }
-         .each do |missing_parameter|
-           errors.add(:base, "Parameter '#{missing_parameter}' is required")
-         end
-     end
    end
  end
end
...
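To make the refactor concrete, here is a minimal sketch of invoking the new service with the default upload strategy. The `user` and `uploaded_file` variables are illustrative; the keyword arguments mirror the API endpoint changes further down:

```ruby
response = ::Import::GitlabProjects::CreateProjectService.new(
  user, # the importing User (illustrative)
  params: {
    path: 'sample-project',
    namespace: user.namespace,
    name: 'Sample Project',
    file: uploaded_file # an UploadedFile, as required by the FileUpload strategy
  },
  file_acquisition_strategy: ::Import::GitlabProjects::FileAcquisitionStrategies::FileUpload
).execute

response.success? ? response.payload : response.message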
# frozen_string_literal: true
module Import
module GitlabProjects
module FileAcquisitionStrategies
class FileUpload
include ActiveModel::Validations
validate :uploaded_file
def initialize(current_user: nil, params:)
@params = params
end
def project_params
@project_params ||= @params.slice(:file)
end
def file
@file ||= @params[:file]
end
private
def uploaded_file
return if file.present? && file.is_a?(UploadedFile)
errors.add(:file, 'must be uploaded')
end
end
end
end
end
# frozen_string_literal: true
module Import
module GitlabProjects
module FileAcquisitionStrategies
class RemoteFile
include ActiveModel::Validations
def self.allow_local_requests?
::Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
validates :file_url, addressable_url: {
schemes: %w(https),
allow_localhost: allow_local_requests?,
allow_local_network: allow_local_requests?,
dns_rebind_protection: true
}
validate :aws_s3, if: :validate_aws_s3?
# When removing the import_project_from_remote_file_s3 feature flag,
# remove the whole condition of this validation:
validates_with RemoteFileValidator, if: -> { validate_aws_s3? || !s3_request? }
def initialize(current_user: nil, params:)
@params = params
end
def project_params
@project_params ||= {
import_export_upload: ::ImportExportUpload.new(remote_import_url: file_url)
}
end
def file_url
@file_url ||= params[:remote_import_url]
end
def content_type
@content_type ||= headers['content-type']
end
def content_length
@content_length ||= headers['content-length'].to_i
end
private
attr_reader :params
def aws_s3
if s3_request?
errors.add(:base, 'To import from AWS S3 use `projects/remote-import-s3`')
end
end
def s3_request?
headers['Server'] == 'AmazonS3' && headers['x-amz-request-id'].present?
end
def validate_aws_s3?
::Feature.enabled?(:import_project_from_remote_file_s3, default_enabled: :yaml)
end
def headers
return {} if file_url.blank?
@headers ||= Gitlab::HTTP.head(file_url, timeout: 1.second).headers
rescue StandardError => e
errors.add(:base, "Failed to retrive headers: #{e.message}")
{}
end
end
end
end
end
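A sketch of exercising the strategy on its own (URL illustrative): calling `valid?` triggers the HEAD request and the validations above, and `project_params` supplies the attributes that `CreateProjectService` merges into the new project:

```ruby
strategy = ::Import::GitlabProjects::FileAcquisitionStrategies::RemoteFile.new(
  params: { remote_import_url: 'https://example.com/export.tar.gz' }
)

if strategy.valid?
  strategy.project_params # => { import_export_upload: #<ImportExportUpload ...> }
else
  strategy.errors.full_messages
end
```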
# frozen_string_literal: true
module Import
module GitlabProjects
module FileAcquisitionStrategies
class RemoteFileS3
include ActiveModel::Validations
include Gitlab::Utils::StrongMemoize
def self.allow_local_requests?
::Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
validates_presence_of :region, :bucket_name, :file_key, :access_key_id, :secret_access_key
validates :file_url, addressable_url: {
schemes: %w(https),
allow_localhost: allow_local_requests?,
allow_local_network: allow_local_requests?,
dns_rebind_protection: true
}
validates_with RemoteFileValidator
# The import itself has a limit of 24h. Since the URL is created before
# the import starts, we use a slightly longer expiration so that the URL
# does not expire during the import.
URL_EXPIRATION = 28.hours.seconds
def initialize(current_user: nil, params:)
@params = params
end
def project_params
@project_params ||= {
import_export_upload: ::ImportExportUpload.new(remote_import_url: file_url)
}
end
def file_url
@file_url ||= s3_object&.presigned_url(:get, expires_in: URL_EXPIRATION.to_i)
end
def content_type
@content_type ||= s3_object&.content_type
end
def content_length
@content_length ||= s3_object&.content_length.to_i
end
# Make the validated params/methods accessible
def read_attribute_for_validation(key)
return file_url if key == :file_url
params[key]
end
private
attr_reader :params
def s3_object
strong_memoize(:s3_object) do
build_s3_options
end
end
def build_s3_options
object = Aws::S3::Object.new(
params[:bucket_name],
params[:file_key],
client: Aws::S3::Client.new(
region: params[:region],
access_key_id: params[:access_key_id],
secret_access_key: params[:secret_access_key]
)
)
# Force-validate that the object exists and is accessible, since some
# exceptions are only raised when the object data is accessed.
unless object.exists?
errors.add(:base, "File not found '#{params[:file_key]}' in '#{params[:bucket_name]}'")
return
end
object
rescue StandardError => e
errors.add(:base, "Failed to open '#{params[:file_key]}' in '#{params[:bucket_name]}': #{e.message}")
nil
end
end
end
end
end
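For context, this is roughly the aws-sdk-s3 flow the strategy wraps; a standalone sketch with illustrative credentials and object names:

```ruby
require 'aws-sdk-s3'

client = Aws::S3::Client.new(
  region: 'us-east-1',          # illustrative values
  access_key_id: 'AKIA...',
  secret_access_key: 'secret'
)
object = Aws::S3::Object.new('my-bucket', 'exports/project.tar.gz', client: client)

object.exists?       # HEAD request; surfaces missing-object and permission errors early
object.content_type  # e.g. 'application/x-tar'
object.content_length

# 28 hours in seconds, matching URL_EXPIRATION above
object.presigned_url(:get, expires_in: 28 * 60 * 60)
```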
# frozen_string_literal: true
module Import
module GitlabProjects
# Validates the given object's #content_type and #content_length
# according to the Project Import requirements
class RemoteFileValidator < ActiveModel::Validator
FILE_SIZE_LIMIT = 10.gigabytes
ALLOWED_CONTENT_TYPES = [
'application/gzip',
# S3 uses different file types
'application/x-tar',
'application/x-gzip'
].freeze
def validate(record)
validate_content_length(record)
validate_content_type(record)
end
private
def validate_content_length(record)
if record.content_length.to_i <= 0
record.errors.add(:content_length, :size_too_small, file_size: humanize(1.byte))
elsif record.content_length > FILE_SIZE_LIMIT
record.errors.add(:content_length, :size_too_big, file_size: humanize(FILE_SIZE_LIMIT))
end
end
def humanize(number)
ActiveSupport::NumberHelper.number_to_human_size(number)
end
def validate_content_type(record)
return if ALLOWED_CONTENT_TYPES.include?(record.content_type)
record.errors.add(:content_type, "'%{content_type}' not allowed. (Allowed: %{allowed})" % {
content_type: record.content_type,
allowed: ALLOWED_CONTENT_TYPES.join(', ')
})
end
end
end
end
---
name: import_project_from_remote_file_s3
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/77259
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/350571
milestone: '14.9'
type: development
group: group::import
default_enabled: false
@@ -246,6 +246,61 @@ curl --request POST \

The `Content-Length` header must return a valid number. The maximum file size is 10 gigabytes.
The `Content-Type` header must be `application/gzip`.
## Import a file from AWS S3
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/348874) in GitLab 14.9 in [Beta](https://about.gitlab.com/handbook/product/gitlab-the-product/#beta), [with a flag](../administration/feature_flags.md) named `import_project_from_remote_file_s3`. Disabled by default.
FLAG:
On self-managed GitLab and GitLab.com, by default this feature is not available. To make it available, ask an administrator to [enable the feature flag](../administration/feature_flags.md) named `import_project_from_remote_file_s3`. This feature is not ready for production use.
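For example, an administrator with Rails console access can toggle the flag; a minimal sketch, assuming a standard GitLab installation:

```ruby
# In the GitLab Rails console:
Feature.enable(:import_project_from_remote_file_s3)

# To disable it again:
Feature.disable(:import_project_from_remote_file_s3)
```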
```plaintext
POST /projects/remote-import-s3
```
| Attribute           | Type           | Required | Description |
| ------------------- | -------------- | -------- | ----------- |
| `path`              | string         | yes      | The path of the new project. |
| `namespace`         | integer/string | no       | The ID or path of the namespace to import the project to. Defaults to the current user's namespace. |
| `name`              | string         | no       | The name of the project to import. If not provided, defaults to the path of the project. |
| `region`            | string         | yes      | [AWS S3 region name where the file is stored.](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html#Regions) |
| `bucket_name`       | string         | yes      | [AWS S3 bucket name where the file is stored.](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html) |
| `file_key`          | string         | yes      | [AWS S3 file key to identify the file.](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) |
| `access_key_id`     | string         | yes      | [AWS S3 access key ID.](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) |
| `secret_access_key` | string         | yes      | [AWS S3 secret access key.](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) |
The passed override parameters take precedence over all values defined in the export file.
```shell
curl --request POST \
--url "http://localhost:3000/api/v4/projects/remote-import-s3" \
--header "PRIVATE-TOKEN: <your gitlab access key>" \
--header 'Content-Type: application/json' \
--data '{
"name": "Sample Project",
"path": "sample-project",
"region": "<Your S3 region name>",
"bucket_name": "<Your S3 bucket name>",
"file_key": "<Your S3 file key>",
"access_key_id": "<Your AWS access key id>",
"secret_access_key": "<Your AWS secret access key>"
}'
```
```json
{
"id": 1,
"description": null,
"name": "Sample project",
"name_with_namespace": "Administrator / sample-project",
"path": "sample-project",
"path_with_namespace": "root/sample-project",
"created_at": "2018-02-13T09:05:58.023Z",
"import_status": "scheduled",
"correlation_id": "mezklWso3Za",
"failed_relations": [],
"import_error": null
}
```
## Import status

Get the status of an import.

...
@@ -87,14 +87,16 @@ module API

        validate_file!

-       response = ::Import::GitlabProjects::CreateProjectFromUploadedFileService.new(
+       response = ::Import::GitlabProjects::CreateProjectService.new(
          current_user,
+         params: {
            path: import_params[:path],
            namespace: namespace_from(import_params, current_user),
            name: import_params[:name],
            file: import_params[:file],
            overwrite: import_params[:overwrite],
            override: filtered_override_params(import_params)
+         }
        ).execute

        if response.success?
@@ -137,14 +139,66 @@ module API

        check_rate_limit! :project_import, scope: [current_user, :project_import]

-       response = ::Import::GitlabProjects::CreateProjectFromRemoteFileService.new(
+       response = ::Import::GitlabProjects::CreateProjectService.new(
          current_user,
+         params: {
            path: import_params[:path],
            namespace: namespace_from(import_params, current_user),
            name: import_params[:name],
            remote_import_url: import_params[:url],
            overwrite: import_params[:overwrite],
            override: filtered_override_params(import_params)
+         },
+         file_acquisition_strategy: ::Import::GitlabProjects::FileAcquisitionStrategies::RemoteFile
        ).execute

        if response.success?
          present(response.payload, with: Entities::ProjectImportStatus)
        else
          render_api_error!(response.message, response.http_status)
        end
      end
params do
requires :region, type: String, desc: 'AWS region'
requires :bucket_name, type: String, desc: 'Bucket name'
requires :file_key, type: String, desc: 'File key'
requires :access_key_id, type: String, desc: 'Access key id'
requires :secret_access_key, type: String, desc: 'Secret access key'
requires :path, type: String, desc: 'The new project path and name'
optional :name, type: String, desc: 'The name of the project to be imported. Defaults to the path of the project if not provided.'
optional :namespace, type: String, desc: "The ID or name of the namespace that the project will be imported into. Defaults to the current user's namespace."
optional :overwrite, type: Boolean, default: false, desc: 'If there is a project in the same namespace and with the same name overwrite it'
optional :override_params,
type: Hash,
desc: 'New project params to override values in the export' do
use :optional_project_params
end
end
desc 'Create a new project import using a file from AWS S3' do
detail 'This feature was introduced in GitLab 14.9.'
success Entities::ProjectImportStatus
end
post 'remote-import-s3' do
not_found! unless ::Feature.enabled?(:import_project_from_remote_file_s3, default_enabled: :yaml)
check_rate_limit! :project_import, scope: [current_user, :project_import]
response = ::Import::GitlabProjects::CreateProjectService.new(
current_user,
params: {
path: import_params[:path],
namespace: namespace_from(import_params, current_user),
name: import_params[:name],
overwrite: import_params[:overwrite],
override: filtered_override_params(import_params),
region: import_params[:region],
bucket_name: import_params[:bucket_name],
file_key: import_params[:file_key],
access_key_id: import_params[:access_key_id],
secret_access_key: import_params[:secret_access_key]
},
file_acquisition_strategy: ::Import::GitlabProjects::FileAcquisitionStrategies::RemoteFileS3
        ).execute

        if response.success?
...
@@ -6,6 +6,8 @@ module Gitlab
    UNTAR_MASK = 'u+rwX,go+rX,go-w'
    DEFAULT_DIR_MODE = 0700

+   FileOversizedError = Class.new(StandardError)

    def tar_czf(archive:, dir:)
      tar_with_options(archive: archive, dir: dir, options: 'czf')
    end
@@ -51,19 +53,30 @@ module Gitlab
    private

-   def download_or_copy_upload(uploader, upload_path)
+   def download_or_copy_upload(uploader, upload_path, size_limit: nil)
      if uploader.upload.local?
        copy_files(uploader.path, upload_path)
      else
-       download(uploader.url, upload_path)
+       download(uploader.url, upload_path, size_limit: size_limit)
      end
    end

-   def download(url, upload_path)
+   def download(url, upload_path, size_limit: nil)
      File.open(upload_path, 'w') do |file|
        # Download (stream) file from the uploader's location
-       IO.copy_stream(URI.parse(url).open, file)
+       IO.copy_stream(
+         URI.parse(url).open(progress_proc: file_size_limiter(size_limit)),
+         file
+       )
      end
+   rescue FileOversizedError
+     nil
    end

+   def file_size_limiter(limit)
+     return if limit.blank?
+
+     -> (current_size) { raise FileOversizedError if current_size > limit }
+   end

    def tar_with_options(archive:, dir:, options:)
...
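For context, open-uri invokes `progress_proc` with the number of bytes received so far, which is what lets the download abort mid-stream. A standalone sketch (URL, path, and limit are illustrative):

```ruby
require 'open-uri'

FileOversizedError = Class.new(StandardError)

size_limit = 10 * 1024**3 # 10 GB, mirroring RemoteFileValidator::FILE_SIZE_LIMIT
limiter = ->(current_size) { raise FileOversizedError if current_size > size_limit }

begin
  File.open('/tmp/export.tar.gz', 'w') do |file|
    IO.copy_stream(
      URI.parse('https://example.com/export.tar.gz').open(progress_proc: limiter),
      file
    )
  end
rescue FileOversizedError
  # A partial file may remain on disk; the caller decides how to handle it.
end
```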
@@ -72,9 +72,17 @@ module Gitlab
    import_export_upload = @importable.import_export_upload

    if import_export_upload.remote_import_url.present?
-     download(import_export_upload.remote_import_url, @archive_file)
+     download(
+       import_export_upload.remote_import_url,
+       @archive_file,
+       size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT
+     )
    else
-     download_or_copy_upload(import_export_upload.import_file, @archive_file)
+     download_or_copy_upload(
+       import_export_upload.import_file,
+       @archive_file,
+       size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT
+     )
    end
  end
...
@@ -17,6 +17,9 @@ RSpec.describe Gitlab::ImportExport::CommandLineUtil do
      def initialize
        @shared = Gitlab::ImportExport::Shared.new(nil)
      end

+     # Make the included methods public for testing
+     public :download_or_copy_upload, :download
    end.new
  end
@@ -38,6 +41,61 @@ RSpec.describe Gitlab::ImportExport::CommandLineUtil do
    expect(file_permissions("#{path}/uploads")).to eq(0755) # originally 555
  end
describe '#download_or_copy_upload' do
let(:upload) { instance_double(Upload, local?: local) }
let(:uploader) { instance_double(ImportExportUploader, path: :path, url: :url, upload: upload) }
let(:upload_path) { '/some/path' }
context 'when the upload is local' do
let(:local) { true }
it 'copies the file' do
expect(subject).to receive(:copy_files).with(:path, upload_path)
subject.download_or_copy_upload(uploader, upload_path)
end
end
context 'when the upload is remote' do
let(:local) { false }
it 'downloads the file' do
expect(subject).to receive(:download).with(:url, upload_path, size_limit: nil)
subject.download_or_copy_upload(uploader, upload_path)
end
end
end
describe '#download' do
before do
stub_request(:get, 'http://localhost:3000/file')
.to_return(
status: 200,
body: File.open(archive),
headers: {
'Content-Type' => 'application/x-tar'
}
)
end
let(:tempfile) { Tempfile.new('test', path) }
it 'downloads the file in the given path' do
subject.download('http://localhost:3000/file', tempfile)
expect(File.exist?(tempfile)).to eq(true)
expect(tempfile.size).to eq(File.size(archive))
end
it 'limits the size of the downloaded file' do
subject.download('http://localhost:3000/file', tempfile, size_limit: 1.byte)
expect(File.exist?(tempfile)).to eq(true)
expect(tempfile.size).to eq(0)
end
end
  describe '#gzip' do
    it 'compresses specified file' do
      tempfile = Tempfile.new('test', path)
...
@@ -72,6 +72,25 @@ RSpec.describe Gitlab::ImportExport::FileImporter do
    expect(shared.export_path).to include('test/abcd')
  end
context 'when the import file is not remote' do
include AfterNextHelpers
it 'copies or downloads the file' do
import_export_upload = build(:import_export_upload)
project = build(:project, import_export_upload: import_export_upload)
expect_next(described_class)
.to receive(:download_or_copy_upload)
.with(
import_export_upload.import_file,
kind_of(String),
size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT
)
described_class.import(importable: project, archive_file: nil, shared: shared)
end
end
  context 'when the import file is remote' do
    include AfterNextHelpers
@@ -82,7 +101,11 @@ RSpec.describe Gitlab::ImportExport::FileImporter do
      expect_next(described_class)
        .to receive(:download)
-       .with(file_url, kind_of(String))
+       .with(
+         file_url,
+         kind_of(String),
+         size_limit: ::Import::GitlabProjects::RemoteFileValidator::FILE_SIZE_LIMIT
+       )

      described_class.import(importable: project, archive_file: nil, shared: shared)
    end
...
@@ -2,7 +2,7 @@

require 'spec_helper'

-RSpec.describe API::ProjectImport do
+RSpec.describe API::ProjectImport, :aggregate_failures do
  include WorkhorseHelpers
  include AfterNextHelpers
@@ -329,7 +329,7 @@ RSpec.describe API::ProjectImport do
        )
        service_response = ServiceResponse.success(payload: project)

-       expect_next(::Import::GitlabProjects::CreateProjectFromRemoteFileService)
+       expect_next(::Import::GitlabProjects::CreateProjectService)
          .to receive(:execute)
          .and_return(service_response)
@@ -352,7 +352,86 @@ RSpec.describe API::ProjectImport do
          message: 'Failed to import',
          http_status: :bad_request
        )

-       expect_next(::Import::GitlabProjects::CreateProjectFromRemoteFileService)
+       expect_next(::Import::GitlabProjects::CreateProjectService)
          .to receive(:execute)
          .and_return(service_response)

        subject

        expect(response).to have_gitlab_http_status(:bad_request)
        expect(json_response).to eq({
          'message' => 'Failed to import'
        })
      end
    end
  end
end
describe 'POST /projects/remote-import-s3' do
subject do
post api('/projects/remote-import-s3', user), params: params
end
let(:params) do
{
path: 'test-import',
region: 'region_name',
bucket_name: 'bucket_name',
file_key: 'file_key',
access_key_id: 'access_key_id',
secret_access_key: 'secret_access_key'
}
end
it_behaves_like 'requires authentication'
it 'returns NOT FOUND when the feature is disabled' do
stub_feature_flags(import_project_from_remote_file_s3: false)
subject
expect(response).to have_gitlab_http_status(:not_found)
end
context 'when the feature flag is enabled' do
before do
stub_feature_flags(import_project_from_remote_file_s3: true)
end
context 'when the response is successful' do
it 'schedules the import successfully' do
project = create(
:project,
namespace: user.namespace,
name: 'test-import',
path: 'test-import'
)
service_response = ServiceResponse.success(payload: project)
expect_next(::Import::GitlabProjects::CreateProjectService)
.to receive(:execute)
.and_return(service_response)
subject
expect(response).to have_gitlab_http_status(:created)
expect(json_response).to include({
'id' => project.id,
'name' => 'test-import',
'name_with_namespace' => "#{user.namespace.name} / test-import",
'path' => 'test-import',
'path_with_namespace' => "#{user.namespace.path}/test-import"
})
end
end
context 'when the service returns an error' do
it 'fails to schedule the import' do
service_response = ServiceResponse.error(
message: 'Failed to import',
http_status: :bad_request
)
expect_next(::Import::GitlabProjects::CreateProjectService)
  .to receive(:execute)
  .and_return(service_response)
...
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do
let(:remote_url) { 'https://external.file.path/file' }
let(:params) do
{
path: 'path',
namespace: user.namespace,
name: 'name',
remote_import_url: remote_url
}
end
let_it_be(:user) { create(:user) }
subject { described_class.new(user, params) }
shared_examples 'successfully import' do |content_type|
it 'creates a project and returns a successful response' do
stub_headers_for(remote_url, {
'content-type' => content_type,
'content-length' => '10'
})
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(1)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
expect(response.payload).to be_instance_of(Project)
expect(response.payload.name).to eq('name')
expect(response.payload.path).to eq('path')
expect(response.payload.namespace).to eq(user.namespace)
end
end
it_behaves_like 'successfully import', 'application/gzip'
it_behaves_like 'successfully import', 'application/x-tar'
context 'when the file url is invalid' do
it 'returns an error response with the reason for the failure' do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: false)
params[:remote_import_url] = 'https://localhost/file'
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq('Requests to localhost are not allowed')
end
end
context 'validate file type' do
it 'returns an error response when the file type is not provided' do
stub_headers_for(remote_url, { 'content-length' => '10' })
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq("Missing 'ContentType' header")
end
it 'returns an error response when the file type is not allowed' do
stub_headers_for(remote_url, {
'content-type' => 'application/js',
'content-length' => '10'
})
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq("Remote file content type 'application/js' not allowed. (Allowed content types: application/gzip, application/x-tar)")
end
end
context 'validate content length' do
it 'returns an error response when the file size is not provided' do
stub_headers_for(remote_url, { 'content-type' => 'application/gzip' })
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq("Missing 'ContentLength' header")
end
it 'returns an error response when the file size is not a number' do
stub_headers_for(remote_url, {
'content-type' => 'application/gzip',
'content-length' => 'some text'
})
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq("Missing 'ContentLength' header")
end
it 'returns an error response when the file is larger than allowed' do
stub_headers_for(remote_url, {
'content-type' => 'application/gzip',
'content-length' => 11.gigabytes.to_s
})
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq('Remote file larger than limit. (limit 10 GB)')
end
end
it 'does not validate content-type or content-length when the file is stored in AWS-S3' do
stub_headers_for(remote_url, {
'Server' => 'AmazonS3',
'x-amz-request-id' => 'Something'
})
response = nil
expect { response = subject.execute }
.to change(Project, :count)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
end
context 'when required parameters are not provided' do
let(:params) { {} }
it 'returns an error response with the reason for the failure' do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: false)
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq("Parameter 'path' is required")
expect(subject.errors.full_messages).to match_array([
"Missing 'ContentLength' header",
"Missing 'ContentType' header",
"Parameter 'namespace' is required",
"Parameter 'path' is required",
"Parameter 'remote_import_url' is required"
])
end
end
context 'when the project is invalid' do
it 'returns an error response with the reason for the failure' do
create(:project, namespace: user.namespace, path: 'path')
stub_headers_for(remote_url, {
'content-type' => 'application/gzip',
'content-length' => '10'
})
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq('Path has already been taken')
end
end
def stub_headers_for(url, headers = {})
allow(Gitlab::HTTP)
.to receive(:head)
.with(url)
.and_return(double(headers: headers))
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::CreateProjectFromUploadedFileService do
let(:file_upload) do
fixture_file_upload('spec/features/projects/import_export/test_project_export.tar.gz')
end
let(:params) do
{
path: 'path',
namespace: user.namespace,
name: 'name',
file: file_upload
}
end
let_it_be(:user) { create(:user) }
subject { described_class.new(user, params) }
it 'creates a project and returns a successful response' do
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(1)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
expect(response.payload).to be_instance_of(Project)
expect(response.payload.name).to eq('name')
expect(response.payload.path).to eq('path')
expect(response.payload.namespace).to eq(user.namespace)
end
context 'when required parameters are not provided' do
let(:params) { {} }
it 'returns an error response with the reason for the failure' do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: false)
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq("Parameter 'path' is required")
expect(subject.errors.full_messages).to match_array([
"Parameter 'namespace' is required",
"Parameter 'path' is required",
"Parameter 'file' is required"
])
end
end
context 'when the project is invalid' do
it 'returns an error response with the reason for the failure' do
create(:project, namespace: user.namespace, path: 'path')
response = nil
expect { response = subject.execute }
.not_to change(Project, :count)
expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq('Path has already been taken')
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::CreateProjectService, :aggregate_failures do
let(:fake_file_acquisition_strategy) do
Class.new do
attr_reader :errors
def initialize(...)
@errors = ActiveModel::Errors.new(self)
end
def valid?
true
end
def project_params
{}
end
end
end
let(:params) do
{
path: 'path',
namespace: user.namespace,
name: 'name'
}
end
let_it_be(:user) { create(:user) }
subject { described_class.new(user, params: params, file_acquisition_strategy: FakeStrategy) }
before do
stub_const('FakeStrategy', fake_file_acquisition_strategy)
end
describe 'validation' do
it { expect(subject).to be_valid }
it 'validates presence of path' do
params[:path] = nil
invalid = described_class.new(user, params: params, file_acquisition_strategy: FakeStrategy)
expect(invalid).not_to be_valid
expect(invalid.errors.full_messages).to include("Path can't be blank")
end
it 'validates presence of namespace' do
params[:namespace] = nil
invalid = described_class.new(user, params: params, file_acquisition_strategy: FakeStrategy)
expect(invalid).not_to be_valid
expect(invalid.errors.full_messages).to include("Namespace can't be blank")
end
it 'is invalid if the strategy is invalid' do
expect_next_instance_of(FakeStrategy) do |strategy|
allow(strategy).to receive(:valid?).and_return(false)
allow(strategy).to receive(:errors).and_wrap_original do |original|
original.call.tap do |errors|
errors.add(:base, "some error")
end
end
end
invalid = described_class.new(user, params: params, file_acquisition_strategy: FakeStrategy)
expect(invalid).not_to be_valid
expect(invalid.errors.full_messages).to include("some error")
expect(invalid.errors.full_messages).to include("some error")
end
end
describe '#execute' do
it 'creates a project successfully' do
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(1)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
expect(response.payload).to be_instance_of(Project)
expect(response.payload.name).to eq('name')
expect(response.payload.path).to eq('path')
expect(response.payload.namespace).to eq(user.namespace)
project = Project.last
expect(project.name).to eq('name')
expect(project.path).to eq('path')
expect(project.namespace_id).to eq(user.namespace.id)
expect(project.import_type).to eq('gitlab_project')
end
context 'when the project creation raises an error' do
it 'fails to create a project' do
expect_next_instance_of(Projects::GitlabProjectsImportService) do |service|
expect(service).to receive(:execute).and_raise(StandardError, "failed to create project")
end
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(0)
expect(response).to be_error
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq("failed to create project")
expect(response.payload).to eq(other_errors: [])
end
end
context 'when the validation fails' do
it 'fails to create a project' do
params.delete(:path)
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(0)
expect(response).to be_error
expect(response.http_status).to eq(:bad_request)
expect(response.message).to eq("Path can't be blank")
expect(response.payload).to eq(other_errors: [])
end
context 'when the project contains multiple errors' do
it 'fails to create a project' do
params.merge!(name: '_ an invalid name _', path: '_ an invalid path _')
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(0)
expect(response).to be_error
expect(response.http_status).to eq(:bad_request)
expect(response.message)
.to eq(%{Project namespace path can contain only letters, digits, '_', '-' and '.'. Cannot start with '-', end in '.git' or end in '.atom'})
expect(response.payload).to eq(other_errors: [
%{Path can contain only letters, digits, '_', '-' and '.'. Cannot start with '-', end in '.git' or end in '.atom'},
%{Path must not start or end with a special character and must not contain consecutive special characters.}
])
end
end
end
context 'when the strategy adds project parameters' do
before do
expect_next_instance_of(FakeStrategy) do |strategy|
expect(strategy).to receive(:project_params).and_return(name: 'the strategy name')
end
subject.valid?
end
it 'merges the strategy project parameters' do
response = nil
expect { response = subject.execute }
.to change(Project, :count).by(1)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
expect(response.payload).to be_instance_of(Project)
expect(response.payload.name).to eq('the strategy name')
expect(response.payload.path).to eq('path')
expect(response.payload.namespace).to eq(user.namespace)
project = Project.last
expect(project.name).to eq('the strategy name')
expect(project.path).to eq('path')
expect(project.namespace_id).to eq(user.namespace.id)
expect(project.import_type).to eq('gitlab_project')
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::FileAcquisitionStrategies::FileUpload, :aggregate_failures do
let(:file) { UploadedFile.new(File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz')) }
describe 'validation' do
it 'validates presence of file' do
valid = described_class.new(params: { file: file })
expect(valid).to be_valid
invalid = described_class.new(params: {})
expect(invalid).not_to be_valid
expect(invalid.errors.full_messages).to include("File must be uploaded")
end
end
describe '#project_params' do
it 'returns the file to upload in the params' do
subject = described_class.new(params: { file: file })
expect(subject.project_params).to eq(file: file)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::FileAcquisitionStrategies::RemoteFileS3, :aggregate_failures do
let(:region_name) { 'region_name' }
let(:bucket_name) { 'bucket_name' }
let(:file_key) { 'file_key' }
let(:access_key_id) { 'access_key_id' }
let(:secret_access_key) { 'secret_access_key' }
let(:file_exists) { true }
let(:content_type) { 'application/x-tar' }
let(:content_length) { 2.gigabytes }
let(:presigned_url) { 'https://external.file.path/file.tar.gz?PRESIGNED=true&TOKEN=some-token' }
let(:s3_double) do
instance_double(
Aws::S3::Object,
exists?: file_exists,
content_type: content_type,
content_length: content_length,
presigned_url: presigned_url
)
end
let(:params) do
{
region: region_name,
bucket_name: bucket_name,
file_key: file_key,
access_key_id: access_key_id,
secret_access_key: secret_access_key
}
end
subject { described_class.new(params: params) }
before do
# Avoid network requests
expect(Aws::S3::Client).to receive(:new).and_return(double)
expect(Aws::S3::Object).to receive(:new).and_return(s3_double)
end
describe 'validation' do
it { expect(subject).to be_valid }
%i[region bucket_name file_key access_key_id secret_access_key].each do |key|
context "#{key} validation" do
before do
params[key] = nil
end
it "validates presence of #{key}" do
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("#{key.to_s.humanize} can't be blank")
end
end
end
context 'content-length validation' do
let(:content_length) { 11.gigabytes }
it 'validates the remote content-length' do
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include('Content length is too big (should be at most 10 GB)')
end
end
context 'content-type validation' do
let(:content_type) { 'unknown' }
it 'validates the remote content-type' do
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("Content type 'unknown' not allowed. (Allowed: application/gzip, application/x-tar, application/x-gzip)")
end
end
context 'file_url validation' do
let(:presigned_url) { 'ftp://invalid.url/file.tar.gz' }
it 'validates the file_url scheme' do
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("File url is blocked: Only allowed schemes are https")
end
context 'when localhost urls are not allowed' do
let(:presigned_url) { 'https://localhost:3000/file.tar.gz' }
it 'validates the file_url' do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: false)
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("File url is blocked: Requests to localhost are not allowed")
end
end
end
context 'when the remote file does not exist' do
it 'adds an error when the remote file cannot be found' do
expect(s3_double).to receive(:exists?).and_return(false)
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("File not found 'file_key' in 'bucket_name'")
end
end
context 'when it fails to build the s3 object' do
it 'adds an error when building the S3 object fails' do
expect(s3_double).to receive(:exists?).and_raise(StandardError, "some error")
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("Failed to open 'file_key' in 'bucket_name': some error")
end
end
end
describe '#project_params' do
it 'returns import_export_upload in the params' do
subject = described_class.new(params: { remote_import_url: presigned_url })
expect(subject.project_params).to match(
import_export_upload: an_instance_of(::ImportExportUpload)
)
expect(subject.project_params[:import_export_upload]).to have_attributes(
remote_import_url: presigned_url
)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::FileAcquisitionStrategies::RemoteFile, :aggregate_failures do
let(:remote_url) { 'https://external.file.path/file.tar.gz' }
let(:params) { { remote_import_url: remote_url } }
subject { described_class.new(params: params) }
before do
stub_headers_for(remote_url, {
'content-length' => 10.gigabytes,
'content-type' => 'application/gzip'
})
end
describe 'validation' do
it { expect(subject).to be_valid }
context 'file_url validation' do
let(:remote_url) { 'ftp://invalid.url/file.tar.gz' }
it 'validates the file_url scheme' do
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("File url is blocked: Only allowed schemes are https")
end
context 'when localhost urls are not allowed' do
let(:remote_url) { 'https://localhost:3000/file.tar.gz' }
it 'validates the file_url' do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: false)
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("File url is blocked: Requests to localhost are not allowed")
end
end
end
context 'when import_project_from_remote_file_s3 is enabled' do
before do
stub_feature_flags(import_project_from_remote_file_s3: true)
end
context 'when the HTTP request fails to retrieve the headers' do
it 'adds the error message' do
expect(Gitlab::HTTP)
.to receive(:head)
.and_raise(StandardError, 'request invalid')
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include('Failed to retrieve headers: request invalid')
end
end
it 'validates the remote content-length' do
stub_headers_for(remote_url, { 'content-length' => 11.gigabytes })
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include('Content length is too big (should be at most 10 GB)')
end
it 'validates the remote content-type' do
stub_headers_for(remote_url, { 'content-type' => 'unknown' })
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("Content type 'unknown' not allowed. (Allowed: application/gzip, application/x-tar, application/x-gzip)")
end
context 'when trying to import from AWS S3' do
it 'adds an error suggesting to use `projects/remote-import-s3`' do
stub_headers_for(
remote_url,
'Server' => 'AmazonS3',
'x-amz-request-id' => 'some-id'
)
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include('To import from AWS S3 use `projects/remote-import-s3`')
end
end
end
context 'when import_project_from_remote_file_s3 is disabled' do
before do
stub_feature_flags(import_project_from_remote_file_s3: false)
end
context 'when trying to import from AWS S3' do
it 'does not validate the remote content-length or content-type' do
stub_headers_for(
remote_url,
'Server' => 'AmazonS3',
'x-amz-request-id' => 'some-id',
'content-length' => 11.gigabytes,
'content-type' => 'unknown'
)
expect(subject).to be_valid
end
end
context 'when NOT trying to import from AWS S3' do
it 'validates content-length and content-type' do
stub_headers_for(
remote_url,
'Server' => 'NOT AWS S3',
'content-length' => 11.gigabytes,
'content-type' => 'unknown'
)
expect(subject).not_to be_valid
expect(subject.errors.full_messages)
.to include("Content type 'unknown' not allowed. (Allowed: application/gzip, application/x-tar, application/x-gzip)")
expect(subject.errors.full_messages)
.to include('Content length is too big (should be at most 10 GB)')
end
end
end
end
describe '#project_params' do
it 'returns import_export_upload in the params' do
subject = described_class.new(params: { remote_import_url: remote_url })
expect(subject.project_params).to match(
import_export_upload: an_instance_of(::ImportExportUpload)
)
expect(subject.project_params[:import_export_upload]).to have_attributes(
remote_import_url: remote_url
)
end
end
def stub_headers_for(url, headers = {})
allow(Gitlab::HTTP)
.to receive(:head)
.with(url, timeout: 1.second)
.and_return(double(headers: headers)) # rubocop: disable RSpec/VerifiedDoubles
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Import::GitlabProjects::RemoteFileValidator, :aggregate_failures do
let(:validated_class) do
Class.new do
include ActiveModel::Validations
def self.name
'AClass'
end
attr_accessor :content_type, :content_length
def initialize(content_length:, content_type:)
@content_type = content_type
@content_length = content_length
end
end
end
let(:validated_object) { validated_class.new(content_length: 1.gigabytes, content_type: 'application/gzip') }
subject { described_class.new }
it 'does nothing when the object is valid' do
subject.validate(validated_object)
expect(validated_object.errors.full_messages).to be_empty
end
context 'content_length validation' do
it 'is invalid with file too small' do
validated_object.content_length = nil
subject.validate(validated_object)
expect(validated_object.errors.full_messages)
.to include('Content length is too small (should be at least 1 Byte)')
end
it 'is invalid with file too large' do
validated_object.content_length = described_class::FILE_SIZE_LIMIT + 1
subject.validate(validated_object)
expect(validated_object.errors.full_messages)
.to include('Content length is too big (should be at most 10 GB)')
end
end
context 'content_type validation' do
it 'only allows ALLOWED_CONTENT_TYPES as content_type' do
described_class::ALLOWED_CONTENT_TYPES.each do |content_type|
validated_object.content_type = content_type
subject.validate(validated_object)
expect(validated_object.errors.to_a).to be_empty
end
validated_object.content_type = 'unknown'
subject.validate(validated_object)
expect(validated_object.errors.full_messages)
.to include("Content type 'unknown' not allowed. (Allowed: application/gzip, application/x-tar, application/x-gzip)")
end
end
end