Commit 4def2cb0 authored by Michael Kozono

Merge branch 'sh-accelerate-s3-copy' into 'master'

Fix large S3 uploads failing to finalize

See merge request gitlab-org/gitlab!50922
parents 8337dfbe 3714fc8f
---
title: Fix large S3 uploads failing to finalize
merge_request: 50922
author:
type: fixed
---
name: s3_multithreaded_uploads
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50922
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/296772
milestone: '13.8'
type: development
group: group::continuous integration
default_enabled: true
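
Because the flag ships with default_enabled: true, the multithreaded copy path is on by default. If it needs to be turned off on a given instance, the flag can be toggled from a Rails console; a minimal sketch, assuming GitLab's standard Feature API:

# In a GitLab Rails console (sketch; assumes GitLab's standard Feature API).
# The flag defaults to enabled, so it must be disabled explicitly to fall
# back to the single-request copy_object path:
Feature.disable(:s3_multithreaded_uploads)

# Re-enable the multithreaded fog-aws copy path:
Feature.enable(:s3_multithreaded_uploads)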
@@ -17,7 +17,22 @@ module CarrierWave
   class Fog < Abstract
     class File
       def copy_to(new_path)
-        connection.copy_object(@uploader.fog_directory, file.key, @uploader.fog_directory, new_path, copy_to_options)
+        # fog-aws needs multipart uploads to copy files above 5 GB,
+        # and it is currently the only Fog provider that supports
+        # multithreaded uploads (https://github.com/fog/fog-aws/pull/579).
+        # Multithreaded uploads are essential for copying large amounts of data
+        # within the request timeout.
+        if ::Feature.enabled?(:s3_multithreaded_uploads, default_enabled: true) && fog_provider == 'AWS'
+          file.concurrency = 10 # AWS SDK uses 10 threads by default
+          file.copy(@uploader.fog_directory, new_path, copy_to_options)
+        else
+          # Some Fog providers may issue a GET request (https://github.com/fog/fog-google/issues/512)
+          # instead of a HEAD request after the transfer completes,
+          # which might cause the file to be downloaded locally.
+          # We fall back to the original copy_object for non-AWS providers.
+          connection.copy_object(@uploader.fog_directory, file.key, @uploader.fog_directory, new_path, copy_to_options)
+        end
+
         CarrierWave::Storage::Fog::File.new(@uploader, @base, new_path)
       end
     end
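For context, the sketch below shows roughly what the new branch does when fog-aws is driven directly, outside CarrierWave. The bucket names, object key, and credentials are placeholder assumptions; only concurrency and copy come from the fog-aws API referenced above:

require 'fog/aws'

# Placeholder credentials and bucket/key names, for illustration only.
connection = Fog::Storage.new(
  provider: 'AWS',
  aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
  aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY']
)

file = connection.directories.get('example-bucket').files.get('large-artifact.tar.gz')

# S3 rejects a single CopyObject request for objects above 5 GB, so fog-aws
# splits the copy into UploadPartCopy requests; `concurrency` controls how
# many parts are copied in parallel (https://github.com/fog/fog-aws/pull/579).
file.concurrency = 10
file.copy('example-bucket', 'copies/large-artifact.tar.gz')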
@@ -6,38 +6,87 @@ RSpec.describe 'CarrierWave::Storage::Fog::File' do
   let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) }
   let(:uploader) { uploader_class.new }
   let(:storage) { CarrierWave::Storage::Fog.new(uploader) }
-  let(:azure_options) do
-    {
-      azure_storage_account_name: 'AZURE_ACCOUNT_NAME',
-      azure_storage_access_key: 'AZURE_ACCESS_KEY',
-      provider: 'AzureRM'
-    }
-  end
+  let(:bucket_name) { 'some-bucket' }
+  let(:connection) { ::Fog::Storage.new(connection_options) }
+  let(:bucket) { connection.directories.new(key: bucket_name) }
+  let(:test_filename) { 'test' }
+  let(:test_data) { File.read(Rails.root.join('spec/support/gitlab_stubs/gitlab_ci.yml')) }
 
-  subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') }
+  subject { CarrierWave::Storage::Fog::File.new(uploader, storage, test_filename) }
 
   before do
-    require 'fog/azurerm'
-    allow(uploader).to receive(:fog_credentials).and_return(azure_options)
-    Fog.mock!
+    require 'fog/aws'
+
+    stub_object_storage(connection_params: connection_options, remote_directory: bucket_name)
+
+    allow(uploader).to receive(:fog_directory).and_return(bucket_name)
+    allow(uploader).to receive(:fog_credentials).and_return(connection_options)
+
+    bucket.files.create(key: test_filename, body: test_data) # rubocop:disable Rails/SaveBang
   end
 
-  describe '#authenticated_url' do
-    context 'with Azure' do
+  context 'AWS' do
+    let(:connection_options) do
+      {
+        provider: 'AWS',
+        aws_access_key_id: 'AWS_ACCESS_KEY',
+        aws_secret_access_key: 'AWS_SECRET_KEY'
+      }
+    end
+
+    describe '#copy_to' do
+      let(:dest_filename) { 'copied.txt' }
+
+      it 'copies the file' do
+        result = subject.copy_to(dest_filename)
+
+        expect(result.exists?).to be true
+        expect(result.read).to eq(test_data)
+
+        # Sanity check that the file actually is there
+        copied = bucket.files.get(dest_filename)
+        expect(copied).to be_present
+        expect(copied.body).to eq(test_data)
+      end
+    end
+  end
+
+  context 'Azure' do
+    let(:connection_options) do
+      {
+        provider: 'AzureRM',
+        azure_storage_account_name: 'AZURE_ACCOUNT_NAME',
+        azure_storage_access_key: 'AZURE_ACCESS_KEY'
+      }
+    end
+
+    describe '#copy_to' do
+      let(:dest_filename) { 'copied.txt' }
+
+      it 'copies the file' do
+        result = subject.copy_to(dest_filename)
+
+        # The Fog Azure provider doesn't mock the actual copied data
+        expect(result.exists?).to be true
+      end
+    end
+
+    describe '#authenticated_url' do
       it 'has an authenticated URL' do
         expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
       end
-    end
 
-    context 'with custom expire_at' do
-      it 'properly sets expires param' do
-        expire_at = 24.hours.from_now
+      context 'with custom expire_at' do
+        it 'properly sets expires param' do
+          expire_at = 24.hours.from_now
 
-        expect_next_instance_of(Fog::Storage::AzureRM::File) do |file|
-          expect(file).to receive(:url).with(expire_at).and_call_original
-        end
+          expect_next_instance_of(Fog::Storage::AzureRM::File) do |file|
+            expect(file).to receive(:url).with(expire_at).and_call_original
+          end
 
-        expect(subject.authenticated_url(expire_at: expire_at)).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
-      end
-    end
-  end
+          expect(subject.authenticated_url(expire_at: expire_at)).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
+        end
+      end
+    end
+  end
 end
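
The stub_object_storage call in the before block is a GitLab spec helper rather than a Fog API. Conceptually it switches Fog into mock mode and pre-creates the test bucket; a simplified sketch of the idea, not the real helper:

# Simplified sketch only; the real helper lives in GitLab's spec/support
# and handles more cases than this.
def stub_object_storage(connection_params:, remote_directory:)
  Fog.mock! # serve all Fog calls from in-memory mock backends

  connection = ::Fog::Storage.new(connection_params)
  connection.directories.create(key: remote_directory) # pre-create the bucket
end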