Commit 7c4794bc authored by Stan Hu

Add Azure Blob Storage support

This uses our custom Azure gem
(https://gitlab.com/gitlab-org/gitlab-fog-azure-rm) to integrate direct
upload support with GitLab.

Because the Azure Put Blob API does not accept chunked transfer
encoding, uploads cannot be streamed to a pre-signed URL without first
buffering the entire file to disk. To avoid that, we add an Azure
client directly in Workhorse that uses the Put Block and Put Block List
APIs.
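
As a rough illustration, the Put Block / Put Block List flow stages the
stream in fixed-size blocks and commits the block list at the end, so no
temporary file is needed. A minimal sketch, assuming the `azure-storage`
gem's `BlobService` API; the credentials, container, and blob names are
placeholders:

```ruby
require "azure/storage"
require "base64"
require "securerandom"

# Sketch only: stream an IO to Azure Blob Storage in 4 MB blocks without
# buffering the whole file first. Assumes the azure-storage gem's
# BlobService API; all names and keys below are placeholders.
blobs = Azure::Storage::Blob::BlobService.create(
  storage_account_name: "example",
  storage_access_key: "bm90LWEtcmVhbC1rZXk="
)

input_io = $stdin # any IO; total length need not be known up front
block_list = []

while (chunk = input_io.read(4 * 1024 * 1024))
  block_id = Base64.strict_encode64(SecureRandom.uuid) # IDs must be Base64
  blobs.put_blob_block("container", "blob", block_id, chunk)
  block_list << [block_id, :uncommitted]
end

# Put Block List assembles the staged blocks into the final blob.
blobs.commit_blob_blocks("container", "blob", block_list)
```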

The Workhorse client is implemented in
https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/555. We
use the Go Cloud Development Kit to generate a URL
(e.g. `azblob://container`), an approach that can be extended to other
object storage providers.

Part of https://gitlab.com/gitlab-org/gitlab/-/issues/25877
parent 0d661320
......@@ -119,6 +119,7 @@ gem 'fog-local', '~> 0.6'
gem 'fog-openstack', '~> 1.0'
gem 'fog-rackspace', '~> 0.1.1'
gem 'fog-aliyun', '~> 0.3'
gem 'gitlab-fog-azure-rm', '~> 0.7', require: false
# for Google storage
gem 'google-api-client', '~> 0.33'
......
......@@ -112,6 +112,15 @@ GEM
aws-sigv4 (~> 1.1)
aws-sigv4 (1.2.1)
aws-eventstream (~> 1, >= 1.0.2)
azure-core (0.1.15)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6)
azure-storage (0.15.0.preview)
azure-core (~> 0.1)
faraday (~> 0.9)
faraday_middleware (~> 0.10)
nokogiri (~> 1.6, >= 1.6.8)
babosa (1.0.2)
base32 (0.3.2)
batch-loader (1.4.0)
......@@ -313,6 +322,9 @@ GEM
railties (>= 4.2.0)
faraday (0.17.3)
multipart-post (>= 1.2, < 3)
faraday-cookie_jar (0.0.6)
faraday (>= 0.7.4)
http-cookie (~> 1.0.0)
faraday-http-cache (2.0.0)
faraday (~> 0.8)
faraday_middleware (0.14.0)
......@@ -407,6 +419,12 @@ GEM
github-markup (1.7.0)
gitlab-chronic (0.10.5)
numerizer (~> 0.2)
gitlab-fog-azure-rm (0.7.0)
azure-storage (~> 0.15.0.preview)
fog-core (= 2.1.0)
fog-json (~> 1.2.0)
mime-types
ms_rest_azure (~> 0.12.0)
gitlab-labkit (0.12.1)
actionpack (>= 5.0.0, < 6.1.0)
activesupport (>= 5.0.0, < 6.1.0)
......@@ -668,6 +686,15 @@ GEM
mini_mime (1.0.2)
mini_portile2 (2.4.0)
minitest (5.11.3)
ms_rest (0.7.6)
concurrent-ruby (~> 1.0)
faraday (>= 0.9, < 2.0.0)
timeliness (~> 0.3.10)
ms_rest_azure (0.12.0)
concurrent-ruby (~> 1.0)
faraday (>= 0.9, < 2.0.0)
faraday-cookie_jar (~> 0.0.6)
ms_rest (~> 0.7.6)
msgpack (1.3.1)
multi_json (1.14.1)
multi_xml (0.6.0)
......@@ -1104,6 +1131,7 @@ GEM
thrift (0.11.0.0)
tilt (2.0.10)
timecop (0.9.1)
timeliness (0.3.10)
timfel-krb5-auth (0.8.3)
toml (0.2.0)
parslet (~> 1.8.0)
......@@ -1275,6 +1303,7 @@ DEPENDENCIES
gitaly (~> 13.3.0.pre.rc1)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-fog-azure-rm (~> 0.7)
gitlab-labkit (= 0.12.1)
gitlab-license (~> 1.0)
gitlab-mail_room (~> 0.0.6)
......
---
title: Add Azure Blob Storage support
merge_request: 38882
author:
type: added
......@@ -4,6 +4,12 @@ require "carrierwave/storage/fog"
# This pulls in https://github.com/carrierwaveuploader/carrierwave/pull/2504 to support
# sending AWS S3 encryption headers when copying objects.
#
# This patch also incorporates
# https://github.com/carrierwaveuploader/carrierwave/pull/2375 to
# provide Azure support. This is already in CarrierWave v2.1.x, but
# upgrading this gem is a significant task:
# https://gitlab.com/gitlab-org/gitlab/-/issues/216067
module CarrierWave
module Storage
class Fog < Abstract
......@@ -16,6 +22,31 @@ module CarrierWave
def copy_to_options
acl_header.merge(@uploader.fog_attributes)
end
def authenticated_url(options = {})
if %w[AWS Google Rackspace OpenStack AzureRM].include?(@uploader.fog_credentials[:provider])
# avoid a get by using local references
local_directory = connection.directories.new(key: @uploader.fog_directory)
local_file = local_directory.files.new(key: path)
expire_at = ::Fog::Time.now + @uploader.fog_authenticated_url_expiration
case @uploader.fog_credentials[:provider]
when 'AWS', 'Google'
# Older versions of fog-google do not support options as a parameter
if url_options_supported?(local_file)
local_file.url(expire_at, options)
else
warn "Options hash not supported in #{local_file.class}. You may need to upgrade your Fog provider."
local_file.url(expire_at)
end
when 'Rackspace'
connection.get_object_https_url(@uploader.fog_directory, path, expire_at, options)
when 'OpenStack'
connection.get_object_https_url(@uploader.fog_directory, path, expire_at)
else
local_file.url(expire_at)
end
end
end
end
end
end
......
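
For context, the patched `authenticated_url` above is reached whenever a
fog-backed uploader with `fog_public = false` generates a URL. A minimal
sketch of that path with placeholder credentials (the `AvatarUploader`
class here is hypothetical):

```ruby
require "carrierwave"
require "fog/azurerm"

CarrierWave.configure do |config|
  config.fog_provider = "fog/azurerm"
  config.fog_credentials = {
    provider: "AzureRM",                    # selects the AzureRM branch above
    azure_storage_account_name: "example",  # placeholder
    azure_storage_access_key: "example-key" # placeholder
  }
  config.fog_directory = "uploads"               # container name
  config.fog_public = false                      # forces SAS-signed URLs
  config.fog_authenticated_url_expiration = 3600 # seconds
end

class AvatarUploader < CarrierWave::Uploader::Base
  storage :fog
end

uploader = AvatarUploader.new
uploader.retrieve_from_store!("avatar.png")
uploader.url # => time-limited SAS URL on the account's blob endpoint
```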
class DirectUploadsValidator
-SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze
+SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS AzureRM).freeze
ValidationError = Class.new(StandardError)
......@@ -13,22 +13,32 @@ class DirectUploadsValidator
raise ValidationError, "No provider configured for '#{uploader_type}'. #{supported_provider_text}" if provider.blank?
return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
return if provider_loaded?(provider)
raise ValidationError, "Object storage provider '#{provider}' is not supported " \
"when 'direct_upload' is used for '#{uploader_type}'. #{supported_provider_text}"
end
private
def provider_loaded?(provider)
return false unless SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(provider)
require 'fog/azurerm' if provider == 'AzureRM'
true
end
def supported_provider_text
"Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(', ')} are supported."
"Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.to_sentence} are supported."
end
end
DirectUploadsValidator.new.tap do |validator|
CONFIGS = {
artifacts: Gitlab.config.artifacts,
-uploads: Gitlab.config.uploads,
-lfs: Gitlab.config.lfs
+lfs: Gitlab.config.lfs,
+uploads: Gitlab.config.uploads
}.freeze
CONFIGS.each do |uploader_type, uploader|
......
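
The `supported_provider_text` change above swaps `Array#join` for
ActiveSupport's `to_sentence`, which reads better now that three providers
are listed; the error-message expectation in the spec further down changes
to match:

```ruby
require "active_support/core_ext/array/conversions"

providers = %w(Google AWS AzureRM)
providers.join(", ")  # => "Google, AWS, AzureRM"
providers.to_sentence # => "Google, AWS, and AzureRM"
```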
......@@ -58,6 +58,10 @@ module ObjectStorage
provider == 'Google'
end
def azure?
provider == 'AzureRM'
end
def fog_attributes
@fog_attributes ||= begin
return {} unless enabled? && aws?
......
......@@ -62,8 +62,16 @@ module ObjectStorage
end
def workhorse_client_hash
return {} unless config.aws?
if config.aws?
workhorse_aws_hash
elsif config.azure?
workhorse_azure_hash
else
{}
end
end
def workhorse_aws_hash
{
UseWorkhorseClient: use_workhorse_s3_client?,
RemoteTempObjectID: object_name,
......@@ -82,6 +90,21 @@ module ObjectStorage
}
end
def workhorse_azure_hash
{
# Azure requires Workhorse client because direct uploads can't
# use pre-signed URLs without buffering the whole file to disk.
UseWorkhorseClient: true,
RemoteTempObjectID: object_name,
ObjectStorage: {
Provider: 'AzureRM',
GoCloudConfig: {
URL: "azblob://#{bucket_name}"
}
}
}
end
def use_workhorse_s3_client?
return false unless Feature.enabled?(:use_workhorse_s3_client, default_enabled: true)
return false unless config.use_iam_profile? || config.consolidated_settings?
......
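
Putting this together, the pre-authorization payload Workhorse receives
for an AzureRM upload looks roughly like the hash below (the container
name and temp object ID are illustrative). Workhorse then opens
`azblob://<container>` via the Go Cloud Development Kit and streams the
request body using Put Block / Put Block List:

```ruby
{
  UseWorkhorseClient: true,
  RemoteTempObjectID: "tmp/uploads/1b2c3d4e5f60", # random temp object name
  ObjectStorage: {
    Provider: "AzureRM",
    GoCloudConfig: {
      URL: "azblob://gitlab-artifacts" # container derived from bucket_name
    }
  }
}
```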
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe 'CarrierWave::Storage::Fog::File' do
let(:uploader_class) { Class.new(CarrierWave::Uploader::Base) }
let(:uploader) { uploader_class.new }
let(:storage) { CarrierWave::Storage::Fog.new(uploader) }
let(:azure_options) do
{
azure_storage_account_name: 'AZURE_ACCOUNT_NAME',
azure_storage_access_key: 'AZURE_ACCESS_KEY',
provider: 'AzureRM'
}
end
subject { CarrierWave::Storage::Fog::File.new(uploader, storage, 'test') }
before do
require 'fog/azurerm'
allow(uploader).to receive(:fog_credentials).and_return(azure_options)
Fog.mock!
end
describe '#authenticated_url' do
context 'with Azure' do
it 'has an authenticated URL' do
expect(subject.authenticated_url).to eq("https://sa.blob.core.windows.net/test_container/test_blob?token")
end
end
end
end
......@@ -8,7 +8,7 @@ RSpec.describe 'Direct upload support' do
end
where(:config_name) do
-%w(lfs artifacts uploads)
+%w(artifacts lfs uploads)
end
with_them do
......@@ -52,11 +52,19 @@ RSpec.describe 'Direct upload support' do
end
end
context 'when provider is AzureRM' do
let(:provider) { 'AzureRM' }
it 'succeeds' do
expect { subject }.not_to raise_error
end
end
context 'when connection is empty' do
let(:connection) { nil }
it 'raises an error' do
expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS are supported."
expect { subject }.to raise_error "No provider configured for '#{config_name}'. Only Google, AWS, and AzureRM are supported."
end
end
......
......@@ -105,7 +105,7 @@ RSpec.describe ObjectStorage::DirectUpload do
end
end
-describe '#to_hash' do
+describe '#to_hash', :aggregate_failures do
subject { direct_upload.to_hash }
shared_examples 'a valid S3 upload' do
......@@ -200,6 +200,21 @@ RSpec.describe ObjectStorage::DirectUpload do
end
end
shared_examples 'a valid AzureRM upload' do
before do
require 'fog/azurerm'
end
it_behaves_like 'a valid upload'
it 'enables the Workhorse client' do
expect(subject[:UseWorkhorseClient]).to be true
expect(subject[:RemoteTempObjectID]).to eq(object_name)
expect(subject[:ObjectStorage][:Provider]).to eq('AzureRM')
expect(subject[:ObjectStorage][:GoCloudConfig]).to eq({ URL: "azblob://#{bucket_name}" })
end
end
shared_examples 'a valid upload' do
it "returns valid structure" do
expect(subject).to have_key(:Timeout)
......@@ -370,5 +385,31 @@ RSpec.describe ObjectStorage::DirectUpload do
it_behaves_like 'a valid upload without multipart data'
end
end
context 'when AzureRM is used' do
let(:credentials) do
{
provider: 'AzureRM',
azure_storage_account_name: 'azuretest',
azure_storage_access_key: 'ABCD1234'
}
end
let(:storage_url) { 'https://azuretest.blob.core.windows.net' }
context 'when length is known' do
let(:has_length) { true }
it_behaves_like 'a valid AzureRM upload'
it_behaves_like 'a valid upload without multipart data'
end
context 'when length is unknown' do
let(:has_length) { false }
it_behaves_like 'a valid AzureRM upload'
it_behaves_like 'a valid upload without multipart data'
end
end
end
end