Commit 990849bc authored by Stan Hu's avatar Stan Hu

Add configuration to support an S3 client inside Workhorse

This commit adds the configuration needed to support the Preparer
objects to extract the S3 bucket, endpoint, and other parameters needed
from Rails for Workhorse to upload a file. It does not yet use this
configuration.

This is in preparation for adding an S3 client in
https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/466.
parent 7efb5f0a
---
title: Add configuration to support an S3 client inside Workhorse
merge_request: 516
author:
type: other
......@@ -14,6 +14,7 @@ import (
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/gitaly"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/secret"
......@@ -74,6 +75,11 @@ type MultipartUploadParams struct {
AbortURL string
}
// ObjectStorageParams carries the object storage settings attached to a
// RemoteObject for use by the Workhorse object storage client.
type ObjectStorageParams struct {
	// Provider names the object storage provider.
	Provider string
	// S3Config holds the S3 parameters that accompany Provider.
	S3Config config.S3Config
}
type RemoteObject struct {
// GetURL is an S3 GetObject URL
GetURL string
......@@ -85,12 +91,18 @@ type RemoteObject struct {
CustomPutHeaders bool
// PutHeaders are HTTP headers (e.g. Content-Type) to be sent with StoreURL
PutHeaders map[string]string
// Whether to ignore Rails pre-signed URLs and have Workhorse directly access object storage provider
UseWorkhorseClient bool
// Remote, temporary object name that Rails will move to the final destination
RemoteTempObjectID string
// ID is a unique identifier of object storage upload
ID string
// Timeout is a number that represents timeout in seconds for sending data to StoreURL
Timeout int
// MultipartUpload contains presigned URLs for S3 MultipartUpload
MultipartUpload *MultipartUploadParams
// Object storage config for Workhorse client
ObjectStorage *ObjectStorageParams
}
type Response struct {
......
......@@ -27,6 +27,25 @@ func (d *TomlDuration) UnmarshalTest(text []byte) error {
return err
}
// ObjectStorageCredentials groups the object storage provider name with the
// credentials for that provider.
type ObjectStorageCredentials struct {
	// Provider names the object storage provider.
	Provider string

	// S3Credentials is decoded from the "s3" TOML key.
	S3Credentials S3Credentials `toml:"s3"`
}
// S3Credentials is the static access key pair decoded from the
// "aws_access_key_id" and "aws_secret_access_key" TOML keys.
type S3Credentials struct {
	// AwsAccessKeyID is the access key identifier.
	AwsAccessKeyID string `toml:"aws_access_key_id"`
	// AwsSecretAccessKey is the secret paired with AwsAccessKeyID.
	AwsSecretAccessKey string `toml:"aws_secret_access_key"`
}
// S3Config describes the S3 target of an upload. Every field is tagged
// `toml:"-"`, so none of them is decoded from the TOML configuration file.
type S3Config struct {
	// Region is the S3 region of the bucket.
	Region string `toml:"-"`
	// Bucket is the name of the destination bucket.
	Bucket string `toml:"-"`
	// PathStyle presumably selects path-style bucket addressing; confirm
	// against the S3 client that consumes it.
	PathStyle bool `toml:"-"`
	// Endpoint is the S3 endpoint to use.
	Endpoint string `toml:"-"`
	// UseIamProfile indicates that an IAM instance profile is used instead
	// of a static access key pair.
	UseIamProfile bool `toml:"-"`
}
type RedisConfig struct {
URL TomlURL
Sentinel []TomlURL
......@@ -54,6 +73,7 @@ type Config struct {
APIQueueLimit uint `toml:"-"`
APIQueueTimeout time.Duration `toml:"-"`
APICILongPollingDuration time.Duration `toml:"-"`
ObjectStorageCredentials *ObjectStorageCredentials `toml:"object_storage"`
}
// LoadConfig from a file
......
package filestore
import (
"strings"
"time"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
)
// DefaultObjectStoreTimeout is the timeout applied to an object store upload
// operation: four hours.
const DefaultObjectStoreTimeout time.Duration = 4 * time.Hour
// ObjectStorageConfig gathers the provider name, S3 credentials and S3 bucket
// settings that describe an object storage target for the Workhorse client.
type ObjectStorageConfig struct {
	// Provider names the object storage provider (see IsAWS).
	Provider string

	// S3Credentials holds the access key pair for S3.
	S3Credentials config.S3Credentials
	// S3Config holds the bucket, region and related S3 settings.
	S3Config config.S3Config
}
// SaveFileOpts represents all the options available for saving a file to object store
type SaveFileOpts struct {
// TempFilePrefix is the prefix used to create temporary local file
......@@ -25,7 +34,12 @@ type SaveFileOpts struct {
PresignedDelete string
// HTTP headers to be sent along with PUT request
PutHeaders map[string]string
// Whether to ignore Rails pre-signed URLs and have Workhorse directly access object storage provider
UseWorkhorseClient bool
// If UseWorkhorseClient is true, this is the temporary object name to store the file
RemoteTempObjectID string
// Workhorse object storage client (e.g. S3) parameters
ObjectStorageConfig ObjectStorageConfig
// Deadline is the S3 operation deadline; the upload will be aborted if not completed in time
Deadline time.Time
......@@ -40,6 +54,11 @@ type SaveFileOpts struct {
PresignedAbortMultipart string
}
// UseWorkhorseClientEnabled reports whether these options call for Workhorse
// to access object storage directly. All three conditions must hold: the
// UseWorkhorseClient flag is set, the object storage configuration is valid,
// and a remote temporary object ID has been provided.
func (s *SaveFileOpts) UseWorkhorseClientEnabled() bool {
	if !s.UseWorkhorseClient {
		return false
	}
	if !s.ObjectStorageConfig.IsValid() {
		return false
	}
	return s.RemoteTempObjectID != ""
}
// IsLocal checks if the options require the writing of the file on disk
func (s *SaveFileOpts) IsLocal() bool {
return s.LocalTempPath != ""
......@@ -69,9 +88,17 @@ func GetOpts(apiResponse *api.Response) *SaveFileOpts {
PresignedPut: apiResponse.RemoteObject.StoreURL,
PresignedDelete: apiResponse.RemoteObject.DeleteURL,
PutHeaders: apiResponse.RemoteObject.PutHeaders,
UseWorkhorseClient: apiResponse.RemoteObject.UseWorkhorseClient,
RemoteTempObjectID: apiResponse.RemoteObject.RemoteTempObjectID,
Deadline: time.Now().Add(timeout),
}
objectStorageParams := apiResponse.RemoteObject.ObjectStorage
if opts.UseWorkhorseClient && objectStorageParams != nil {
opts.ObjectStorageConfig.Provider = objectStorageParams.Provider
opts.ObjectStorageConfig.S3Config = objectStorageParams.S3Config
}
// Backwards compatibility to ensure API servers that do not include the
// CustomPutHeaders flag will default to the original content type.
if !apiResponse.RemoteObject.CustomPutHeaders {
......@@ -88,3 +115,24 @@ func GetOpts(apiResponse *api.Response) *SaveFileOpts {
return &opts
}
// IsAWS reports whether Provider is "AWS" or "S3", compared
// case-insensitively.
func (c *ObjectStorageConfig) IsAWS() bool {
	for _, accepted := range [...]string{"AWS", "S3"} {
		if strings.EqualFold(c.Provider, accepted) {
			return true
		}
	}
	return false
}
// IsValid reports whether the configuration has a non-empty S3 bucket and
// region together with usable credentials (see credentialsValid).
func (c *ObjectStorageConfig) IsValid() bool {
	if c.S3Config.Bucket == "" || c.S3Config.Region == "" {
		return false
	}
	return c.credentialsValid()
}
// credentialsValid reports whether the configuration provides a way to
// authenticate: either an IAM instance profile is in use, or both the access
// key ID and the secret access key are set.
func (c *ObjectStorageConfig) credentialsValid() bool {
	// Two cases of AWS access have to be told apart:
	//   1. access via key and secret, where the credentials may not have been
	//      configured in Workhorse;
	//   2. access via an IAM instance profile, which needs no key pair.
	if c.S3Config.UseIamProfile {
		return true
	}

	keyPair := c.S3Credentials
	return keyPair.AwsAccessKeyID != "" && keyPair.AwsSecretAccessKey != ""
}
......@@ -5,8 +5,10 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore"
)
......@@ -166,3 +168,107 @@ func TestGetOptsDefaultTimeout(t *testing.T) {
assert.WithinDuration(deadline, opts.Deadline, time.Minute)
}
// TestUseWorkhorseClientEnabled checks SaveFileOpts.UseWorkhorseClientEnabled
// against combinations of the UseWorkhorseClient flag, the remote temporary
// object ID and the object storage configuration.
func TestUseWorkhorseClientEnabled(t *testing.T) {
	// Fully populated configuration: bucket, region and a static key pair.
	completeCfg := filestore.ObjectStorageConfig{
		Provider: "AWS",
		S3Config: config.S3Config{
			Bucket: "test-bucket",
			Region: "test-region",
		},
		S3Credentials: config.S3Credentials{
			AwsAccessKeyID:     "test-key",
			AwsSecretAccessKey: "test-secret",
		},
	}

	// Same bucket and region, but no key pair.
	noCredentialsCfg := completeCfg
	noCredentialsCfg.S3Credentials = config.S3Credentials{}

	// No key pair, but an IAM instance profile is declared.
	iamProfileCfg := noCredentialsCfg
	iamProfileCfg.S3Config.UseIamProfile = true

	testCases := []struct {
		name                string
		useWorkhorseClient  bool
		remoteTempObjectID  string
		objectStorageConfig filestore.ObjectStorageConfig
		expected            bool
	}{
		{
			name:                "all direct access settings used",
			useWorkhorseClient:  true,
			remoteTempObjectID:  "test-object",
			objectStorageConfig: completeCfg,
			expected:            true,
		},
		{
			name:                "missing AWS credentials",
			useWorkhorseClient:  true,
			remoteTempObjectID:  "test-object",
			objectStorageConfig: noCredentialsCfg,
			expected:            false,
		},
		{
			name:                "direct access disabled",
			useWorkhorseClient:  false,
			remoteTempObjectID:  "test-object",
			objectStorageConfig: completeCfg,
			expected:            false,
		},
		{
			name:                "with IAM instance profile",
			useWorkhorseClient:  true,
			remoteTempObjectID:  "test-object",
			objectStorageConfig: iamProfileCfg,
			expected:            true,
		},
		{
			name:                "missing remote temp object ID",
			useWorkhorseClient:  true,
			remoteTempObjectID:  "",
			objectStorageConfig: completeCfg,
			expected:            false,
		},
		{
			// objectStorageConfig is deliberately left as the zero value.
			name:               "missing S3 config",
			useWorkhorseClient: true,
			remoteTempObjectID: "test-object",
			expected:           false,
		},
		{
			name:               "missing S3 bucket",
			useWorkhorseClient: true,
			remoteTempObjectID: "test-object",
			objectStorageConfig: filestore.ObjectStorageConfig{
				Provider: "AWS",
				S3Config: config.S3Config{},
			},
			expected: false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			remote := api.RemoteObject{
				Timeout:            10,
				ID:                 "id",
				UseWorkhorseClient: tc.useWorkhorseClient,
				RemoteTempObjectID: tc.remoteTempObjectID,
			}
			apiResponse := &api.Response{
				TempPath:     "/tmp",
				RemoteObject: remote,
			}

			// Computed before GetOpts so the comparison below tolerates only
			// the time spent inside the call.
			timeout := time.Duration(remote.Timeout) * time.Second
			deadline := time.Now().Add(timeout)

			opts := filestore.GetOpts(apiResponse)
			// The configuration under test replaces whatever GetOpts derived.
			opts.ObjectStorageConfig = tc.objectStorageConfig

			require.Equal(t, apiResponse.TempPath, opts.LocalTempPath)
			require.WithinDuration(t, deadline, opts.Deadline, time.Second)
			require.Equal(t, apiResponse.RemoteObject.ID, opts.RemoteID)
			require.Equal(t, apiResponse.RemoteObject.UseWorkhorseClient, opts.UseWorkhorseClient)
			require.Equal(t, tc.expected, opts.UseWorkhorseClientEnabled())
		})
	}
}
......@@ -31,15 +31,24 @@ func (l *object) Verify(fh *filestore.FileHandler) error {
return nil
}
type uploadPreparer struct{}
type uploadPreparer struct {
credentials config.ObjectStorageCredentials
}
// NewLfsUploadPreparer returns an upload.Preparer for LFS uploads that
// carries the object storage credentials found in c. When c has no
// ObjectStorageCredentials, the preparer is built with empty credentials.
func NewLfsUploadPreparer(c config.Config) upload.Preparer {
	creds := c.ObjectStorageCredentials
	if creds == nil {
		// Fall back to the zero value so the dereference below is safe.
		creds = &config.ObjectStorageCredentials{}
	}

	return &uploadPreparer{credentials: *creds}
}
// Prepare builds the save options for an LFS upload from the API response.
// The temp file prefix is set to the LFS OID, the preparer's S3 credentials
// are attached, and the returned verifier is built from the LFS OID and size
// in the response.
func (l *uploadPreparer) Prepare(a *api.Response) (*filestore.SaveFileOpts, upload.Verifier, error) {
	saveOpts := filestore.GetOpts(a)
	saveOpts.TempFilePrefix = a.LfsOid
	saveOpts.ObjectStorageConfig.S3Credentials = l.credentials.S3Credentials

	verifier := &object{oid: a.LfsOid, size: a.LfsSize}
	return saveOpts, verifier, nil
}
......
package upload
import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore"
)
// ObjectStoragePreparer is a Preparer that attaches the object storage
// credentials it was constructed with to the save options it produces.
type ObjectStoragePreparer struct {
	// credentials is a copy of the configured object storage credentials.
	credentials config.ObjectStorageCredentials
}
// NewObjectStoragePreparer returns a Preparer holding a copy of the object
// storage credentials in c. If c.ObjectStorageCredentials is nil, the
// preparer holds zero-value (empty) credentials.
func NewObjectStoragePreparer(c config.Config) Preparer {
	var credentials config.ObjectStorageCredentials
	if configured := c.ObjectStorageCredentials; configured != nil {
		credentials = *configured
	}

	return &ObjectStoragePreparer{credentials: credentials}
}
// Prepare derives the save options from the API response and fills in the
// preparer's S3 credentials. It returns a nil Verifier and a nil error.
func (p *ObjectStoragePreparer) Prepare(a *api.Response) (*filestore.SaveFileOpts, Verifier, error) {
	saveOpts := filestore.GetOpts(a)

	s3Creds := p.credentials.S3Credentials
	saveOpts.ObjectStorageConfig.S3Credentials = s3Creds

	return saveOpts, nil, nil
}
package upload_test
import (
"testing"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/config"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upload"
"github.com/stretchr/testify/require"
)
// TestPrepareWithS3Config verifies that a preparer built from a configuration
// with S3 credentials copies them into the prepared options, and that the
// provider and UseWorkhorseClient flag from the API response are kept.
func TestPrepareWithS3Config(t *testing.T) {
	s3Creds := config.S3Credentials{
		AwsAccessKeyID:     "test-key",
		AwsSecretAccessKey: "test-secret",
	}
	storageCreds := &config.ObjectStorageCredentials{
		Provider:      "AWS",
		S3Credentials: s3Creds,
	}
	cfg := config.Config{ObjectStorageCredentials: storageCreds}

	response := &api.Response{
		RemoteObject: api.RemoteObject{
			UseWorkhorseClient: true,
			ObjectStorage: &api.ObjectStorageParams{
				Provider: "AWS",
			},
		},
	}

	preparer := upload.NewObjectStoragePreparer(cfg)
	opts, verifier, err := preparer.Prepare(response)

	require.NoError(t, err)
	require.True(t, opts.ObjectStorageConfig.IsAWS())
	require.True(t, opts.UseWorkhorseClient)
	require.Equal(t, s3Creds, opts.ObjectStorageConfig.S3Credentials)
	require.Equal(t, nil, verifier)
}
// TestPrepareWithNoConfig verifies that an empty configuration and an empty
// API response yield options with UseWorkhorseClient unset, a nil verifier
// and no error.
func TestPrepareWithNoConfig(t *testing.T) {
	preparer := upload.NewObjectStoragePreparer(config.Config{})

	opts, verifier, err := preparer.Prepare(&api.Response{})

	require.NoError(t, err)
	require.False(t, opts.UseWorkhorseClient)
	require.Equal(t, nil, verifier)
}
......@@ -279,7 +279,7 @@ func (u *upstream) configureRoutes() {
}
func createUploadPreparers(cfg config.Config) uploadPreparers {
defaultPreparer := &upload.DefaultPreparer{}
defaultPreparer := upload.NewObjectStoragePreparer(cfg)
return uploadPreparers{
artifacts: defaultPreparer,
......
......@@ -164,6 +164,7 @@ func main() {
}
cfg.Redis = cfgFromFile.Redis
cfg.ObjectStorageCredentials = cfgFromFile.ObjectStorageCredentials
if cfg.Redis != nil {
redis.Configure(cfg.Redis, redis.DefaultDialFunc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment