Commit b49fae48 authored by George Koltsov

Add Project Uploads Migration to BulkImports

  - Add Project Uploads Migration to GitLab Migration
    feature in order to export/import all project uploads
    from source to destination GitLab instance
parent 8a735da6
......@@ -112,6 +112,10 @@ class BulkImports::Entity < ApplicationRecord
@export_relations_url_path ||= EXPORT_RELATIONS_URL % { resource: pluralized_name, full_path: encoded_source_full_path }
end
# Builds the URL path for downloading the export of a single relation.
#
# @param relation [String, Symbol] relation name placed in the `relation` query parameter
# @return [String] the export relations path followed by `/download?relation=<relation>`
def relation_download_url_path(relation)
  format('%s/download?relation=%s', export_relations_url_path, relation)
end
private
def validate_parent_is_a_group
......
......@@ -7,8 +7,16 @@ module BulkImports
REMOTE_FILENAME_PATTERN = %r{filename="(?<filename>[^"]+)"}.freeze
FILENAME_SIZE_LIMIT = 255 # chars before the extension
def initialize(configuration:, relative_url:, dir:, file_size_limit:, allowed_content_types:, filename: nil)
DEFAULT_FILE_SIZE_LIMIT = 5.gigabytes
DEFAULT_ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze
def initialize(
configuration:,
relative_url:,
dir:,
file_size_limit: DEFAULT_FILE_SIZE_LIMIT,
allowed_content_types: DEFAULT_ALLOWED_CONTENT_TYPES,
filename: nil)
@configuration = configuration
@relative_url = relative_url
@filename = filename
......
......@@ -7,10 +7,6 @@ module BulkImports
include Gitlab::ImportExport::CommandLineUtil
include Gitlab::Utils::StrongMemoize
FILE_SIZE_LIMIT = 5.gigabytes
ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze
EXPORT_DOWNLOAD_URL_PATH = "/%{resource}/%{full_path}/export_relations/download?relation=%{relation}"
def initialize(relation:)
@relation = relation
@tmp_dir = Dir.mktmpdir
......@@ -39,33 +35,19 @@ module BulkImports
# Memoizes a BulkImports::FileDownloadService configured from the context's
# configuration, writing into tmp_dir under `filename`.
#
# NOTE(review): this span interleaves the pre-change and post-change lines of the
# commit (two `relative_url:` entries, two `filename:` entries, and the
# `file_size_limit:` / `allowed_content_types:` arguments). Presumably the
# post-change call keeps only the `relation_download_url_path` URL and drops the
# explicit limits — confirm against the actual file.
def download_service(tmp_dir, context)
@download_service ||= BulkImports::FileDownloadService.new(
configuration: context.configuration,
relative_url: relative_resource_url(context),
relative_url: context.entity.relation_download_url_path(relation),
dir: tmp_dir,
filename: filename,
file_size_limit: FILE_SIZE_LIMIT,
allowed_content_types: ALLOWED_CONTENT_TYPES
filename: filename
)
end
# Memoizes a BulkImports::FileDecompressionService for `filename` inside tmp_dir.
#
# NOTE(review): the same constructor call appears twice below, once in multi-line
# and once in single-line form; these look like the before/after lines of the
# commit rather than two intended statements — confirm against the actual file.
def decompression_service(tmp_dir)
@decompression_service ||= BulkImports::FileDecompressionService.new(
dir: tmp_dir,
filename: filename
)
@decompression_service ||= BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: filename)
end
# Returns the NDJSON reader for tmp_dir, building it on first use and reusing
# the same instance on later calls (the argument is ignored once memoized).
def ndjson_reader(tmp_dir)
  return @ndjson_reader if @ndjson_reader

  @ndjson_reader = Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir)
end
# Memoized relative URL for downloading this relation's export, produced by
# filling EXPORT_DOWNLOAD_URL_PATH with the entity's pluralized name, its
# encoded source full path and the relation name.
def relative_resource_url(context)
  strong_memoize(:relative_resource_url) do
    entity = context.entity

    format(
      EXPORT_DOWNLOAD_URL_PATH,
      resource: entity.pluralized_name,
      full_path: entity.encoded_source_full_path,
      relation: relation
    )
  end
end
end
end
end
......
# frozen_string_literal: true
module BulkImports
  module Common
    module Pipelines
      # Pipeline that migrates a portable's uploads: it downloads the `uploads`
      # relation export archive, unpacks it into a temporary directory and
      # re-creates every extracted file as an upload on the destination portable.
      class UploadsPipeline
        include Pipeline
        include Gitlab::ImportExport::CommandLineUtil

        FILENAME = 'uploads.tar.gz'

        # Downloads and unpacks the uploads archive.
        #
        # @param context [BulkImports::Pipeline::Context]
        # @return [BulkImports::Pipeline::ExtractedData] wrapping every path found
        #   under the temporary directory (directories included; #load filters them)
        def extract(context)
          download_service(tmp_dir, context).execute
          untar_zxf(archive: File.join(tmp_dir, FILENAME), dir: tmp_dir)
          upload_file_paths = Dir.glob(File.join(tmp_dir, '**', '*'))

          BulkImports::Pipeline::ExtractedData.new(data: upload_file_paths)
        end

        # Creates an upload on the context's portable from one extracted path.
        # Paths are skipped when FileUploader cannot derive a dynamic path from
        # them, when they are directories, or when they are symbolic links.
        #
        # @param context [BulkImports::Pipeline::Context]
        # @param file_path [String] path produced by #extract
        def load(context, file_path)
          dynamic_path = FileUploader.extract_dynamic_path(file_path)

          return unless dynamic_path
          return if File.directory?(file_path)
          # The archive comes from a remote instance; never follow a link it
          # contains, otherwise an arbitrary local file could be uploaded.
          return if File.symlink?(file_path)

          named_captures = dynamic_path.named_captures.symbolize_keys

          # Block form closes the handle once the upload has been attempted, so
          # large imports do not accumulate open file descriptors.
          # NOTE(review): assumes UploadService#execute finishes reading the file
          # before it returns — confirm against UploadService.
          File.open(file_path, 'r') do |file|
            UploadService.new(context.portable, file, FileUploader, **named_captures).execute
          end
        end

        # Removes the temporary directory; a no-op when it is already gone.
        def after_run(_)
          FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
        end

        private

        # Builds the service that downloads the `uploads` relation export into
        # tmp_dir as FILENAME.
        def download_service(tmp_dir, context)
          BulkImports::FileDownloadService.new(
            configuration: context.configuration,
            relative_url: context.entity.relation_download_url_path('uploads'),
            dir: tmp_dir,
            filename: FILENAME
          )
        end

        # Temporary working directory, created on first use and reused afterwards.
        def tmp_dir
          @tmp_dir ||= Dir.mktmpdir('bulk_imports')
        end
      end
    end
  end
end
......@@ -27,9 +27,13 @@ module BulkImports
pipeline: BulkImports::Common::Pipelines::BoardsPipeline,
stage: 4
},
uploads: {
pipeline: BulkImports::Common::Pipelines::UploadsPipeline,
stage: 5
},
finisher: {
pipeline: BulkImports::Common::Pipelines::EntityFinisher,
stage: 5
stage: 6
}
}
end
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for BulkImports::Common::Pipelines::UploadsPipeline: the full #run flow,
# #extract (download, untar, path listing) and #load (upload creation and the
# cases in which a path is skipped).
RSpec.describe BulkImports::Common::Pipelines::UploadsPipeline do
let_it_be(:tmpdir) { Dir.mktmpdir }
let_it_be(:project) { create(:project) }
let_it_be(:entity) { create(:bulk_import_entity, :project_entity, project: project, source_full_path: 'test') }
let_it_be(:tracker) { create(:bulk_import_tracker, entity: entity) }
let_it_be(:context) { BulkImports::Pipeline::Context.new(tracker) }
# NOTE(review): the 32-character hex directory name is presumably what
# FileUploader.extract_dynamic_path needs in order to match — verify against FileUploader.
let_it_be(:uploads_dir_path) { File.join(tmpdir, '72a497a02fe3ee09edae2ed06d390038') }
let_it_be(:upload_file_path) { File.join(uploads_dir_path, 'upload.txt')}
subject(:pipeline) { described_class.new(context) }
# The fixture tree is rebuilt before every example because the after hook
# below (and the pipeline's own after_run) removes tmpdir.
before do
stub_uploads_object_storage(FileUploader)
FileUtils.mkdir_p(uploads_dir_path)
FileUtils.touch(upload_file_path)
end
after do
FileUtils.remove_entry(tmpdir) if Dir.exist?(tmpdir)
end
describe '#run' do
it 'imports uploads into destination portable and removes tmpdir' do
# Make the pipeline's temporary directory resolve to the fixture directory so
# its removal can be asserted; extract is stubbed so nothing is downloaded.
allow(Dir).to receive(:mktmpdir).with('bulk_imports').and_return(tmpdir)
allow(pipeline).to receive(:extract).and_return(BulkImports::Pipeline::ExtractedData.new(data: [upload_file_path]))
pipeline.run
expect(project.uploads.map { |u| u.retrieve_uploader.filename }).to include('upload.txt')
expect(Dir.exist?(tmpdir)).to eq(false)
end
end
describe '#extract' do
it 'downloads & extracts upload paths' do
# untar_zxf and the download service are replaced by expectations; the files
# created in the before hook stand in for the extracted archive contents.
allow(Dir).to receive(:mktmpdir).and_return(tmpdir)
expect(pipeline).to receive(:untar_zxf)
file_download_service = instance_double("BulkImports::FileDownloadService")
expect(BulkImports::FileDownloadService)
.to receive(:new)
.with(
configuration: context.configuration,
relative_url: "/projects/test/export_relations/download?relation=uploads",
dir: tmpdir,
filename: 'uploads.tar.gz')
.and_return(file_download_service)
expect(file_download_service).to receive(:execute)
extracted_data = pipeline.extract(context)
expect(extracted_data.data).to contain_exactly(uploads_dir_path, upload_file_path)
end
end
describe '#load' do
it 'creates a file upload' do
expect { pipeline.load(context, upload_file_path) }.to change { project.uploads.count }.by(1)
end
context 'when dynamic path is nil' do
it 'returns' do
expect { pipeline.load(context, File.join(tmpdir, 'test')) }.not_to change { project.uploads.count }
end
end
context 'when path is a directory' do
it 'returns' do
expect { pipeline.load(context, uploads_dir_path) }.not_to change { project.uploads.count }
end
end
end
end
......@@ -10,7 +10,8 @@ RSpec.describe BulkImports::Projects::Stage do
[2, BulkImports::Common::Pipelines::LabelsPipeline],
[3, BulkImports::Projects::Pipelines::IssuesPipeline],
[4, BulkImports::Common::Pipelines::BoardsPipeline],
[5, BulkImports::Common::Pipelines::EntityFinisher]
[5, BulkImports::Common::Pipelines::UploadsPipeline],
[6, BulkImports::Common::Pipelines::EntityFinisher]
]
end
......
......@@ -243,4 +243,13 @@ RSpec.describe BulkImports::Entity, type: :model do
end
end
end
# Checks that the download path is the entity's export relations path plus the
# `download?relation=` query string for the requested relation.
describe '#relation_download_url_path' do
  it 'returns export relations url with download query string' do
    entity = build(:bulk_import_entity)
    expected_path = "/groups/#{entity.encoded_source_full_path}/export_relations/download?relation=test"

    expect(entity.relation_download_url_path('test')).to eq(expected_path)
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment