Commit d18ee3fa, authored by Luke Duncalfe and committed by James Lopez

LFS export records repository_type data

A project can have the same `LfsObject` linked with up to three
`LfsObjectsProject` records. Each of these records would be for a
different repository, recorded in the `repository_type` property. The
different repositories at time of writing are "project", "wiki", and
"design". See https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/13894

This change exports the list of `repository_type`s as a JSON mapping of
oid => repository_types, which are imported to recreate the correct
`LfsObjectsProject` records.

https://gitlab.com/gitlab-org/gitlab-ee/issues/11090
parent 0a2bbc92
---
title: Add support for exporting repository type data for LFS objects
merge_request: 30830
author:
type: changed
......@@ -283,9 +283,6 @@ Introduced in GitLab 11.3. This file lives in `/var/log/gitlab/gitlab-rails/impo
Omnibus GitLab packages or in `/home/git/gitlab/log/importer.log` for
installations from source.
Currently it logs the progress of project imports from the Bitbucket Server
importer. Future importers may use this file.
## `auth.log`
Introduced in GitLab 12.0. This file lives in `/var/log/gitlab/gitlab-rails/auth.log` for
......
......@@ -19,6 +19,7 @@ Project.find_by_full_path('group/project').import_state.slice(:jid, :status, :la
grep JID /var/log/gitlab/sidekiq/current
grep "Import/Export error" /var/log/gitlab/sidekiq/current
grep "Import/Export backtrace" /var/log/gitlab/sidekiq/current
tail /var/log/gitlab/gitlab-rails/importer.log
```
## Troubleshooting performance issues
......
......@@ -4,7 +4,9 @@ module Gitlab
module ImportExport
extend self
# For every version update, the version history in import_export.md has to be kept up to date.
# For every version update the version history in these docs must be kept up to date:
# - development/import_export.md
# - user/project/settings/import_export.md
VERSION = '0.2.4'.freeze
FILENAME_LIMIT = 50
......@@ -28,6 +30,14 @@ module Gitlab
"project.bundle"
end
# File name used for the LFS objects JSON file.
#
# @return [String]
def lfs_objects_filename
  'lfs-objects.json'
end
# Directory name used for storing the LFS object files.
#
# @return [String]
def lfs_objects_storage
  'lfs-objects'
end
# Path of the import/export YAML configuration file, resolved against
# `Rails.root`.
def config_file
  Rails.root.join('lib', 'gitlab', 'import_export', 'import_export.yml')
end
......
......@@ -3,6 +3,10 @@
module Gitlab
module ImportExport
class LfsRestorer
include Gitlab::Utils::StrongMemoize
attr_accessor :project, :shared
def initialize(project:, shared:)
@project = project
@shared = shared
......@@ -17,7 +21,7 @@ module Gitlab
true
rescue => e
@shared.error(e)
shared.error(e)
false
end
......@@ -29,16 +33,57 @@ module Gitlab
lfs_object = LfsObject.find_or_initialize_by(oid: oid, size: size)
lfs_object.file = File.open(path) unless lfs_object.file&.exists?
lfs_object.save! if lfs_object.changed?
@project.all_lfs_objects << lfs_object
repository_types(oid).each do |repository_type|
LfsObjectsProject.create!(
project: project,
lfs_object: lfs_object,
repository_type: repository_type
)
end
end
# Returns the repository types that the LFS object identified by `oid`
# must be linked to in the restored project.
#
# Archives created before the `lfs-objects.json` file existed carry no
# mapping at all; in that case every LFS object is linked through a single
# `lfs_objects_projects` relation of type "project". This gives us
# backwards-compatibility without bumping the export version.
# See https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/30830#note_192608870
#
# The same fallback is applied when the mapping exists but has no (or a
# null) entry for `oid`. Returning nil there would make the caller's
# `.each` raise NoMethodError and abort the whole restore.
#
# @param oid [String] the LFS object id (the key used in the JSON mapping)
# @return [Array] the repository types to link the object with
def repository_types(oid)
  legacy_types = ['project']
  return legacy_types unless has_lfs_json?

  lfs_json[oid] || legacy_types
end
# Lists every entry directly inside the LFS storage directory.
# The glob result is memoized, so the directory is scanned once.
#
# @return [Array<String>]
def lfs_file_paths
  return @lfs_file_paths if @lfs_file_paths

  @lfs_file_paths = Dir["#{lfs_storage_path}/*"]
end
# Whether a file exists at `lfs_json_path`.
# The check goes through `strong_memoize` under the `:has_lfs_json` key.
def has_lfs_json?
  strong_memoize(:has_lfs_json) { File.exist?(lfs_json_path) }
end
# Decoded contents of the LFS JSON file.
#
# Returns an empty hash when `has_lfs_json?` is false. Otherwise the file
# at `lfs_json_path` is read and decoded once, and the result is memoized.
#
# `File.read` is used rather than `IO.read`: `IO.read` treats a path that
# starts with "|" as a command to run, `File.read` only ever opens a file.
#
# @return [Hash]
# @raise [Gitlab::ImportExport::Error] when the file cannot be read or
#   decoded
def lfs_json
  return {} unless has_lfs_json?

  @lfs_json ||=
    begin
      ActiveSupport::JSON.decode(File.read(lfs_json_path))
    rescue
      raise Gitlab::ImportExport::Error, 'Incorrect JSON format'
    end
end
# Directory, under the shared export path, that holds the LFS object files.
# The directory name comes from `ImportExport.lfs_objects_storage` rather
# than being hard-coded here.
#
# @return [String]
def lfs_storage_path
  File.join(shared.export_path, ImportExport.lfs_objects_storage)
end
# Full path of the LFS JSON file under the shared export path.
#
# @return [String]
def lfs_json_path
  export_root = shared.export_path
  File.join(export_root, ImportExport.lfs_objects_filename)
end
end
end
......
......@@ -5,25 +5,40 @@ module Gitlab
class LfsSaver
include Gitlab::ImportExport::CommandLineUtil
attr_accessor :lfs_json, :project, :shared
BATCH_SIZE = 100
# @param project the project whose LFS objects are saved
# @param shared the object providing `export_path` and `error`
#
# `lfs_json` starts as an empty hash.
def initialize(project:, shared:)
  @lfs_json = {}
  @project, @shared = project, shared
end
def save
@project.all_lfs_objects.each do |lfs_object|
save_lfs_object(lfs_object)
project.all_lfs_objects.find_in_batches(batch_size: BATCH_SIZE) do |batch|
batch.each do |lfs_object|
save_lfs_object(lfs_object)
end
append_lfs_json_for_batch(batch) if write_lfs_json_enabled?
end
write_lfs_json if write_lfs_json_enabled?
true
rescue => e
@shared.error(e)
shared.error(e)
false
end
private
# Whether the `export_lfs_objects_projects` feature flag is enabled.
# The flag is queried with `default_enabled: true`.
def write_lfs_json_enabled?
  flag = :export_lfs_objects_projects

  ::Feature.enabled?(flag, default_enabled: true)
end
def save_lfs_object(lfs_object)
if lfs_object.local_store?
copy_file_for_lfs_object(lfs_object)
......@@ -45,12 +60,36 @@ module Gitlab
copy_files(lfs_object.file.path, destination_path_for_object(lfs_object))
end
# Adds the repository types of one batch of LFS objects to `lfs_json`.
#
# Runs a grouped query over the project's `LfsObjectsProject` rows for the
# batch: one result row per oid, with the distinct `repository_type`
# values aggregated into `repository_types`. Each row's types are appended
# to whatever `lfs_json` already holds for that oid.
#
# @param lfs_objects_batch the LFS objects to look up
def append_lfs_json_for_batch(lfs_objects_batch)
  aggregated_columns =
    'lfs_objects.oid, array_agg(distinct lfs_objects_projects.repository_type) as repository_types'

  rows = LfsObjectsProject
    .select(aggregated_columns)
    .joins(:lfs_object)
    .where(project: project, lfs_object: lfs_objects_batch)
    .group('lfs_objects.oid')

  rows.each do |row|
    key = row.oid
    merged = (lfs_json[key] || []) + row.repository_types
    lfs_json[key] = merged
  end
end
# Writes `lfs_json`, serialized as JSON, to `lfs_json_path`.
# `mkdir_p` is called on the export path first.
def write_lfs_json
  mkdir_p(shared.export_path)

  destination = lfs_json_path
  payload = lfs_json.to_json
  File.write(destination, payload)
end
# Path inside the LFS export directory for the given LFS object; the file
# is named after the object's oid.
#
# @param lfs_object an object responding to `oid`
# @return [String]
def destination_path_for_object(lfs_object)
  export_dir = lfs_export_path
  File.join(export_dir, lfs_object.oid)
end
# Directory, under the shared export path, that the LFS object files are
# copied into. The directory name comes from
# `ImportExport.lfs_objects_storage` rather than being hard-coded here.
#
# @return [String]
def lfs_export_path
  File.join(shared.export_path, ImportExport.lfs_objects_storage)
end
# Full path of the LFS JSON file under the shared export path.
#
# @return [String]
def lfs_json_path
  json_name = ImportExport.lfs_objects_filename
  File.join(shared.export_path, json_name)
end
end
end
......
......@@ -14,6 +14,7 @@ FactoryBot.define do
# objects, so the test needs to decide which (if any) object gets it
trait :correct_oid do
oid 'b804383982bb89b00e828e3f44c038cc991d3d1768009fc39ba8e2c081b9fb75'
size 1062
end
trait :object_storage do
......
......@@ -6,6 +6,7 @@ describe Gitlab::ImportExport::LfsRestorer do
let(:export_path) { "#{Dir.tmpdir}/lfs_object_restorer_spec" }
let(:project) { create(:project) }
let(:shared) { project.import_export_shared }
let(:saver) { Gitlab::ImportExport::LfsSaver.new(project: project, shared: shared) }
subject(:restorer) { described_class.new(project: project, shared: shared) }
before do
......@@ -19,49 +20,98 @@ describe Gitlab::ImportExport::LfsRestorer do
describe '#restore' do
context 'when the archive contains lfs files' do
let(:dummy_lfs_file_path) { File.join(shared.export_path, 'lfs-objects', 'dummy') }
def create_lfs_object_with_content(content)
dummy_lfs_file = Tempfile.new('existing')
File.write(dummy_lfs_file.path, content)
size = dummy_lfs_file.size
oid = LfsObject.calculate_oid(dummy_lfs_file.path)
LfsObject.create!(oid: oid, size: size, file: dummy_lfs_file)
let(:lfs_object) { create(:lfs_object, :correct_oid, :with_file) }
# Prepares data for the restorer by running the real LfsSaver:
# links `lfs_object` to the project once per repository type, saves the
# export, then deletes the project's LFS objects.
def save_lfs_data
  %w[project wiki].each do |type|
    create(:lfs_objects_project, project: project, repository_type: type, lfs_object: lfs_object)
  end

  saver.save

  project.lfs_objects.delete_all
end
before do
FileUtils.mkdir_p(File.dirname(dummy_lfs_file_path))
File.write(dummy_lfs_file_path, 'not very large')
allow(restorer).to receive(:lfs_file_paths).and_return([dummy_lfs_file_path])
save_lfs_data
project.reload
end
it 'creates an lfs object for the project' do
expect { restorer.restore }.to change { project.reload.lfs_objects.size }.by(1)
it 'succeeds' do
expect(restorer.restore).to eq(true)
expect(shared.errors).to be_empty
end
it 'assigns the file correctly' do
it 'does not create a new `LfsObject` records, as one already exists' do
expect { restorer.restore }.not_to change { LfsObject.count }
end
it 'creates new `LfsObjectsProject` records in order to link the project to the existing `LfsObject`' do
expect { restorer.restore }.to change { LfsObjectsProject.count }.by(2)
end
it 'restores the correct `LfsObject` records' do
restorer.restore
expect(project.lfs_objects.first.file.read).to eq('not very large')
expect(project.lfs_objects).to contain_exactly(lfs_object)
end
it 'links an existing LFS object if it existed' do
lfs_object = create_lfs_object_with_content('not very large')
it 'restores the correct `LfsObjectsProject` records for the project' do
restorer.restore
expect(
project.lfs_objects_projects.pluck(:repository_type)
).to contain_exactly('project', 'wiki')
end
it 'assigns the file correctly' do
restorer.restore
expect(project.lfs_objects).to include(lfs_object)
expect(project.lfs_objects.first.file.read).to eq(lfs_object.file.read)
end
it 'succeeds' do
expect(restorer.restore).to be_truthy
expect(shared.errors).to be_empty
context 'when there is not an existing `LfsObject`' do
before do
lfs_object.destroy
end
it 'creates a new lfs object' do
expect { restorer.restore }.to change { LfsObject.count }.by(1)
end
it 'stores the upload' do
expect_any_instance_of(LfsObjectUploader).to receive(:store!)
restorer.restore
end
end
it 'stores the upload' do
expect_any_instance_of(LfsObjectUploader).to receive(:store!)
context 'when there is no lfs-objects.json file' do
before do
json_file = File.join(shared.export_path, ::Gitlab::ImportExport.lfs_objects_filename)
restorer.restore
FileUtils.rm_rf(json_file)
end
it 'restores the correct `LfsObject` records' do
restorer.restore
expect(project.lfs_objects).to contain_exactly(lfs_object)
end
it 'restores a single `LfsObjectsProject` record for the project with "project" for the `repository_type`' do
restorer.restore
expect(
project.lfs_objects_projects.pluck(:repository_type)
).to contain_exactly('project')
end
end
end
......
......@@ -19,6 +19,11 @@ describe Gitlab::ImportExport::LfsSaver do
describe '#save' do
context 'when the project has LFS objects locally stored' do
let(:lfs_object) { create(:lfs_object, :with_file) }
let(:lfs_json_file) { File.join(shared.export_path, Gitlab::ImportExport.lfs_objects_filename) }
# Parses the JSON file at `lfs_json_file`.
# The file is re-read on every call, so the result reflects what is
# currently on disk. `File.read` is used rather than `IO.read` because
# `IO.read` treats a path starting with "|" as a command to run.
def lfs_json
  JSON.parse(File.read(lfs_json_file))
end
before do
project.lfs_objects << lfs_object
......@@ -35,6 +40,45 @@ describe Gitlab::ImportExport::LfsSaver do
expect(File).to exist("#{shared.export_path}/lfs-objects/#{lfs_object.oid}")
end
describe 'saving a json file' do
before do
# Create two more LfsObjectProject records with different `repository_type`s
%w(wiki design).each do |repository_type|
create(
:lfs_objects_project,
project: project,
repository_type: repository_type,
lfs_object: lfs_object
)
end
FileUtils.rm_rf(lfs_json_file)
end
it 'saves a json file correctly' do
saver.save
expect(File.exist?(lfs_json_file)).to eq(true)
expect(lfs_json).to eq(
{
lfs_object.oid => [
LfsObjectsProject.repository_types['wiki'],
LfsObjectsProject.repository_types['design'],
nil
]
}
)
end
it 'does not save a json file if feature is disabled' do
stub_feature_flags(export_lfs_objects_projects: false)
saver.save
expect(File.exist?(lfs_json_file)).to eq(false)
end
end
end
context 'when the LFS objects are stored in object storage' do
......@@ -42,8 +86,11 @@ describe Gitlab::ImportExport::LfsSaver do
before do
allow(LfsObjectUploader).to receive(:object_store_enabled?).and_return(true)
allow(lfs_object.file).to receive(:url).and_return('http://my-object-storage.local')
project.lfs_objects << lfs_object
expect_next_instance_of(LfsObjectUploader) do |instance|
expect(instance).to receive(:url).and_return('http://my-object-storage.local')
end
end
it 'downloads the file to include in an archive' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment