Commit d8c31d19 authored by James Fargher

Merge branch '210513-introduce-ndjson-reader-for-project-import' into 'master'

Introduce ndjson reader for project import

Closes #210513

See merge request gitlab-org/gitlab!27206
parents 96085859 2b53d520
......@@ -20,6 +20,7 @@ module Gitlab
def restore
@group_attributes = relation_reader.consume_attributes(nil)
@group_members = relation_reader.consume_relation(nil, 'members')
.map(&:first)
# We need to remove `name` and `path` as we already consumed them in the previous pass
@group_attributes.delete('name')
......
......@@ -53,6 +53,7 @@ module Gitlab
def initialize(relation_names:, allowed_path:)
@relation_names = relation_names.map(&:to_s)
@consumed_relations = Set.new
# This is legacy reader, to be used in transition
# period before `.ndjson`,
......@@ -81,17 +82,19 @@ module Gitlab
raise ArgumentError, "Invalid #{importable_name} passed to `consume_relation`. Use #{@allowed_path} instead."
end
value = relations.delete(key)
Enumerator.new do |documents|
next unless @consumed_relations.add?("#{importable_path}/#{key}")
return value unless block_given?
return if value.nil?
value = relations.delete(key)
next if value.nil?
if value.is_a?(Array)
value.each.with_index do |item, idx|
yield(item, idx)
if value.is_a?(Array)
value.each.with_index do |item, idx|
documents << [item, idx]
end
else
documents << [value, 0]
end
else
yield(value, 0)
end
end
......
# frozen_string_literal: true
module Gitlab
  module ImportExport
    module JSON
      # Reads an import tree made of one `<importable>.json` attributes file
      # plus one `<relation>.ndjson` file per relation, all under `dir_path`.
      class NdjsonReader
        # Passed as the length/limit argument to every file read below.
        MAX_JSON_DOCUMENT_SIZE = 50.megabytes

        attr_reader :dir_path

        def initialize(dir_path)
          @consumed_relations = Set.new
          @dir_path = dir_path
        end

        # True when the tree directory is present on disk.
        def exist?
          Dir.exist?(dir_path)
        end

        # This can be removed once legacy_reader is deprecated.
        def legacy?
          false
        end

        # Decodes the root attributes document, e.g. `tree/project.json`.
        def consume_attributes(importable_path)
          raw = File.read(file_path("#{importable_path}.json"), MAX_JSON_DOCUMENT_SIZE)
          json_decode(raw)
        end

        # Returns an Enumerator of `[decoded_document, line_index]` pairs read
        # from e.g. `tree/project/merge_requests.ndjson`.
        #
        # A relation is marked as consumed when enumeration starts; enumerating
        # the same `importable_path`/`key` again yields nothing. A missing
        # relation file also yields nothing.
        def consume_relation(importable_path, key)
          Enumerator.new do |yielder|
            relation_id = "#{importable_path}/#{key}"
            next unless @consumed_relations.add?(relation_id)

            ndjson_path = file_path(importable_path, "#{key}.ndjson")

            if File.exist?(ndjson_path)
              File.foreach(ndjson_path, MAX_JSON_DOCUMENT_SIZE).each_with_index do |row, index|
                yielder << [json_decode(row), index]
              end
            end
          end
        end

        private

        # Parses a JSON string; parse failures are logged and re-raised as
        # Gitlab::ImportExport::Error.
        def json_decode(string)
          ActiveSupport::JSON.decode(string)
        rescue ActiveSupport::JSON.parse_error => error
          Gitlab::ErrorTracking.log_exception(error)
          raise Gitlab::ImportExport::Error, 'Incorrect JSON format'
        end

        # Joins the given segments onto the tree directory.
        def file_path(*segments)
          File.join(dir_path, *segments)
        end
      end
    end
  end
end
......@@ -17,8 +17,13 @@ module Gitlab
end
def restore
unless relation_reader
raise Gitlab::ImportExport::Error, 'invalid import format'
end
@project_attributes = relation_reader.consume_attributes(importable_path)
@project_members = relation_reader.consume_relation(importable_path, 'project_members')
.map(&:first)
if relation_tree_restorer.restore
import_failure_service.with_retry(action: 'set_latest_merge_request_diff_ids!') do
......@@ -38,14 +43,27 @@ module Gitlab
# Picks the reader used for this import.
#
# Candidates are tried in order: the ndjson reader first (it may be nil and
# is then dropped by `compact`), then the legacy reader. The first candidate
# whose `exist?` is truthy is returned; nil when none qualifies. The lookup
# is wrapped in `strong_memoize(:relation_reader)`.
#
# No reader is constructed outside the candidate list: building an extra
# LegacyReader here and discarding it would only do needless work.
def relation_reader
  strong_memoize(:relation_reader) do
    [ndjson_relation_reader, legacy_relation_reader]
      .compact.find(&:exist?)
  end
end
# Builds the ndjson reader over `<export_path>/tree`, or returns nil when the
# :project_import_ndjson feature is not enabled for the project's namespace.
def ndjson_relation_reader
  if Feature.enabled?(:project_import_ndjson, project.namespace)
    tree_dir = File.join(shared.export_path, 'tree')
    ImportExport::JSON::NdjsonReader.new(tree_dir)
  end
end
# Builds the legacy reader over `<export_path>/project.json`, restricted to
# the project relation names and to `importable_path`.
def legacy_relation_reader
  json_path = File.join(shared.export_path, 'project.json')

  ImportExport::JSON::LegacyReader::File.new(
    json_path,
    relation_names: reader.project_relation_names,
    allowed_path: importable_path
  )
end
def relation_tree_restorer
@relation_tree_restorer ||= RelationTreeRestorer.new(
user: @user,
......
......@@ -67,7 +67,7 @@ module Gitlab
end
# Feeds every document of one relation into `process_relation_item!`.
#
# `consume_relation` is called without a block and its return value is
# iterated with `each`; every element is destructured into
# (data_hash, relation_index). Passing a block to `consume_relation` itself
# would be silently ignored by a reader that returns an Enumerator, so the
# iteration must happen here.
def process_relation!(relation_key, relation_definition)
  @relation_reader.consume_relation(@importable_path, relation_key).each do |data_hash, relation_index|
    process_relation_item!(relation_key, relation_definition, relation_index, data_hash)
  end
end
......
......@@ -186,5 +186,23 @@
}
],
"snippets": [],
"hooks": []
"hooks": [],
"custom_attributes": [
{
"id": 201,
"project_id": 5,
"created_at": "2016-06-14T15:01:51.315Z",
"updated_at": "2016-06-14T15:01:51.315Z",
"key": "color",
"value": "red"
},
{
"id": 202,
"project_id": 5,
"created_at": "2016-06-14T15:01:51.315Z",
"updated_at": "2016-06-14T15:01:51.315Z",
"key": "size",
"value": "small"
}
]
}
......@@ -15,7 +15,6 @@ RSpec.shared_examples 'import/export json legacy reader' do
subject { legacy_reader.consume_attributes("project") }
context 'no excluded attributes' do
let(:excluded_attributes) { [] }
let(:relation_names) { [] }
it 'returns the whole tree from parsed JSON' do
......@@ -42,60 +41,53 @@ RSpec.shared_examples 'import/export json legacy reader' do
describe '#consume_relation' do
context 'when valid path is passed' do
let(:key) { 'description' }
let(:key) { 'labels' }
context 'block not given' do
it 'returns value of the key' do
expect(legacy_reader).to receive(:relations).and_return({ key => 'test value' })
expect(legacy_reader.consume_relation("project", key)).to eq('test value')
end
end
subject { legacy_reader.consume_relation("project", key) }
context 'key has been consumed' do
before do
legacy_reader.consume_relation("project", key)
context 'key has not been consumed' do
it 'returns an Enumerator' do
expect(subject).to be_an_instance_of(Enumerator)
end
it 'does not yield' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.not_to yield_control
end
end
context 'value is nil' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => nil })
end
context 'value is nil' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => nil })
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
it 'does not yield' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.not_to yield_control
end
end
context 'value is an array' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => %w[label1 label2] })
end
context 'value is not array' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => 'value' })
it 'yields every relation value to the Enumerator' do
expect(subject.to_a).to eq([['label1', 0], ['label2', 1]])
end
end
it 'yield the value with index 0' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.to yield_with_args('value', 0)
context 'value is not array' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => 'non-array value' })
end
it 'yields the value with index 0 to the Enumerator' do
expect(subject.to_a).to eq([['non-array value', 0]])
end
end
end
context 'value is an array' do
context 'key has been consumed' do
before do
expect(legacy_reader).to receive(:relations).and_return({ key => %w[item1 item2 item3] })
legacy_reader.consume_relation("project", key).first
end
it 'yield each array element with index' do
expect do |blk|
legacy_reader.consume_relation("project", key, &blk)
end.to yield_successive_args(['item1', 0], ['item2', 1], ['item3', 2])
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the ndjson reader, run against the `light` fixture tree.
describe Gitlab::ImportExport::JSON::NdjsonReader do
include ImportExport::CommonUtil
# Directory the reader is pointed at in most examples.
let(:fixture) { 'spec/fixtures/lib/gitlab/import_export/light/tree' }
# Expected root attributes, parsed independently of the reader under test.
let(:root_tree) { JSON.parse(File.read(File.join(fixture, 'project.json'))) }
# `dir_path` is supplied by each describe/context below.
let(:ndjson_reader) { described_class.new(dir_path) }
let(:importable_path) { 'project' }
# Unpack the fixture archive once for the whole group ...
before :all do
extract_archive('spec/fixtures/lib/gitlab/import_export/light', 'tree.tar.gz')
end
# ... and remove the extracted artifacts afterwards.
after :all do
cleanup_artifacts_from_extract_archive('light')
end
describe '#exist?' do
subject { ndjson_reader.exist? }
context 'given valid dir_path' do
let(:dir_path) { fixture }
it { is_expected.to be true }
end
context 'given invalid dir_path' do
let(:dir_path) { 'invalid-dir-path' }
it { is_expected.to be false }
end
end
describe '#legacy?' do
let(:dir_path) { fixture }
subject { ndjson_reader.legacy? }
it { is_expected.to be false }
end
describe '#consume_attributes' do
let(:dir_path) { fixture }
subject { ndjson_reader.consume_attributes(importable_path) }
it 'returns the whole root tree from parsed JSON' do
expect(subject).to eq(root_tree)
end
end
describe '#consume_relation' do
let(:dir_path) { fixture }
# `key` is supplied by each context below.
subject { ndjson_reader.consume_relation(importable_path, key) }
context 'given any key' do
let(:key) { 'any-key' }
it 'returns an Enumerator' do
expect(subject).to be_an_instance_of(Enumerator)
end
end
context 'key has been consumed' do
let(:key) { 'issues' }
# Start enumerating once so the relation is marked as consumed
# before `subject` builds a second enumerator for the same key.
before do
ndjson_reader.consume_relation(importable_path, key).first
end
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'key has not been consumed' do
context 'relation file does not exist' do
let(:key) { 'non-exist-relation-file-name' }
# Expect the reader to probe exactly this relation file path and
# report it as absent.
before do
relation_file_path = File.join(dir_path, importable_path, "#{key}.ndjson")
expect(File).to receive(:exist?).with(relation_file_path).and_return(false)
end
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'relation file is empty' do
# NOTE(review): presumably backed by an `empty.ndjson` file inside the
# extracted fixture archive - confirm against the fixture contents.
let(:key) { 'empty' }
it 'yields nothing to the Enumerator' do
expect(subject.to_a).to eq([])
end
end
context 'relation file contains multiple lines' do
let(:key) { 'custom_attributes' }
# Expected documents, in file order; the second element of each yielded
# pair is the zero-based line index.
let(:attr_1) { JSON.parse('{"id":201,"project_id":5,"created_at":"2016-06-14T15:01:51.315Z","updated_at":"2016-06-14T15:01:51.315Z","key":"color","value":"red"}') }
let(:attr_2) { JSON.parse('{"id":202,"project_id":5,"created_at":"2016-06-14T15:01:51.315Z","updated_at":"2016-06-14T15:01:51.315Z","key":"size","value":"small"}') }
it 'yields every relation value to the Enumerator' do
expect(subject.to_a).to eq([[attr_1, 0], [attr_2, 1]])
end
end
end
end
end
......@@ -14,7 +14,7 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
let(:user) { create(:user) }
let(:shared) { Gitlab::ImportExport::Shared.new(importable) }
let(:attributes) { {} }
let(:attributes) { relation_reader.consume_attributes(importable_name) }
let(:members_mapper) do
Gitlab::ImportExport::MembersMapper.new(exported_members: {}, user: user, importable: importable)
......@@ -30,7 +30,7 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
relation_factory: relation_factory,
reader: reader,
importable: importable,
importable_path: nil,
importable_path: importable_path,
importable_attributes: attributes
)
end
......@@ -94,21 +94,24 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
end
context 'when restoring a project' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/project.json' }
let(:importable) { create(:project, :builds_enabled, :issues_disabled, name: 'project', path: 'project') }
let(:importable_name) { 'project' }
let(:importable_path) { 'project' }
let(:object_builder) { Gitlab::ImportExport::Project::ObjectBuilder }
let(:relation_factory) { Gitlab::ImportExport::Project::RelationFactory }
let(:reader) { Gitlab::ImportExport::Reader.new(shared: shared) }
context 'using legacy reader' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/project.json' }
let(:relation_reader) do
Gitlab::ImportExport::JSON::LegacyReader::File.new(
path,
relation_names: reader.project_relation_names
relation_names: reader.project_relation_names,
allowed_path: 'project'
)
end
let(:attributes) { relation_reader.consume_attributes(nil) }
let(:attributes) { relation_reader.consume_attributes('project') }
it_behaves_like 'import project successfully'
......@@ -118,6 +121,21 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
include_examples 'logging of relations creation'
end
context 'using ndjson reader' do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/complex/tree' }
let(:relation_reader) { Gitlab::ImportExport::JSON::NdjsonReader.new(path) }
before :all do
extract_archive('spec/fixtures/lib/gitlab/import_export/complex', 'tree.tar.gz')
end
after :all do
cleanup_artifacts_from_extract_archive('complex')
end
it_behaves_like 'import project successfully'
end
end
end
......@@ -125,9 +143,16 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
let(:path) { 'spec/fixtures/lib/gitlab/import_export/group_exports/no_children/group.json' }
let(:group) { create(:group) }
let(:importable) { create(:group, parent: group) }
let(:importable_name) { nil }
let(:importable_path) { nil }
let(:object_builder) { Gitlab::ImportExport::Group::ObjectBuilder }
let(:relation_factory) { Gitlab::ImportExport::Group::RelationFactory }
let(:relation_reader) { Gitlab::ImportExport::JSON::LegacyReader::File.new(path, relation_names: reader.group_relation_names) }
let(:relation_reader) do
Gitlab::ImportExport::JSON::LegacyReader::File.new(
path,
relation_names: reader.group_relation_names)
end
let(:reader) do
Gitlab::ImportExport::Reader.new(
shared: shared,
......@@ -135,6 +160,10 @@ describe Gitlab::ImportExport::RelationTreeRestorer do
)
end
it 'restores group tree' do
expect(subject).to eq(true)
end
include_examples 'logging of relations creation'
end
end
......@@ -15,9 +15,39 @@ module ImportExport
export_path = [prefix, 'spec', 'fixtures', 'lib', 'gitlab', 'import_export', name].compact
export_path = File.join(*export_path)
extract_archive(export_path, 'tree.tar.gz')
allow_any_instance_of(Gitlab::ImportExport).to receive(:export_path) { export_path }
end
# Unpacks `archive` (a gzipped tarball) inside the directory `path`.
#
# Does nothing and returns nil when the archive file is absent.
#
# tar is invoked through the argument-vector form of `system`, so `path` and
# `archive` are never parsed by a shell (spaces or metacharacters are safe),
# and output is discarded via redirect options instead of the bash-only `&>`
# operator, which a POSIX `sh` reads as "run in background".
#
# @param path [String] directory that contains the archive
# @param archive [String] archive file name, relative to `path`
# @return [Boolean, nil] the result of `system`, or nil when nothing was done
def extract_archive(path, archive)
  return unless File.exist?(File.join(path, archive))

  system('tar', 'xzf', archive, chdir: path, out: File::NULL, err: File::NULL)
end
# Removes the `tree` directory from a fixture export directory.
#
# The export directory is
# `[prefix/]spec/fixtures/lib/gitlab/import_export/<name>`. The removal only
# happens when that directory holds a `tree.tar.gz`; otherwise nothing is
# touched and nil is returned.
#
# `rm` is invoked through the argument-vector form of `system`, so the
# directory name is never parsed by a shell, and output is discarded via
# redirect options instead of the bash-only `&>` operator.
#
# @param name [String] fixture directory name
# @param prefix [String, nil] optional leading path component
# @return [Boolean, nil] the result of `system`, or nil when nothing was done
def cleanup_artifacts_from_extract_archive(name, prefix = nil)
  export_path = File.join(*[prefix, 'spec', 'fixtures', 'lib', 'gitlab', 'import_export', name].compact)
  return unless File.exist?(File.join(export_path, 'tree.tar.gz'))

  system('rm', '-fr', 'tree', chdir: export_path, out: File::NULL, err: File::NULL)
end
# Stubs `exist?` on every instance of both reader classes so that exactly one
# of them reports as present.
#
# @param reader [Symbol] :legacy_reader or :ndjson_reader
# @raise [RuntimeError] for any other value
def setup_reader(reader)
  unless %i[legacy_reader ndjson_reader].include?(reader)
    raise "invalid reader #{reader}. Supported readers: :legacy_reader, :ndjson_reader"
  end

  use_legacy = reader == :legacy_reader

  allow_any_instance_of(Gitlab::ImportExport::JSON::LegacyReader::File).to receive(:exist?).and_return(use_legacy)
  allow_any_instance_of(Gitlab::ImportExport::JSON::NdjsonReader).to receive(:exist?).and_return(!use_legacy)
end
# Relative path of the import/export fixtures directory.
def fixtures_path
  'spec/fixtures/lib/gitlab/import_export'
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment