Commit bc6828b4 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch '26910-kamil-import-export-constant-memory' into 'master'

Make constant-memory export serializer

See merge request gitlab-org/gitlab!17080
parents 47fe9772 fdce217f
......@@ -26,6 +26,51 @@ module Gitlab
class FastHashSerializer
attr_reader :subject, :tree
# Usage of this class results in delayed
# serialization of relation. The serialization
# will be triggered when the `JSON.generate`
# is executed.
#
# This class uses memory-optimised, lazily
# initialised, fast to recycle relation
# serialization.
#
# The `JSON.generate` does use `#to_json`,
# that returns raw JSON content that is written
# directly to file.
# Wraps one batch of a relation and renders it to a raw JSON fragment.
#
# The fragment is the JSON of every record in the batch, separated by
# commas and NOT wrapped in `[` `]`.
# NOTE(review): presumably the enclosing array supplies the brackets when
# `JSON.generate` walks the tree and calls `#to_json` here -- confirm
# against the caller.
class JSONBatchRelation
  include Gitlab::Utils::StrongMemoize

  # relation - the batch to serialize (must respond to `each`, and to
  #            `preload` when preloads are given)
  # options  - passed unchanged to every record's `#to_json`
  # preloads - associations handed to `preload`; skipped when nil/false
  def initialize(relation, options, preloads)
    @relation = relation
    @options = options
    @preloads = preloads
  end

  # Returns the comma-separated JSON of all records in the batch as a
  # single String. The block is wrapped in `strong_memoize(:raw_json)`.
  def raw_json
    strong_memoize(:raw_json) do
      records = @preloads ? @relation.preload(@preloads) : @relation

      # Append into one unfrozen buffer instead of building an
      # intermediate array of per-record strings.
      records.each_with_object(+'') do |record, buffer|
        buffer << ',' unless buffer.empty?
        buffer << record.to_json(@options)
      end
    end
  end

  # Returns the pre-rendered fragment; the `options` argument is ignored.
  def to_json(options = {})
    raw_json
  end

  # A Hash representation is deliberately not provided.
  def as_json(*)
    raise NotImplementedError
  end
end
BATCH_SIZE = 100
def initialize(subject, tree, batch_size: BATCH_SIZE)
......@@ -34,8 +79,11 @@ module Gitlab
@tree = tree
end
# Serializes the subject into a Hash for the given option tree
# (e.g. Project#as_json).
#
# Because `JSONBatchRelation` is used, the returned hash is only
# partially serialized and its nested content is not easily accessible:
# only top-level objects can be manipulated or replaced.
# Any later mutation of the hash (such as `fix_project_tree`)
# has to take that into account.
def execute
  attributes = simple_serialize
  attributes.merge(serialize_includes)
end
......@@ -85,13 +133,20 @@ module Gitlab
return record.as_json(options)
end
# has-many relation
data = []
record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches
# rubocop:disable Cop/InBatches
# If we put `rubocop:disable` inline after `do |batch|`,
# `Cop/LineBreakAroundConditionalBlock` will fail
record.in_batches(of: @batch_size) do |batch|
if Feature.enabled?(:export_fast_serialize_with_raw_json, default_enabled: true)
data.append(JSONBatchRelation.new(batch, options, preloads[key]).tap(&:raw_json))
else
batch = batch.preload(preloads[key]) if preloads&.key?(key)
data += batch.as_json(options)
end
end
# rubocop:enable Cop/InBatches
data
end
......
......@@ -20,7 +20,8 @@ module Gitlab
project_tree = serialize_project_tree
fix_project_tree(project_tree)
File.write(full_path, project_tree.to_json)
project_tree_json = JSON.generate(project_tree)
File.write(full_path, project_tree_json)
true
rescue => e
......@@ -30,6 +31,8 @@ module Gitlab
private
# Be aware that the resulting hash must be a pure Hash that no longer
# includes any AR objects, only objects that respond to `.to_json`
def fix_project_tree(project_tree)
if @params[:description].present?
project_tree['description'] = @params[:description]
......
require 'spec_helper'
describe Gitlab::ImportExport::FastHashSerializer do
subject { described_class.new(project, tree).execute }
# FastHashSerializer#execute generates the hash which is not easily accessible
# and includes `JSONBatchRelation` items which are serialized at this point.
# Wrapping the result into JSON generating/parsing is for making
# the testing more convenient. Doing this, we can check that
# all items are properly serialized while traversing the simple hash.
subject { JSON.parse(JSON.generate(described_class.new(project, tree).execute)) }
let!(:project) { setup_project }
let(:user) { create(:user) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment