Commit c1af169f authored by Douwe Maan's avatar Douwe Maan

Merge branch 'fix/import-export-performance' into 'master'

Improve Import/Export memory use and performance

Closes #35389 and #26556

See merge request !13957
parents cc24087b 9e1f8ac2
......@@ -451,6 +451,10 @@ module Ci
trace
end
def serializable_hash(options = {})
super(options).merge(when: read_attribute(:when))
end
private
def update_artifacts_size
......
---
title: Improve Import/Export memory usage
merge_request:
author:
type: fixed
......@@ -116,6 +116,7 @@ excluded_attributes:
statuses:
- :trace
- :token
- :when
push_event_payload:
- :event_id
......
......@@ -9,6 +9,8 @@ module Gitlab
@user = user
@shared = shared
@project = project
@project_id = project.id
@saved = true
end
def restore
......@@ -22,8 +24,10 @@ module Gitlab
@project_members = @tree_hash.delete('project_members')
ActiveRecord::Base.no_touching do
create_relations
ActiveRecord::Base.uncached do
ActiveRecord::Base.no_touching do
create_relations
end
end
rescue => e
@shared.error(e)
......@@ -48,21 +52,24 @@ module Gitlab
# the configuration yaml file too.
# Finally, it updates each attribute in the newly imported project.
def create_relations
saved = []
default_relation_list.each do |relation|
next unless relation.is_a?(Hash) || @tree_hash[relation.to_s].present?
if relation.is_a?(Hash)
create_sub_relations(relation, @tree_hash)
elsif @tree_hash[relation.to_s].present?
save_relation_hash(@tree_hash[relation.to_s], relation)
end
end
create_sub_relations(relation, @tree_hash) if relation.is_a?(Hash)
@saved
end
relation_key = relation.is_a?(Hash) ? relation.keys.first : relation
relation_hash_list = @tree_hash[relation_key.to_s]
def save_relation_hash(relation_hash_batch, relation_key)
relation_hash = create_relation(relation_key, relation_hash_batch)
next unless relation_hash_list
@saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash)
relation_hash = create_relation(relation_key, relation_hash_list)
saved << restored_project.append_or_update_attribute(relation_key, relation_hash)
end
saved.all?
# Restore the project again, extra query that skips holding the AR objects in memory
@restored_project = Project.find(@project_id)
end
def default_relation_list
......@@ -93,20 +100,42 @@ module Gitlab
# issue, finds any subrelations such as notes, creates them and assign them back to the hash
#
# Recursively calls this method if the sub-relation is a hash containing more sub-relations
def create_sub_relations(relation, tree_hash)
def create_sub_relations(relation, tree_hash, save: true)
relation_key = relation.keys.first.to_s
return if tree_hash[relation_key].blank?
[tree_hash[relation_key]].flatten.each do |relation_item|
relation.values.flatten.each do |sub_relation|
# We just use author to get the user ID, do not attempt to create an instance.
next if sub_relation == :author
tree_array = [tree_hash[relation_key]].flatten
# Avoid keeping a possible heavy object in memory once we are done with it
while relation_item = tree_array.shift
# The transaction at this level is less speedy than one single transaction
# But we can't have it in the upper level or GC won't get rid of the AR objects
# after we save the batch.
Project.transaction do
process_sub_relation(relation, relation_item)
# For every subrelation that hangs from Project, save the associated records alltogether
# This effectively batches all records per subrelation item, only keeping those in memory
# We have to keep in mind that more batch granularity << Memory, but >> Slowness
if save
save_relation_hash([relation_item], relation_key)
tree_hash[relation_key].delete(relation_item)
end
end
end
tree_hash.delete(relation_key) if save
end
def process_sub_relation(relation, relation_item)
relation.values.flatten.each do |sub_relation|
# We just use author to get the user ID, do not attempt to create an instance.
next if sub_relation == :author
create_sub_relations(sub_relation, relation_item) if sub_relation.is_a?(Hash)
create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash)
relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation)
relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank?
end
relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation)
relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank?
end
end
......@@ -121,14 +150,12 @@ module Gitlab
end
def create_relation(relation, relation_hash_list)
relation_type = relation.to_sym
relation_array = [relation_hash_list].flatten.map do |relation_hash|
Gitlab::ImportExport::RelationFactory.create(relation_sym: relation_type,
relation_hash: parsed_relation_hash(relation_hash, relation_type),
Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym,
relation_hash: parsed_relation_hash(relation_hash, relation.to_sym),
members_mapper: members_mapper,
user: @user,
project: restored_project)
project: @restored_project)
end.compact
relation_hash_list.is_a?(Array) ? relation_array : relation_array.first
......
......@@ -16,7 +16,7 @@ module Gitlab
error_out(error.message, caller[0].dup)
@errors << error.message
# Debug:
Rails.logger.error(error.backtrace)
Rails.logger.error(error.backtrace.join("\n"))
end
private
......
......@@ -11,8 +11,8 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
allow(@shared).to receive(:export_path).and_return('spec/lib/gitlab/import_export/')
@project = create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project')
allow(@project.repository).to receive(:fetch_ref).and_return(true)
allow(@project.repository.raw).to receive(:rugged_branch_exists?).and_return(false)
allow_any_instance_of(Repository).to receive(:fetch_ref).and_return(true)
allow_any_instance_of(Gitlab::Git::Repository).to receive(:branch_exists?).and_return(false)
expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA')
allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch)
......
......@@ -117,6 +117,13 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['pipelines'].first['statuses'].count { |hash| hash['type'] == 'Ci::Build' }).to eq(1)
end
it 'has no when YML attributes but only the DB column' do
allow_any_instance_of(Ci::Pipeline).to receive(:ci_yaml_file).and_return(File.read(Rails.root.join('spec/support/gitlab_stubs/gitlab_ci.yml')))
expect_any_instance_of(Ci::GitlabCiYamlProcessor).not_to receive(:build_attributes)
saved_project_json
end
it 'has pipeline commits' do
expect(saved_project_json['pipelines']).not_to be_empty
end
......@@ -251,15 +258,11 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
create(:label_priority, label: group_label, priority: 1)
milestone = create(:milestone, project: project)
merge_request = create(:merge_request, source_project: project, milestone: milestone)
commit_status = create(:commit_status, project: project)
ci_pipeline = create(:ci_pipeline,
project: project,
sha: merge_request.diff_head_sha,
ref: merge_request.source_branch,
statuses: [commit_status])
ci_build = create(:ci_build, project: project, when: nil)
ci_build.pipeline.update(project: project)
create(:commit_status, project: project, pipeline: ci_build.pipeline)
create(:ci_build, pipeline: ci_pipeline, project: project)
create(:milestone, project: project)
create(:note, noteable: issue, project: project)
create(:note, noteable: merge_request, project: project)
......@@ -267,7 +270,7 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
create(:note_on_commit,
author: user,
project: project,
commit_id: ci_pipeline.sha)
commit_id: ci_build.pipeline.sha)
create(:event, :created, target: milestone, project: project, author: user)
create(:service, project: project, type: 'CustomIssueTrackerService', category: 'issue_tracker')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment