Commit c1af169f authored by Douwe Maan

Merge branch 'fix/import-export-performance' into 'master'

Improve Import/Export memory use and performance

Closes #35389 and #26556

See merge request !13957
parents cc24087b 9e1f8ac2
...@@ -451,6 +451,10 @@ module Ci ...@@ -451,6 +451,10 @@ module Ci
trace trace
end end
def serializable_hash(options = {})
super(options).merge(when: read_attribute(:when))
end
private private
def update_artifacts_size def update_artifacts_size
......
---
title: Improve Import/Export memory usage
merge_request:
author:
type: fixed
...@@ -116,6 +116,7 @@ excluded_attributes: ...@@ -116,6 +116,7 @@ excluded_attributes:
statuses: statuses:
- :trace - :trace
- :token - :token
- :when
push_event_payload: push_event_payload:
- :event_id - :event_id
......
...@@ -9,6 +9,8 @@ module Gitlab ...@@ -9,6 +9,8 @@ module Gitlab
@user = user @user = user
@shared = shared @shared = shared
@project = project @project = project
@project_id = project.id
@saved = true
end end
def restore def restore
...@@ -22,9 +24,11 @@ module Gitlab ...@@ -22,9 +24,11 @@ module Gitlab
@project_members = @tree_hash.delete('project_members') @project_members = @tree_hash.delete('project_members')
ActiveRecord::Base.uncached do
ActiveRecord::Base.no_touching do ActiveRecord::Base.no_touching do
create_relations create_relations
end end
end
rescue => e rescue => e
@shared.error(e) @shared.error(e)
false false
...@@ -48,21 +52,24 @@ module Gitlab ...@@ -48,21 +52,24 @@ module Gitlab
# the configuration yaml file too. # the configuration yaml file too.
# Finally, it updates each attribute in the newly imported project. # Finally, it updates each attribute in the newly imported project.
def create_relations def create_relations
saved = []
default_relation_list.each do |relation| default_relation_list.each do |relation|
next unless relation.is_a?(Hash) || @tree_hash[relation.to_s].present? if relation.is_a?(Hash)
create_sub_relations(relation, @tree_hash)
elsif @tree_hash[relation.to_s].present?
save_relation_hash(@tree_hash[relation.to_s], relation)
end
end
create_sub_relations(relation, @tree_hash) if relation.is_a?(Hash) @saved
end
relation_key = relation.is_a?(Hash) ? relation.keys.first : relation def save_relation_hash(relation_hash_batch, relation_key)
relation_hash_list = @tree_hash[relation_key.to_s] relation_hash = create_relation(relation_key, relation_hash_batch)
next unless relation_hash_list @saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash)
relation_hash = create_relation(relation_key, relation_hash_list) # Restore the project again, extra query that skips holding the AR objects in memory
saved << restored_project.append_or_update_attribute(relation_key, relation_hash) @restored_project = Project.find(@project_id)
end
saved.all?
end end
def default_relation_list def default_relation_list
...@@ -93,22 +100,44 @@ module Gitlab ...@@ -93,22 +100,44 @@ module Gitlab
# issue, finds any subrelations such as notes, creates them and assign them back to the hash # issue, finds any subrelations such as notes, creates them and assign them back to the hash
# #
# Recursively calls this method if the sub-relation is a hash containing more sub-relations # Recursively calls this method if the sub-relation is a hash containing more sub-relations
def create_sub_relations(relation, tree_hash) def create_sub_relations(relation, tree_hash, save: true)
relation_key = relation.keys.first.to_s relation_key = relation.keys.first.to_s
return if tree_hash[relation_key].blank? return if tree_hash[relation_key].blank?
[tree_hash[relation_key]].flatten.each do |relation_item| tree_array = [tree_hash[relation_key]].flatten
# Avoid keeping a possible heavy object in memory once we are done with it
while relation_item = tree_array.shift
# The transaction at this level is less speedy than one single transaction
# But we can't have it in the upper level or GC won't get rid of the AR objects
# after we save the batch.
Project.transaction do
process_sub_relation(relation, relation_item)
# For every subrelation that hangs from Project, save the associated records all together
# This effectively batches all records per subrelation item, only keeping those in memory
# We have to keep in mind that more batch granularity << Memory, but >> Slowness
if save
save_relation_hash([relation_item], relation_key)
tree_hash[relation_key].delete(relation_item)
end
end
end
tree_hash.delete(relation_key) if save
end
def process_sub_relation(relation, relation_item)
relation.values.flatten.each do |sub_relation| relation.values.flatten.each do |sub_relation|
# We just use author to get the user ID, do not attempt to create an instance. # We just use author to get the user ID, do not attempt to create an instance.
next if sub_relation == :author next if sub_relation == :author
create_sub_relations(sub_relation, relation_item) if sub_relation.is_a?(Hash) create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash)
relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation)
relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank?
end end
end end
end
def assign_relation_hash(relation_item, sub_relation) def assign_relation_hash(relation_item, sub_relation)
if sub_relation.is_a?(Hash) if sub_relation.is_a?(Hash)
...@@ -121,14 +150,12 @@ module Gitlab ...@@ -121,14 +150,12 @@ module Gitlab
end end
def create_relation(relation, relation_hash_list) def create_relation(relation, relation_hash_list)
relation_type = relation.to_sym
relation_array = [relation_hash_list].flatten.map do |relation_hash| relation_array = [relation_hash_list].flatten.map do |relation_hash|
Gitlab::ImportExport::RelationFactory.create(relation_sym: relation_type, Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym,
relation_hash: parsed_relation_hash(relation_hash, relation_type), relation_hash: parsed_relation_hash(relation_hash, relation.to_sym),
members_mapper: members_mapper, members_mapper: members_mapper,
user: @user, user: @user,
project: restored_project) project: @restored_project)
end.compact end.compact
relation_hash_list.is_a?(Array) ? relation_array : relation_array.first relation_hash_list.is_a?(Array) ? relation_array : relation_array.first
......
...@@ -16,7 +16,7 @@ module Gitlab ...@@ -16,7 +16,7 @@ module Gitlab
error_out(error.message, caller[0].dup) error_out(error.message, caller[0].dup)
@errors << error.message @errors << error.message
# Debug: # Debug:
Rails.logger.error(error.backtrace) Rails.logger.error(error.backtrace.join("\n"))
end end
private private
......
...@@ -11,8 +11,8 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do ...@@ -11,8 +11,8 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
allow(@shared).to receive(:export_path).and_return('spec/lib/gitlab/import_export/') allow(@shared).to receive(:export_path).and_return('spec/lib/gitlab/import_export/')
@project = create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project') @project = create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project')
allow(@project.repository).to receive(:fetch_ref).and_return(true) allow_any_instance_of(Repository).to receive(:fetch_ref).and_return(true)
allow(@project.repository.raw).to receive(:rugged_branch_exists?).and_return(false) allow_any_instance_of(Gitlab::Git::Repository).to receive(:branch_exists?).and_return(false)
expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA') expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA')
allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch) allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch)
......
...@@ -117,6 +117,13 @@ describe Gitlab::ImportExport::ProjectTreeSaver do ...@@ -117,6 +117,13 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['pipelines'].first['statuses'].count { |hash| hash['type'] == 'Ci::Build' }).to eq(1) expect(saved_project_json['pipelines'].first['statuses'].count { |hash| hash['type'] == 'Ci::Build' }).to eq(1)
end end
it 'has no when YML attributes but only the DB column' do
allow_any_instance_of(Ci::Pipeline).to receive(:ci_yaml_file).and_return(File.read(Rails.root.join('spec/support/gitlab_stubs/gitlab_ci.yml')))
expect_any_instance_of(Ci::GitlabCiYamlProcessor).not_to receive(:build_attributes)
saved_project_json
end
it 'has pipeline commits' do it 'has pipeline commits' do
expect(saved_project_json['pipelines']).not_to be_empty expect(saved_project_json['pipelines']).not_to be_empty
end end
...@@ -251,15 +258,11 @@ describe Gitlab::ImportExport::ProjectTreeSaver do ...@@ -251,15 +258,11 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
create(:label_priority, label: group_label, priority: 1) create(:label_priority, label: group_label, priority: 1)
milestone = create(:milestone, project: project) milestone = create(:milestone, project: project)
merge_request = create(:merge_request, source_project: project, milestone: milestone) merge_request = create(:merge_request, source_project: project, milestone: milestone)
commit_status = create(:commit_status, project: project)
ci_pipeline = create(:ci_pipeline, ci_build = create(:ci_build, project: project, when: nil)
project: project, ci_build.pipeline.update(project: project)
sha: merge_request.diff_head_sha, create(:commit_status, project: project, pipeline: ci_build.pipeline)
ref: merge_request.source_branch,
statuses: [commit_status])
create(:ci_build, pipeline: ci_pipeline, project: project)
create(:milestone, project: project) create(:milestone, project: project)
create(:note, noteable: issue, project: project) create(:note, noteable: issue, project: project)
create(:note, noteable: merge_request, project: project) create(:note, noteable: merge_request, project: project)
...@@ -267,7 +270,7 @@ describe Gitlab::ImportExport::ProjectTreeSaver do ...@@ -267,7 +270,7 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
create(:note_on_commit, create(:note_on_commit,
author: user, author: user,
project: project, project: project,
commit_id: ci_pipeline.sha) commit_id: ci_build.pipeline.sha)
create(:event, :created, target: milestone, project: project, author: user) create(:event, :created, target: milestone, project: project, author: user)
create(:service, project: project, type: 'CustomIssueTrackerService', category: 'issue_tracker') create(:service, project: project, type: 'CustomIssueTrackerService', category: 'issue_tracker')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment