Commit 68c070f9 authored by Yorick Peterse's avatar Yorick Peterse

Merge branch 'deduplicate-merge-request-diff-authors' into 'master'

Reduce space needed for merge request diff commits

See merge request gitlab-org/gitlab!63669
parents 7edc61c2 43a3ce63
...@@ -360,7 +360,7 @@ class MergeRequest < ApplicationRecord ...@@ -360,7 +360,7 @@ class MergeRequest < ApplicationRecord
scope :preload_approved_by_users, -> { preload(:approved_by_users) } scope :preload_approved_by_users, -> { preload(:approved_by_users) }
scope :preload_metrics, -> (relation) { preload(metrics: relation) } scope :preload_metrics, -> (relation) { preload(metrics: relation) }
scope :preload_project_and_latest_diff, -> { preload(:source_project, :latest_merge_request_diff) } scope :preload_project_and_latest_diff, -> { preload(:source_project, :latest_merge_request_diff) }
scope :preload_latest_diff_commit, -> { preload(latest_merge_request_diff: :merge_request_diff_commits) } scope :preload_latest_diff_commit, -> { preload(latest_merge_request_diff: { merge_request_diff_commits: [:commit_author, :committer] }) }
scope :preload_milestoneish_associations, -> { preload_routables.preload(:assignees, :labels) } scope :preload_milestoneish_associations, -> { preload_routables.preload(:assignees, :labels) }
scope :with_web_entity_associations, -> { preload(:author, target_project: [:project_feature, group: [:route, :parent], namespace: :route]) } scope :with_web_entity_associations, -> { preload(:author, target_project: [:project_feature, group: [:route, :parent], namespace: :route]) }
......
# frozen_string_literal: true
class MergeRequest::DiffCommitUser < ApplicationRecord
validates :name, length: { maximum: 512 }
validates :email, length: { maximum: 512 }
validates :name, presence: true, unless: :email
validates :email, presence: true, unless: :name
# Prepares a value to be inserted into a column in the table
# `merge_request_diff_commit_users`. Values in this table are limited to
# 512 characters.
#
# We treat empty strings as NULL values, as there's no point in (for
# example) storing a row where both the name and Email are an empty
# string. In addition, if we treated them differently we could end up with
# two rows: one where field X is NULL, and one where field X is an empty
# string. This is redundant, so we avoid storing such data.
def self.prepare(value)
value.present? ? value[0..511] : nil
end
# Creates a new row, or returns an existing one if a row already exists.
def self.find_or_create(name, email)
find_or_create_by!(name: name, email: email)
rescue ActiveRecord::RecordNotUnique
retry
end
# Finds many (name, email) pairs in bulk.
def self.bulk_find(pairs)
queries = {}
rows = []
pairs.each do |(name, email)|
queries[[name, email]] = where(name: name, email: email).to_sql
end
# We may end up having to query many users. To ensure we don't hit any
# query size limits, we get a fixed number of users at a time.
queries.values.each_slice(1_000).map do |slice|
rows.concat(from("(#{slice.join("\nUNION ALL\n")}) #{table_name}").to_a)
end
rows
end
# Finds or creates rows for the given pairs of names and Emails.
#
# The `names_and_emails` argument must be an Array/Set of tuples like so:
#
# [
# [name, email],
# [name, email],
# ...
# ]
#
# This method expects that the names and Emails have already been trimmed to
# at most 512 characters.
#
# The return value is a Hash that maps these tuples to instances of this
# model.
def self.bulk_find_or_create(pairs)
mapping = {}
create = []
# Over time, fewer new rows need to be created. We take advantage of that
# here by first finding all rows that already exist, using a limited number
# of queries (in most cases only one query will be needed).
bulk_find(pairs).each do |row|
mapping[[row.name, row.email]] = row
end
pairs.each do |(name, email)|
create << { name: name, email: email } unless mapping[[name, email]]
end
return mapping if create.empty?
# Sometimes we may need to insert new users into the table. We do this in
# bulk, so we only need one INSERT for all missing users.
insert_all(create, returning: %w[id name email]).each do |row|
mapping[[row['name'], row['email']]] =
new(id: row['id'], name: row['name'], email: row['email'])
end
# It's possible for (name, email) pairs to be inserted concurrently,
# resulting in the above insert not returning anything. Here we get any
# remaining users that were created concurrently.
bulk_find(pairs.reject { |pair| mapping.key?(pair) }).each do |row|
mapping[[row.name, row.email]] = row
end
mapping
end
end
...@@ -701,7 +701,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -701,7 +701,7 @@ class MergeRequestDiff < ApplicationRecord
end end
def load_commits(limit: nil) def load_commits(limit: nil)
commits = merge_request_diff_commits.limit(limit) commits = merge_request_diff_commits.with_users.limit(limit)
.map { |commit| Commit.from_hash(commit.to_hash, project) } .map { |commit| Commit.from_hash(commit.to_hash, project) }
CommitCollection CommitCollection
......
...@@ -9,21 +9,51 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -9,21 +9,51 @@ class MergeRequestDiffCommit < ApplicationRecord
belongs_to :merge_request_diff belongs_to :merge_request_diff
# This relation is called `commit_author` and not `author`, as the project
# import/export logic treats relations named `author` as instances of the
# `User` class.
#
# NOTE: these columns are _not_ indexed, nor do they use foreign keys.
#
# This is deliberate, as creating these indexes on GitLab.com takes a _very_
# long time. In addition, there's no real need for them either based on how
# this data is used.
#
# For more information, refer to the following:
#
# - https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5038#note_614592881
# - https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63669
belongs_to :commit_author, class_name: 'MergeRequest::DiffCommitUser'
belongs_to :committer, class_name: 'MergeRequest::DiffCommitUser'
sha_attribute :sha sha_attribute :sha
alias_attribute :id, :sha alias_attribute :id, :sha
serialize :trailers, Serializers::Json # rubocop:disable Cop/ActiveRecordSerialize serialize :trailers, Serializers::Json # rubocop:disable Cop/ActiveRecordSerialize
validates :trailers, json_schema: { filename: 'git_trailers' } validates :trailers, json_schema: { filename: 'git_trailers' }
scope :with_users, -> { preload(:commit_author, :committer) }
# A list of keys of which their values need to be trimmed before they can be
# inserted into the merge_request_diff_commit_users table.
TRIM_USER_KEYS =
%i[author_name author_email committer_name committer_email].freeze
# Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead. # Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead.
# cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress # cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress
def self.create_bulk(merge_request_diff_id, commits) def self.create_bulk(merge_request_diff_id, commits)
rows = commits.map.with_index do |commit, index| commit_hashes, user_tuples = prepare_commits_for_bulk_insert(commits)
# See #parent_ids. users = MergeRequest::DiffCommitUser.bulk_find_or_create(user_tuples)
commit_hash = commit.to_hash.except(:parent_ids)
rows = commit_hashes.map.with_index do |commit_hash, index|
sha = commit_hash.delete(:id) sha = commit_hash.delete(:id)
author = users[[commit_hash[:author_name], commit_hash[:author_email]]]
committer =
users[[commit_hash[:committer_name], commit_hash[:committer_email]]]
commit_hash.merge( commit_hash.merge(
commit_author_id: author&.id,
committer_id: committer&.id,
merge_request_diff_id: merge_request_diff_id, merge_request_diff_id: merge_request_diff_id,
relative_order: index, relative_order: index,
sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize
...@@ -36,6 +66,24 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -36,6 +66,24 @@ class MergeRequestDiffCommit < ApplicationRecord
Gitlab::Database.bulk_insert(self.table_name, rows) # rubocop:disable Gitlab/BulkInsert Gitlab::Database.bulk_insert(self.table_name, rows) # rubocop:disable Gitlab/BulkInsert
end end
def self.prepare_commits_for_bulk_insert(commits)
user_tuples = Set.new
hashes = commits.map do |commit|
hash = commit.to_hash.except(:parent_ids)
TRIM_USER_KEYS.each do |key|
hash[key] = MergeRequest::DiffCommitUser.prepare(hash[key])
end
user_tuples << [hash[:author_name], hash[:author_email]]
user_tuples << [hash[:committer_name], hash[:committer_email]]
hash
end
[hashes, user_tuples]
end
def self.oldest_merge_request_id_per_commit(project_id, shas) def self.oldest_merge_request_id_per_commit(project_id, shas)
# This method is defined here and not on MergeRequest, otherwise the SHA # This method is defined here and not on MergeRequest, otherwise the SHA
# values used in the WHERE below won't be encoded correctly. # values used in the WHERE below won't be encoded correctly.
...@@ -54,4 +102,20 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -54,4 +102,20 @@ class MergeRequestDiffCommit < ApplicationRecord
) )
.group(:sha) .group(:sha)
end end
def author_name
commit_author_id ? commit_author.name : super
end
def author_email
commit_author_id ? commit_author.email : super
end
def committer_name
committer_id ? committer.name : super
end
def committer_email
committer_id ? committer.email : super
end
end end
# frozen_string_literal: true
class AddMergeRequestDiffCommitUsers < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
def up
create_table_with_constraints :merge_request_diff_commit_users, id: :bigint do |t|
t.text :name
t.text :email
t.text_limit :name, 512
t.text_limit :email, 512
t.index [:name, :email], unique: true
end
# Names or Emails can be optional, so in some cases one of these may be
# null. But if both are NULL/empty, no row should exist in this table.
add_check_constraint(
:merge_request_diff_commit_users,
"(COALESCE(name, '') != '') OR (COALESCE(email, '') != '')",
:merge_request_diff_commit_users_name_or_email_existence
)
end
def down
drop_table :merge_request_diff_commit_users
end
end
# frozen_string_literal: true
class AddMergeRequestDiffCommitUserColumns < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
def up
# NOTE: these columns are _not_ indexed, nor do they use foreign keys.
#
# This is deliberate, as creating these indexes on GitLab.com takes a _very_
# long time. In addition, there's no real need for them either based on how
# this data is used.
#
# For more information, refer to the following:
#
# - https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5038#note_614592881
# - https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63669
add_column(:merge_request_diff_commits, :commit_author_id, :bigint)
add_column(:merge_request_diff_commits, :committer_id, :bigint)
end
def down
remove_column(:merge_request_diff_commits, :commit_author_id)
remove_column(:merge_request_diff_commits, :committer_id)
end
end
# frozen_string_literal: true
class ScheduleMergeRequestDiffUsersBackgroundMigration < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
# The number of rows to process in a single migration job.
#
# The minimum interval for background migrations is two minutes. On staging we
# observed we can process roughly 20 000 rows in a minute. Based on the total
# number of rows on staging, this translates to a total processing time of
# roughly 14 days.
#
# By using a batch size of 40 000, we maintain a rate of roughly 20 000 rows
# per minute, hopefully keeping the total migration time under two weeks;
# instead of four weeks.
BATCH_SIZE = 40_000
MIGRATION_NAME = 'MigrateMergeRequestDiffCommitUsers'
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
def up
start = MergeRequestDiff.minimum(:id).to_i
max = MergeRequestDiff.maximum(:id).to_i
delay = BackgroundMigrationWorker.minimum_interval
# The table merge_request_diff_commits contains _a lot_ of rows (roughly 400
# 000 000 on staging). Iterating a table that large to determine job ranges
# would take a while.
#
# To avoid that overhead, we simply schedule fixed ranges according to the
# minimum and maximum IDs. The background migration in turn only processes
# rows that actually exist.
while start < max
stop = start + BATCH_SIZE
migrate_in(delay, MIGRATION_NAME, [start, stop])
Gitlab::Database::BackgroundMigrationJob
.create!(class_name: MIGRATION_NAME, arguments: [start, stop])
delay += BackgroundMigrationWorker.minimum_interval
start += BATCH_SIZE
end
end
def down
# no-op
end
end
42b3090efee66f5a7a5c06d8768d1417892c5d6745f60163a09f58e6e3722761
\ No newline at end of file
aa04d433e400ed3ec11e5d40ada72f122b1d8b7a82f8803d9206da5c94ec5ef9
\ No newline at end of file
0c01bb41113c468a602649b591e1fd2959a6e3190c835ef2e27351cf69f50fd5
\ No newline at end of file
...@@ -14760,6 +14760,24 @@ CREATE SEQUENCE merge_request_context_commits_id_seq ...@@ -14760,6 +14760,24 @@ CREATE SEQUENCE merge_request_context_commits_id_seq
ALTER SEQUENCE merge_request_context_commits_id_seq OWNED BY merge_request_context_commits.id; ALTER SEQUENCE merge_request_context_commits_id_seq OWNED BY merge_request_context_commits.id;
CREATE TABLE merge_request_diff_commit_users (
id bigint NOT NULL,
name text,
email text,
CONSTRAINT check_147358fc42 CHECK ((char_length(name) <= 512)),
CONSTRAINT check_f5fa206cf7 CHECK ((char_length(email) <= 512)),
CONSTRAINT merge_request_diff_commit_users_name_or_email_existence CHECK (((COALESCE(name, ''::text) <> ''::text) OR (COALESCE(email, ''::text) <> ''::text)))
);
CREATE SEQUENCE merge_request_diff_commit_users_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE merge_request_diff_commit_users_id_seq OWNED BY merge_request_diff_commit_users.id;
CREATE TABLE merge_request_diff_commits ( CREATE TABLE merge_request_diff_commits (
authored_date timestamp without time zone, authored_date timestamp without time zone,
committed_date timestamp without time zone, committed_date timestamp without time zone,
...@@ -14771,7 +14789,9 @@ CREATE TABLE merge_request_diff_commits ( ...@@ -14771,7 +14789,9 @@ CREATE TABLE merge_request_diff_commits (
committer_name text, committer_name text,
committer_email text, committer_email text,
message text, message text,
trailers jsonb DEFAULT '{}'::jsonb NOT NULL trailers jsonb DEFAULT '{}'::jsonb NOT NULL,
commit_author_id bigint,
committer_id bigint
); );
CREATE TABLE merge_request_diff_details ( CREATE TABLE merge_request_diff_details (
...@@ -20161,6 +20181,8 @@ ALTER TABLE ONLY merge_request_cleanup_schedules ALTER COLUMN merge_request_id S ...@@ -20161,6 +20181,8 @@ ALTER TABLE ONLY merge_request_cleanup_schedules ALTER COLUMN merge_request_id S
ALTER TABLE ONLY merge_request_context_commits ALTER COLUMN id SET DEFAULT nextval('merge_request_context_commits_id_seq'::regclass); ALTER TABLE ONLY merge_request_context_commits ALTER COLUMN id SET DEFAULT nextval('merge_request_context_commits_id_seq'::regclass);
ALTER TABLE ONLY merge_request_diff_commit_users ALTER COLUMN id SET DEFAULT nextval('merge_request_diff_commit_users_id_seq'::regclass);
ALTER TABLE ONLY merge_request_diff_details ALTER COLUMN merge_request_diff_id SET DEFAULT nextval('merge_request_diff_details_merge_request_diff_id_seq'::regclass); ALTER TABLE ONLY merge_request_diff_details ALTER COLUMN merge_request_diff_id SET DEFAULT nextval('merge_request_diff_details_merge_request_diff_id_seq'::regclass);
ALTER TABLE ONLY merge_request_diffs ALTER COLUMN id SET DEFAULT nextval('merge_request_diffs_id_seq'::regclass); ALTER TABLE ONLY merge_request_diffs ALTER COLUMN id SET DEFAULT nextval('merge_request_diffs_id_seq'::regclass);
...@@ -21601,6 +21623,9 @@ ALTER TABLE ONLY merge_request_context_commit_diff_files ...@@ -21601,6 +21623,9 @@ ALTER TABLE ONLY merge_request_context_commit_diff_files
ALTER TABLE ONLY merge_request_context_commits ALTER TABLE ONLY merge_request_context_commits
ADD CONSTRAINT merge_request_context_commits_pkey PRIMARY KEY (id); ADD CONSTRAINT merge_request_context_commits_pkey PRIMARY KEY (id);
ALTER TABLE ONLY merge_request_diff_commit_users
ADD CONSTRAINT merge_request_diff_commit_users_pkey PRIMARY KEY (id);
ALTER TABLE ONLY merge_request_diff_commits ALTER TABLE ONLY merge_request_diff_commits
ADD CONSTRAINT merge_request_diff_commits_pkey PRIMARY KEY (merge_request_diff_id, relative_order); ADD CONSTRAINT merge_request_diff_commits_pkey PRIMARY KEY (merge_request_diff_id, relative_order);
...@@ -23904,6 +23929,8 @@ CREATE INDEX index_merge_request_blocks_on_blocked_merge_request_id ON merge_req ...@@ -23904,6 +23929,8 @@ CREATE INDEX index_merge_request_blocks_on_blocked_merge_request_id ON merge_req
CREATE UNIQUE INDEX index_merge_request_cleanup_schedules_on_merge_request_id ON merge_request_cleanup_schedules USING btree (merge_request_id); CREATE UNIQUE INDEX index_merge_request_cleanup_schedules_on_merge_request_id ON merge_request_cleanup_schedules USING btree (merge_request_id);
CREATE UNIQUE INDEX index_merge_request_diff_commit_users_on_name_and_email ON merge_request_diff_commit_users USING btree (name, email);
CREATE INDEX index_merge_request_diff_commits_on_sha ON merge_request_diff_commits USING btree (sha); CREATE INDEX index_merge_request_diff_commits_on_sha ON merge_request_diff_commits USING btree (sha);
CREATE INDEX index_merge_request_diff_details_failed_verification ON merge_request_diff_details USING btree (verification_retry_at NULLS FIRST) WHERE (verification_state = 3); CREATE INDEX index_merge_request_diff_details_failed_verification ON merge_request_diff_details USING btree (verification_retry_at NULLS FIRST) WHERE (verification_state = 3);
...@@ -61,7 +61,9 @@ tree: ...@@ -61,7 +61,9 @@ tree:
- :push_event_payload - :push_event_payload
- :suggestions - :suggestions
- merge_request_diff: - merge_request_diff:
- :merge_request_diff_commits - merge_request_diff_commits:
- :commit_author
- :committer
- :merge_request_diff_files - :merge_request_diff_files
- events: - events:
- :push_event_payload - :push_event_payload
...@@ -201,6 +203,10 @@ excluded_attributes: ...@@ -201,6 +203,10 @@ excluded_attributes:
- :verification_failure - :verification_failure
merge_request_diff_commits: merge_request_diff_commits:
- :merge_request_diff_id - :merge_request_diff_id
- :commit_author_id
- :committer_id
merge_request_diff_commit_user:
- :id
merge_request_diff_detail: merge_request_diff_detail:
- :merge_request_diff_id - :merge_request_diff_id
- :verification_retry_at - :verification_retry_at
......
...@@ -28,6 +28,7 @@ module Gitlab ...@@ -28,6 +28,7 @@ module Gitlab
def find def find
return if epic? && group.nil? return if epic? && group.nil?
return find_diff_commit_user if diff_commit_user?
super super
end end
...@@ -81,6 +82,13 @@ module Gitlab ...@@ -81,6 +82,13 @@ module Gitlab
end end
end end
def find_diff_commit_user
find_with_cache do
MergeRequest::DiffCommitUser
.find_or_create(@attributes['name'], @attributes['email'])
end
end
def label? def label?
klass == Label klass == Label
end end
...@@ -101,6 +109,10 @@ module Gitlab ...@@ -101,6 +109,10 @@ module Gitlab
klass == DesignManagement::Design klass == DesignManagement::Design
end end
def diff_commit_user?
klass == MergeRequest::DiffCommitUser
end
# If an existing group milestone used the IID # If an existing group milestone used the IID
# claim the IID back and set the group milestone to use one available # claim the IID back and set the group milestone to use one available
# This is necessary to fix situations like the following: # This is necessary to fix situations like the following:
......
...@@ -31,7 +31,9 @@ module Gitlab ...@@ -31,7 +31,9 @@ module Gitlab
ci_cd_settings: 'ProjectCiCdSetting', ci_cd_settings: 'ProjectCiCdSetting',
error_tracking_setting: 'ErrorTracking::ProjectErrorTrackingSetting', error_tracking_setting: 'ErrorTracking::ProjectErrorTrackingSetting',
links: 'Releases::Link', links: 'Releases::Link',
metrics_setting: 'ProjectMetricsSetting' }.freeze metrics_setting: 'ProjectMetricsSetting',
commit_author: 'MergeRequest::DiffCommitUser',
committer: 'MergeRequest::DiffCommitUser' }.freeze
BUILD_MODELS = %i[Ci::Build commit_status].freeze BUILD_MODELS = %i[Ci::Build commit_status].freeze
...@@ -56,6 +58,7 @@ module Gitlab ...@@ -56,6 +58,7 @@ module Gitlab
external_pull_request external_pull_request
external_pull_requests external_pull_requests
DesignManagement::Design DesignManagement::Design
MergeRequest::DiffCommitUser
].freeze ].freeze
def create def create
......
...@@ -56,6 +56,7 @@ RSpec.describe 'Database schema' do ...@@ -56,6 +56,7 @@ RSpec.describe 'Database schema' do
ldap_group_links: %w[group_id], ldap_group_links: %w[group_id],
members: %w[source_id created_by_id], members: %w[source_id created_by_id],
merge_requests: %w[last_edited_by_id state_id], merge_requests: %w[last_edited_by_id state_id],
merge_request_diff_commits: %w[commit_author_id committer_id],
namespaces: %w[owner_id parent_id], namespaces: %w[owner_id parent_id],
notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id confirmed_by_id discussion_id], notes: %w[author_id commit_id noteable_id updated_by_id resolved_by_id confirmed_by_id discussion_id],
notification_settings: %w[source_id], notification_settings: %w[source_id],
......
# frozen_string_literal: true
FactoryBot.define do
factory :merge_request_diff_commit_user, class: 'MergeRequest::DiffCommitUser' do
name { generate(:name) }
email { generate(:email) }
end
end
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -198,6 +198,8 @@ merge_request_diff: ...@@ -198,6 +198,8 @@ merge_request_diff:
- merge_request_diff_files - merge_request_diff_files
merge_request_diff_commits: merge_request_diff_commits:
- merge_request_diff - merge_request_diff
- commit_author
- committer
merge_request_diff_detail: merge_request_diff_detail:
- merge_request_diff - merge_request_diff
merge_request_diff_files: merge_request_diff_files:
......
...@@ -109,14 +109,14 @@ RSpec.describe 'Test coverage of the Project Import' do ...@@ -109,14 +109,14 @@ RSpec.describe 'Test coverage of the Project Import' do
def failure_message(not_tested_relations) def failure_message(not_tested_relations)
<<~MSG <<~MSG
These relations seem to be added recenty and These relations seem to be added recently and
they expected to be covered in our Import specs: #{not_tested_relations}. they expected to be covered in our Import specs: #{not_tested_relations}.
To do that, expand one of the files listed in `project_json_fixtures` To do that, expand one of the files listed in `project_json_fixtures`
(or expand the list if you consider adding a new fixture file). (or expand the list if you consider adding a new fixture file).
After that, add a new spec into After that, add a new spec into
`spec/lib/gitlab/import_export/project_tree_restorer_spec.rb` `spec/lib/gitlab/import_export/project/tree_restorer_spec.rb`
to check that the relation is being imported correctly. to check that the relation is being imported correctly.
In case the spec breaks the master or there is a sense of urgency, In case the spec breaks the master or there is a sense of urgency,
......
...@@ -150,4 +150,30 @@ RSpec.describe Gitlab::ImportExport::Project::ObjectBuilder do ...@@ -150,4 +150,30 @@ RSpec.describe Gitlab::ImportExport::Project::ObjectBuilder do
expect(merge_request.persisted?).to be true expect(merge_request.persisted?).to be true
end end
end end
context 'merge request diff commit users' do
it 'finds the existing user' do
user = MergeRequest::DiffCommitUser
.find_or_create('Alice', 'alice@example.com')
found = described_class.build(
MergeRequest::DiffCommitUser,
'name' => 'Alice',
'email' => 'alice@example.com'
)
expect(found).to eq(user)
end
it 'creates a new user' do
found = described_class.build(
MergeRequest::DiffCommitUser,
'name' => 'Alice',
'email' => 'alice@example.com'
)
expect(found.name).to eq('Alice')
expect(found.email).to eq('alice@example.com')
end
end
end end
...@@ -224,6 +224,27 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do ...@@ -224,6 +224,27 @@ RSpec.describe Gitlab::ImportExport::Project::TreeRestorer do
expect(MergeRequestDiffCommit.count).to eq(77) expect(MergeRequestDiffCommit.count).to eq(77)
end end
it 'assigns committer and author details to all diff commits' do
MergeRequestDiffCommit.all.each do |commit|
expect(commit.commit_author_id).not_to be_nil
expect(commit.committer_id).not_to be_nil
end
end
it 'assigns the correct commit users to different diff commits' do
commit1 = MergeRequestDiffCommit
.find_by(sha: '0b4bc9a49b562e85de7cc9e834518ea6828729b9')
commit2 = MergeRequestDiffCommit
.find_by(sha: 'a4e5dfebf42e34596526acb8611bc7ed80e4eb3f')
expect(commit1.commit_author.name).to eq('Dmitriy Zaporozhets')
expect(commit1.commit_author.email).to eq('dmitriy.zaporozhets@gmail.com')
expect(commit2.commit_author.name).to eq('James Lopez')
expect(commit2.commit_author.email).to eq('james@jameslopez.es')
end
it 'has the correct data for merge request latest_merge_request_diff' do it 'has the correct data for merge request latest_merge_request_diff' do
MergeRequest.find_each do |merge_request| MergeRequest.find_each do |merge_request|
expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id)) expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id))
......
...@@ -235,6 +235,10 @@ MergeRequestDiffCommit: ...@@ -235,6 +235,10 @@ MergeRequestDiffCommit:
- committer_email - committer_email
- message - message
- trailers - trailers
MergeRequest::DiffCommitUser:
- id
- name
- email
MergeRequestDiffFile: MergeRequestDiffFile:
- merge_request_diff_id - merge_request_diff_id
- relative_order - relative_order
......
# frozen_string_literal: true
require 'spec_helper'
require_migration! 'schedule_merge_request_diff_users_background_migration'
RSpec.describe ScheduleMergeRequestDiffUsersBackgroundMigration, :migration do
let(:migration) { described_class.new }
describe '#up' do
before do
allow(described_class::MergeRequestDiff)
.to receive(:minimum)
.with(:id)
.and_return(42)
allow(described_class::MergeRequestDiff)
.to receive(:maximum)
.with(:id)
.and_return(85_123)
end
it 'schedules the migrations in batches' do
expect(migration)
.to receive(:migrate_in)
.ordered
.with(2.minutes.to_i, described_class::MIGRATION_NAME, [42, 40_042])
expect(migration)
.to receive(:migrate_in)
.ordered
.with(4.minutes.to_i, described_class::MIGRATION_NAME, [40_042, 80_042])
expect(migration)
.to receive(:migrate_in)
.ordered
.with(6.minutes.to_i, described_class::MIGRATION_NAME, [80_042, 120_042])
migration.up
end
it 'creates rows to track the background migration jobs' do
expect(Gitlab::Database::BackgroundMigrationJob)
.to receive(:create!)
.ordered
.with(class_name: described_class::MIGRATION_NAME, arguments: [42, 40_042])
expect(Gitlab::Database::BackgroundMigrationJob)
.to receive(:create!)
.ordered
.with(class_name: described_class::MIGRATION_NAME, arguments: [40_042, 80_042])
expect(Gitlab::Database::BackgroundMigrationJob)
.to receive(:create!)
.ordered
.with(class_name: described_class::MIGRATION_NAME, arguments: [80_042, 120_042])
migration.up
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe MergeRequest::DiffCommitUser do
describe 'validations' do
it 'requires that names are less than 512 characters long' do
expect(described_class.new(name: 'a' * 1000)).not_to be_valid
end
it 'requires that Emails are less than 512 characters long' do
expect(described_class.new(email: 'a' * 1000)).not_to be_valid
end
it 'requires either a name or Email' do
expect(described_class.new).not_to be_valid
end
it 'allows setting of just a name' do
expect(described_class.new(name: 'Alice')).to be_valid
end
it 'allows setting of just an Email' do
expect(described_class.new(email: 'alice@example.com')).to be_valid
end
it 'allows setting of both a name and Email' do
expect(described_class.new(name: 'Alice', email: 'alice@example.com'))
.to be_valid
end
end
describe '.prepare' do
it 'trims a value to at most 512 characters' do
expect(described_class.prepare('€' * 1_000)).to eq('€' * 512)
end
it 'returns nil if the value is an empty string' do
expect(described_class.prepare('')).to be_nil
end
end
describe '.find_or_create' do
it 'creates a new row if none exist' do
alice = described_class.find_or_create('Alice', 'alice@example.com')
expect(alice.name).to eq('Alice')
expect(alice.email).to eq('alice@example.com')
end
it 'returns an existing row if one exists' do
user1 = create(:merge_request_diff_commit_user)
user2 = described_class.find_or_create(user1.name, user1.email)
expect(user1).to eq(user2)
end
it 'handles concurrent inserts' do
user = create(:merge_request_diff_commit_user)
expect(described_class)
.to receive(:find_or_create_by!)
.ordered
.with(name: user.name, email: user.email)
.and_raise(ActiveRecord::RecordNotUnique)
expect(described_class)
.to receive(:find_or_create_by!)
.ordered
.with(name: user.name, email: user.email)
.and_return(user)
expect(described_class.find_or_create(user.name, user.email)).to eq(user)
end
end
describe '.bulk_find_or_create' do
it 'bulk creates missing rows and reuses existing rows' do
bob = create(
:merge_request_diff_commit_user,
name: 'Bob',
email: 'bob@example.com'
)
users = described_class.bulk_find_or_create(
[%w[Alice alice@example.com], %w[Bob bob@example.com]]
)
alice = described_class.find_by(name: 'Alice')
expect(users[%w[Alice alice@example.com]]).to eq(alice)
expect(users[%w[Bob bob@example.com]]).to eq(bob)
end
it 'does not insert any data when all users exist' do
bob = create(
:merge_request_diff_commit_user,
name: 'Bob',
email: 'bob@example.com'
)
users = described_class.bulk_find_or_create([%w[Bob bob@example.com]])
expect(described_class).not_to receive(:insert_all)
expect(users[%w[Bob bob@example.com]]).to eq(bob)
end
it 'handles concurrently inserted rows' do
bob = create(
:merge_request_diff_commit_user,
name: 'Bob',
email: 'bob@example.com'
)
input = [%w[Bob bob@example.com]]
expect(described_class)
.to receive(:bulk_find)
.twice
.with(input)
.and_return([], [bob])
users = described_class.bulk_find_or_create(input)
expect(users[%w[Bob bob@example.com]]).to eq(bob)
end
end
end
...@@ -16,6 +16,11 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -16,6 +16,11 @@ RSpec.describe MergeRequestDiffCommit do
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end end
describe 'associations' do
it { is_expected.to belong_to(:commit_author) }
it { is_expected.to belong_to(:committer) }
end
describe '#to_hash' do describe '#to_hash' do
subject { merge_request.commits.first } subject { merge_request.commits.first }
...@@ -46,6 +51,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -46,6 +51,8 @@ RSpec.describe MergeRequestDiffCommit do
"committed_date": "2014-02-27T10:01:38.000+01:00".to_time, "committed_date": "2014-02-27T10:01:38.000+01:00".to_time,
"committer_name": "Dmitriy Zaporozhets", "committer_name": "Dmitriy Zaporozhets",
"committer_email": "dmitriy.zaporozhets@gmail.com", "committer_email": "dmitriy.zaporozhets@gmail.com",
"commit_author_id": an_instance_of(Integer),
"committer_id": an_instance_of(Integer),
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 0, "relative_order": 0,
"sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e"), "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e"),
...@@ -59,6 +66,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -59,6 +66,8 @@ RSpec.describe MergeRequestDiffCommit do
"committed_date": "2014-02-27T09:57:31.000+01:00".to_time, "committed_date": "2014-02-27T09:57:31.000+01:00".to_time,
"committer_name": "Dmitriy Zaporozhets", "committer_name": "Dmitriy Zaporozhets",
"committer_email": "dmitriy.zaporozhets@gmail.com", "committer_email": "dmitriy.zaporozhets@gmail.com",
"commit_author_id": an_instance_of(Integer),
"committer_id": an_instance_of(Integer),
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 1, "relative_order": 1,
"sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d"), "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d"),
...@@ -76,6 +85,21 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -76,6 +85,21 @@ RSpec.describe MergeRequestDiffCommit do
subject subject
end end
it 'creates diff commit users' do
diff = create(:merge_request_diff, merge_request: merge_request)
described_class.create_bulk(diff.id, [commits.first])
commit_row = MergeRequestDiffCommit
.find_by(merge_request_diff_id: diff.id, relative_order: 0)
commit_user_row =
MergeRequest::DiffCommitUser.find_by(name: 'Dmitriy Zaporozhets')
expect(commit_row.commit_author).to eq(commit_user_row)
expect(commit_row.committer).to eq(commit_user_row)
end
context 'with dates larger than the DB limit' do context 'with dates larger than the DB limit' do
let(:commits) do let(:commits) do
# This commit's date is "Sun Aug 17 07:12:55 292278994 +0000" # This commit's date is "Sun Aug 17 07:12:55 292278994 +0000"
...@@ -92,6 +116,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -92,6 +116,8 @@ RSpec.describe MergeRequestDiffCommit do
"committed_date": timestamp, "committed_date": timestamp,
"committer_name": "Alejandro Rodríguez", "committer_name": "Alejandro Rodríguez",
"committer_email": "alejorro70@gmail.com", "committer_email": "alejorro70@gmail.com",
"commit_author_id": an_instance_of(Integer),
"committer_id": an_instance_of(Integer),
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 0, "relative_order": 0,
"sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69"), "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69"),
...@@ -107,4 +133,28 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -107,4 +133,28 @@ RSpec.describe MergeRequestDiffCommit do
end end
end end
end end
describe '.prepare_commits_for_bulk_insert' do
it 'returns the commit hashes and unique user tuples' do
commit = double(:commit, to_hash: {
parent_ids: %w[foo bar],
author_name: 'a' * 1000,
author_email: 'a' * 1000,
committer_name: 'Alice',
committer_email: 'alice@example.com'
})
hashes, tuples = described_class.prepare_commits_for_bulk_insert([commit])
expect(hashes).to eq([{
author_name: 'a' * 512,
author_email: 'a' * 512,
committer_name: 'Alice',
committer_email: 'alice@example.com'
}])
expect(tuples)
.to include(['a' * 512, 'a' * 512], %w[Alice alice@example.com])
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment