Commit b3244f8d authored by Adam Hegyi's avatar Adam Hegyi Committed by Heinrich Lee Yu

Deduplicate merge_request_metrics table

This change deduplicates records in the `merge_request_metrics` table.
parent 6f5a070d
......@@ -21,6 +21,8 @@ class MergeRequest < ApplicationRecord
include MilestoneEventable
include StateEventable
extend ::Gitlab::Utils::Override
sha_attribute :squash_commit_sha
self.reactive_cache_key = ->(model) { [model.project.id, model.iid] }
......@@ -1582,6 +1584,23 @@ class MergeRequest < ApplicationRecord
super.merge(label_url_method: :project_merge_requests_url)
end
override :ensure_metrics
def ensure_metrics
MergeRequest::Metrics.safe_find_or_create_by(merge_request_id: id).tap do |metrics_record|
# Make sure we refresh the loaded association object with the newly created/loaded item.
# This is needed in order to have the exact functionality than before.
#
# Example:
#
# merge_request.metrics.destroy
# merge_request.ensure_metrics
# merge_request.metrics # should return the metrics record and not nil
# merge_request.metrics.merge_request # should return the same MR record
metrics_record.association(:merge_request).target = self
association(:metrics).target = metrics_record
end
end
private
def with_rebase_lock
......
---
title: Deduplicate merge_request_metrics table
merge_request: 29566
author:
type: other
# frozen_string_literal: true
class DedupMrMetrics < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
TMP_INDEX_NAME = 'tmp_unique_merge_request_metrics_by_merge_request_id'
INDEX_NAME = 'unique_merge_request_metrics_by_merge_request_id'
disable_ddl_transaction!
class MergeRequestMetrics < ActiveRecord::Base
self.table_name = 'merge_request_metrics'
include EachBatch
end
def up
last_metrics_record_id = MergeRequestMetrics.maximum(:id) || 0
# This index will disallow further duplicates while we're deduplicating the data.
add_concurrent_index(:merge_request_metrics, :merge_request_id, where: "id > #{Integer(last_metrics_record_id)}", unique: true, name: TMP_INDEX_NAME)
MergeRequestMetrics.each_batch do |relation|
duplicated_merge_request_ids = MergeRequestMetrics
.where(merge_request_id: relation.select(:merge_request_id))
.select(:merge_request_id)
.group(:merge_request_id)
.having('COUNT(merge_request_metrics.merge_request_id) > 1')
.pluck(:merge_request_id)
duplicated_merge_request_ids.each do |merge_request_id|
deduplicate_item(merge_request_id)
end
end
add_concurrent_index(:merge_request_metrics, :merge_request_id, unique: true, name: INDEX_NAME)
remove_concurrent_index_by_name(:merge_request_metrics, TMP_INDEX_NAME)
end
def down
remove_concurrent_index_by_name(:merge_request_metrics, TMP_INDEX_NAME)
remove_concurrent_index_by_name(:merge_request_metrics, INDEX_NAME)
end
private
def deduplicate_item(merge_request_id)
merge_request_metrics_records = MergeRequestMetrics.where(merge_request_id: merge_request_id).order(updated_at: :asc).to_a
attributes = {}
merge_request_metrics_records.each do |merge_request_metrics_record|
params = merge_request_metrics_record.attributes.except('id')
attributes.merge!(params.compact)
end
ActiveRecord::Base.transaction do
record_to_keep = merge_request_metrics_records.pop
records_to_delete = merge_request_metrics_records
MergeRequestMetrics.where(id: records_to_delete.map(&:id)).delete_all
record_to_keep.update!(attributes)
end
end
end
......@@ -20388,6 +20388,8 @@ CREATE INDEX tmp_index_ci_pipelines_lock_version ON public.ci_pipelines USING bt
CREATE INDEX tmp_index_ci_stages_lock_version ON public.ci_stages USING btree (id) WHERE (lock_version IS NULL);
CREATE UNIQUE INDEX unique_merge_request_metrics_by_merge_request_id ON public.merge_request_metrics USING btree (merge_request_id);
CREATE UNIQUE INDEX users_security_dashboard_projects_unique_index ON public.users_security_dashboard_projects USING btree (project_id, user_id);
CREATE UNIQUE INDEX vulnerability_feedback_unique_idx ON public.vulnerability_feedback USING btree (project_id, category, feedback_type, project_fingerprint);
......@@ -23378,6 +23380,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200525144525
20200526000407
20200526013844
20200526115436
20200526120714
20200526142550
20200526153844
......
......@@ -5,6 +5,10 @@ require 'spec_helper'
RSpec.describe Analytics::MergeRequestMetricsRefresh do
subject { calculator_class.new(merge_request) }
around do |example|
Timecop.freeze { example.run }
end
let(:calculator_class) do
Class.new do
include Analytics::MergeRequestMetricsRefresh
......@@ -18,7 +22,7 @@ RSpec.describe Analytics::MergeRequestMetricsRefresh do
end
def update_metric!(metrics)
metrics.first_comment_at = Time.now
metrics.first_comment_at = Time.zone.now
end
end
end
......@@ -27,20 +31,24 @@ RSpec.describe Analytics::MergeRequestMetricsRefresh do
describe '#execute' do
it 'updates metric via update_metric! method' do
expect { subject.execute }.to change { merge_request.metrics.first_comment_at }.to(be_like_time(Time.now))
expect { subject.execute }.to change { merge_request.metrics.first_comment_at }.to(be_like_time(Time.zone.now))
end
context 'when metric is already present' do
let(:first_comment_at) { 1.day.ago }
before do
merge_request.metrics.first_comment_at = 1.day.ago
merge_request.metrics.update!(first_comment_at: first_comment_at)
end
it 'does not update metric' do
expect { subject.execute }.not_to change { merge_request.metrics.first_comment_at }
subject.execute
expect(merge_request.metrics.reload.first_comment_at).to be_like_time(first_comment_at)
end
it 'updates metric when forced' do
expect { subject.execute(force: true) }.to change { merge_request.metrics.first_comment_at }.to(be_like_time(Time.now))
expect { subject.execute(force: true) }.to change { merge_request.metrics.first_comment_at }.to(be_like_time(Time.zone.now))
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200526115436_dedup_mr_metrics')
RSpec.describe DedupMrMetrics, :migration, schema: 20200526013844 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:merge_requests) { table(:merge_requests) }
let(:metrics) { table(:merge_request_metrics) }
let(:merge_request_params) { { source_branch: 'x', target_branch: 'y', target_project_id: project.id } }
let!(:namespace) { namespaces.create(name: 'foo', path: 'foo') }
let!(:project) { projects.create!(namespace_id: namespace.id) }
let!(:merge_request_1) { merge_requests.create!(merge_request_params) }
let!(:merge_request_2) { merge_requests.create!(merge_request_params) }
let!(:merge_request_3) { merge_requests.create!(merge_request_params) }
let!(:duplicated_metrics_1) { metrics.create(merge_request_id: merge_request_1.id, latest_build_started_at: 1.day.ago, first_deployed_to_production_at: 5.days.ago, updated_at: 2.months.ago) }
let!(:duplicated_metrics_2) { metrics.create(merge_request_id: merge_request_1.id, latest_build_started_at: Time.now, merged_at: Time.now, updated_at: 1.month.ago) }
let!(:duplicated_metrics_3) { metrics.create(merge_request_id: merge_request_3.id, diff_size: 30, commits_count: 20, updated_at: 2.months.ago) }
let!(:duplicated_metrics_4) { metrics.create(merge_request_id: merge_request_3.id, added_lines: 5, commits_count: nil, updated_at: 1.month.ago) }
let!(:non_duplicated_metrics) { metrics.create(merge_request_id: merge_request_2.id, latest_build_started_at: 2.days.ago) }
it 'deduplicates merge_request_metrics table' do
expect { migrate! }.to change { metrics.count }.from(5).to(3)
end
it 'merges `duplicated_metrics_1` with `duplicated_metrics_2`' do
migrate!
expect(metrics.where(id: duplicated_metrics_1.id)).not_to exist
merged_metrics = metrics.find_by(id: duplicated_metrics_2.id)
expect(merged_metrics).to be_present
expect(merged_metrics.latest_build_started_at).to be_like_time(duplicated_metrics_2.latest_build_started_at)
expect(merged_metrics.merged_at).to be_like_time(duplicated_metrics_2.merged_at)
expect(merged_metrics.first_deployed_to_production_at).to be_like_time(duplicated_metrics_1.first_deployed_to_production_at)
end
it 'merges `duplicated_metrics_3` with `duplicated_metrics_4`' do
migrate!
expect(metrics.where(id: duplicated_metrics_3.id)).not_to exist
merged_metrics = metrics.find_by(id: duplicated_metrics_4.id)
expect(merged_metrics).to be_present
expect(merged_metrics.diff_size).to eq(duplicated_metrics_3.diff_size)
expect(merged_metrics.commits_count).to eq(duplicated_metrics_3.commits_count)
expect(merged_metrics.added_lines).to eq(duplicated_metrics_4.added_lines)
end
it 'does not change non duplicated records' do
expect { migrate! }.not_to change { non_duplicated_metrics.reload.attributes }
end
it 'does nothing when there are no metrics' do
metrics.delete_all
migrate!
expect(metrics.count).to eq(0)
end
end
......@@ -280,6 +280,21 @@ RSpec.describe MergeRequest do
expect(MergeRequest::Metrics.count).to eq(1)
end
it 'does not create duplicated metrics records when MR is concurrently updated' do
merge_request = create(:merge_request)
merge_request.metrics.destroy
instance1 = MergeRequest.find(merge_request.id)
instance2 = MergeRequest.find(merge_request.id)
instance1.ensure_metrics
instance2.ensure_metrics
metrics_records = MergeRequest::Metrics.where(merge_request_id: merge_request.id)
expect(metrics_records.size).to eq(1)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment