Commit 5db54a84 authored by Maxime Orefice's avatar Maxime Orefice Committed by Adam Hegyi

Dedup Ci::RunnerProjects

This commit makes sure we get rid of duplicates ci_runner_projects
and creates a unique index to prevent this bug from happening
in the future.

Changelog: fixed
parent d557b47f
# frozen_string_literal: true
class DedupRunnerProjects < Gitlab::Database::Migration[1.0]
TABLE_NAME = :ci_runner_projects
TMP_INDEX_NAME = 'tmp_unique_ci_runner_projects_by_runner_id_and_project_id'
OLD_INDEX_NAME = 'index_ci_runner_projects_on_runner_id_and_project_id'
INDEX_NAME = 'index_unique_ci_runner_projects_on_runner_id_and_project_id'
BATCH_SIZE = 5000
disable_ddl_transaction!
module Ci
class RunnerProject < ActiveRecord::Base
include EachBatch
self.table_name = 'ci_runner_projects'
end
end
def up
last_runner_project_record_id = Ci::RunnerProject.maximum(:id) || 0
# This index will disallow further duplicates while we're deduplicating the data.
add_concurrent_index(TABLE_NAME, [:runner_id, :project_id], where: "id > #{Integer(last_runner_project_record_id)}", unique: true, name: TMP_INDEX_NAME)
Ci::RunnerProject.each_batch(of: BATCH_SIZE) do |relation|
duplicated_runner_projects = Ci::RunnerProject
.select('COUNT(*)', :runner_id, :project_id)
.where('(runner_id, project_id) IN (?)', relation.select(:runner_id, :project_id))
.group(:runner_id, :project_id)
.having('COUNT(*) > 1')
duplicated_runner_projects.each do |runner_project|
deduplicate_item(runner_project)
end
end
add_concurrent_index(TABLE_NAME, [:runner_id, :project_id], unique: true, name: INDEX_NAME)
remove_concurrent_index_by_name(TABLE_NAME, TMP_INDEX_NAME)
remove_concurrent_index_by_name(TABLE_NAME, OLD_INDEX_NAME)
end
def down
add_concurrent_index(TABLE_NAME, [:runner_id, :project_id], name: OLD_INDEX_NAME)
remove_concurrent_index_by_name(TABLE_NAME, TMP_INDEX_NAME)
remove_concurrent_index_by_name(TABLE_NAME, INDEX_NAME)
end
private
def deduplicate_item(runner_project)
runner_projects_records = Ci::RunnerProject
.where(project_id: runner_project.project_id, runner_id: runner_project.runner_id)
.order(updated_at: :asc)
.to_a
attributes = {}
runner_projects_records.each do |runner_projects_record|
params = runner_projects_record.attributes.except('id')
attributes.merge!(params.compact)
end
ApplicationRecord.transaction do
record_to_keep = runner_projects_records.pop
records_to_delete = runner_projects_records
Ci::RunnerProject.where(id: records_to_delete.map(&:id)).delete_all
record_to_keep.update!(attributes)
end
end
end
7f2b3e70e33273d75f68bd1fa33103f24a4e4cfc3f2e5777dfd258b5a2e7bf4e
\ No newline at end of file
......@@ -25712,8 +25712,6 @@ CREATE UNIQUE INDEX index_ci_runner_namespaces_on_runner_id_and_namespace_id ON
CREATE INDEX index_ci_runner_projects_on_project_id ON ci_runner_projects USING btree (project_id);
CREATE INDEX index_ci_runner_projects_on_runner_id_and_project_id ON ci_runner_projects USING btree (runner_id, project_id);
CREATE INDEX index_ci_runners_on_active ON ci_runners USING btree (active, id);
CREATE INDEX index_ci_runners_on_contacted_at_and_id_desc ON ci_runners USING btree (contacted_at, id DESC);
......@@ -27796,6 +27794,8 @@ CREATE INDEX index_u2f_registrations_on_user_id ON u2f_registrations USING btree
CREATE UNIQUE INDEX index_uniq_im_issuable_escalation_statuses_on_issue_id ON incident_management_issuable_escalation_statuses USING btree (issue_id);
CREATE UNIQUE INDEX index_unique_ci_runner_projects_on_runner_id_and_project_id ON ci_runner_projects USING btree (runner_id, project_id);
CREATE UNIQUE INDEX index_unique_issue_metrics_issue_id ON issue_metrics USING btree (issue_id);
CREATE INDEX index_unit_test_failures_failed_at ON ci_unit_test_failures USING btree (failed_at DESC);
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20220124130028_dedup_runner_projects.rb')
RSpec.describe DedupRunnerProjects, :migration, schema: 20220120085655 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:runners) { table(:ci_runners) }
let(:runner_projects) { table(:ci_runner_projects) }
let!(:namespace) { namespaces.create!(name: 'foo', path: 'foo') }
let!(:project) { projects.create!(namespace_id: namespace.id) }
let!(:project_2) { projects.create!(namespace_id: namespace.id) }
let!(:runner) { runners.create!(runner_type: 'project_type') }
let!(:runner_2) { runners.create!(runner_type: 'project_type') }
let!(:runner_3) { runners.create!(runner_type: 'project_type') }
let!(:duplicated_runner_project_1) { runner_projects.create!(runner_id: runner.id, project_id: project.id) }
let!(:duplicated_runner_project_2) { runner_projects.create!(runner_id: runner.id, project_id: project.id) }
let!(:duplicated_runner_project_3) { runner_projects.create!(runner_id: runner_2.id, project_id: project_2.id) }
let!(:duplicated_runner_project_4) { runner_projects.create!(runner_id: runner_2.id, project_id: project_2.id) }
let!(:non_duplicated_runner_project) { runner_projects.create!(runner_id: runner_3.id, project_id: project.id) }
it 'deduplicates ci_runner_projects table' do
expect { migrate! }.to change { runner_projects.count }.from(5).to(3)
end
it 'merges `duplicated_runner_project_1` with `duplicated_runner_project_2`', :aggregate_failures do
migrate!
expect(runner_projects.where(id: duplicated_runner_project_1.id)).not_to(exist)
merged_runner_projects = runner_projects.find_by(id: duplicated_runner_project_2.id)
expect(merged_runner_projects).to be_present
expect(merged_runner_projects.created_at).to be_like_time(duplicated_runner_project_1.created_at)
expect(merged_runner_projects.created_at).to be_like_time(duplicated_runner_project_2.created_at)
end
it 'merges `duplicated_runner_project_3` with `duplicated_runner_project_4`', :aggregate_failures do
migrate!
expect(runner_projects.where(id: duplicated_runner_project_3.id)).not_to(exist)
merged_runner_projects = runner_projects.find_by(id: duplicated_runner_project_4.id)
expect(merged_runner_projects).to be_present
expect(merged_runner_projects.created_at).to be_like_time(duplicated_runner_project_3.created_at)
expect(merged_runner_projects.created_at).to be_like_time(duplicated_runner_project_4.created_at)
end
it 'does not change non duplicated records' do
expect { migrate! }.not_to change { non_duplicated_runner_project.reload.attributes }
end
it 'does nothing when there are no runner projects' do
runner_projects.delete_all
migrate!
expect(runner_projects.count).to eq(0)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment