Commit 6c8c11ed authored by Adam Hegyi's avatar Adam Hegyi

Merge branch 'loose-fk-sliding-partitioning' into 'master'

Setup sliding partitioning strategy for LFK

See merge request gitlab-org/gitlab!75640
parents 11b57243 257f70de
# frozen_string_literal: true
class LooseForeignKeys::DeletedRecord < ApplicationRecord
PARTITION_DURATION = 1.day
include PartitionedTable
self.primary_key = :id
self.ignored_columns = %i[partition]
partitioned_by :partition, strategy: :sliding_list,
next_partition_if: -> (active_partition) do
return false if Feature.disabled?(:lfk_automatic_partition_creation, default_enabled: :yaml)
oldest_record_in_partition = LooseForeignKeys::DeletedRecord
.select(:id, :created_at)
.for_partition(active_partition)
.order(:id)
.limit(1)
.take
oldest_record_in_partition.present? && oldest_record_in_partition.created_at < PARTITION_DURATION.ago
end,
detach_partition_if: -> (partition) do
return false if Feature.disabled?(:lfk_automatic_partition_dropping, default_enabled: :yaml)
!LooseForeignKeys::DeletedRecord
.for_partition(partition)
.status_pending
.exists?
end
scope :for_table, -> (table) { where(fully_qualified_table_name: table) }
scope :for_partition, -> (partition) { where(partition: partition) }
scope :consume_order, -> { order(:partition, :consume_after, :id) }
enum status: { pending: 1, processed: 2 }, _prefix: :status
def self.load_batch_for_table(table, batch_size)
for_table(table)
# selecting partition as partition_number to workaround the sliding partitioning column ignore
select(arel_table[Arel.star], arel_table[:partition].as('partition_number'))
.for_table(table)
.status_pending
.consume_order
.limit(batch_size)
......@@ -20,9 +50,9 @@ class LooseForeignKeys::DeletedRecord < ApplicationRecord
# Run a query for each partition to optimize the row lookup by primary key (partition, id)
update_count = 0
all_records.group_by(&:partition).each do |partition, records_within_partition|
all_records.group_by(&:partition_number).each do |partition, records_within_partition|
update_count += status_pending
.where(partition: partition)
.for_partition(partition)
.where(id: records_within_partition.pluck(:id))
.update_all(status: :processed)
end
......
---
name: lfk_automatic_partition_creation
introduced_by_url:
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/346907
milestone: '14.6'
type: development
group: group::sharding
default_enabled: false
---
name: lfk_automatic_partition_dropping
introduced_by_url:
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/346908
milestone: '14.6'
type: development
group: group::sharding
default_enabled: false
......@@ -2,7 +2,8 @@
Gitlab::Database::Partitioning.register_models([
AuditEvent,
WebHookLog
WebHookLog,
LooseForeignKeys::DeletedRecord
])
if Gitlab.ee?
......
# frozen_string_literal: true
class MoveLooseFkDeletedRecordsToDynamicSchema < Gitlab::Database::Migration[1.0]
enable_lock_retries!
def up
if table_exists?('gitlab_partitions_static.loose_foreign_keys_deleted_records_1')
execute 'ALTER TABLE gitlab_partitions_static.loose_foreign_keys_deleted_records_1 SET SCHEMA gitlab_partitions_dynamic'
end
end
def down
if table_exists?('gitlab_partitions_dynamic.loose_foreign_keys_deleted_records_1')
execute 'ALTER TABLE gitlab_partitions_dynamic.loose_foreign_keys_deleted_records_1 SET SCHEMA gitlab_partitions_static'
end
end
end
2bca61880005c9303b2ff71747cde64d3418b6ef8ad2a9f114d584f4149e386b
\ No newline at end of file
......@@ -147,6 +147,18 @@ CREATE TABLE incident_management_pending_issue_escalations (
)
PARTITION BY RANGE (process_at);
CREATE TABLE loose_foreign_keys_deleted_records (
id bigint NOT NULL,
partition bigint DEFAULT 1 NOT NULL,
primary_key_value bigint NOT NULL,
status smallint DEFAULT 1 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
fully_qualified_table_name text NOT NULL,
consume_after timestamp with time zone DEFAULT now(),
CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150))
)
PARTITION BY LIST (partition);
CREATE TABLE verification_codes (
created_at timestamp with time zone DEFAULT now() NOT NULL,
visitor_id_code text NOT NULL,
......@@ -1035,39 +1047,6 @@ CREATE TABLE gitlab_partitions_static.analytics_cycle_analytics_merge_request_st
);
ALTER TABLE ONLY analytics_cycle_analytics_merge_request_stage_events ATTACH PARTITION gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_31 FOR VALUES WITH (modulus 32, remainder 31);
CREATE TABLE loose_foreign_keys_deleted_records (
id bigint NOT NULL,
partition bigint DEFAULT 1 NOT NULL,
primary_key_value bigint NOT NULL,
status smallint DEFAULT 1 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
fully_qualified_table_name text NOT NULL,
consume_after timestamp with time zone DEFAULT now(),
CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150))
)
PARTITION BY LIST (partition);
CREATE SEQUENCE loose_foreign_keys_deleted_records_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE loose_foreign_keys_deleted_records_id_seq OWNED BY loose_foreign_keys_deleted_records.id;
CREATE TABLE gitlab_partitions_static.loose_foreign_keys_deleted_records_1 (
id bigint DEFAULT nextval('loose_foreign_keys_deleted_records_id_seq'::regclass) NOT NULL,
partition bigint DEFAULT 1 NOT NULL,
primary_key_value bigint NOT NULL,
status smallint DEFAULT 1 NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
fully_qualified_table_name text NOT NULL,
consume_after timestamp with time zone DEFAULT now(),
CONSTRAINT check_1a541f3235 CHECK ((char_length(fully_qualified_table_name) <= 150))
);
ALTER TABLE ONLY loose_foreign_keys_deleted_records ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1 FOR VALUES IN ('1');
CREATE TABLE product_analytics_events_experimental (
id bigint NOT NULL,
project_id integer NOT NULL,
......@@ -15889,6 +15868,15 @@ CREATE SEQUENCE lists_id_seq
ALTER SEQUENCE lists_id_seq OWNED BY lists.id;
CREATE SEQUENCE loose_foreign_keys_deleted_records_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE loose_foreign_keys_deleted_records_id_seq OWNED BY loose_foreign_keys_deleted_records.id;
CREATE TABLE member_tasks (
id bigint NOT NULL,
member_id bigint NOT NULL,
......@@ -22451,12 +22439,6 @@ ALTER TABLE ONLY gitlab_partitions_static.analytics_cycle_analytics_merge_reques
ALTER TABLE ONLY gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_31
ADD CONSTRAINT analytics_cycle_analytics_merge_request_stage_events_31_pkey PRIMARY KEY (stage_event_hash_id, merge_request_id);
ALTER TABLE ONLY loose_foreign_keys_deleted_records
ADD CONSTRAINT loose_foreign_keys_deleted_records_pkey PRIMARY KEY (partition, id);
ALTER TABLE ONLY gitlab_partitions_static.loose_foreign_keys_deleted_records_1
ADD CONSTRAINT loose_foreign_keys_deleted_records_1_pkey PRIMARY KEY (partition, id);
ALTER TABLE ONLY product_analytics_events_experimental
ADD CONSTRAINT product_analytics_events_experimental_pkey PRIMARY KEY (id, project_id);
......@@ -23546,6 +23528,9 @@ ALTER TABLE ONLY list_user_preferences
ALTER TABLE ONLY lists
ADD CONSTRAINT lists_pkey PRIMARY KEY (id);
ALTER TABLE ONLY loose_foreign_keys_deleted_records
ADD CONSTRAINT loose_foreign_keys_deleted_records_pkey PRIMARY KEY (partition, id);
ALTER TABLE ONLY member_tasks
ADD CONSTRAINT member_tasks_pkey PRIMARY KEY (id);
......@@ -24316,10 +24301,6 @@ CREATE INDEX index_merge_request_stage_events_project_duration ON ONLY analytics
CREATE INDEX index_006f943df6 ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_16 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_loose_foreign_keys_deleted_records_for_partitioned_query ON ONLY loose_foreign_keys_deleted_records USING btree (partition, fully_qualified_table_name, consume_after, id) WHERE (status = 1);
CREATE INDEX index_01e3390fac ON gitlab_partitions_static.loose_foreign_keys_deleted_records_1 USING btree (partition, fully_qualified_table_name, consume_after, id) WHERE (status = 1);
CREATE INDEX index_02749b504c ON gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_11 USING btree (stage_event_hash_id, project_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
CREATE INDEX index_merge_request_stage_events_group_duration ON ONLY analytics_cycle_analytics_merge_request_stage_events USING btree (stage_event_hash_id, group_id, end_event_timestamp, merge_request_id, start_event_timestamp) WHERE (end_event_timestamp IS NOT NULL);
......@@ -26574,6 +26555,8 @@ CREATE INDEX index_lists_on_milestone_id ON lists USING btree (milestone_id);
CREATE INDEX index_lists_on_user_id ON lists USING btree (user_id);
CREATE INDEX index_loose_foreign_keys_deleted_records_for_partitioned_query ON ONLY loose_foreign_keys_deleted_records USING btree (partition, fully_qualified_table_name, consume_after, id) WHERE (status = 1);
CREATE INDEX index_member_tasks_on_member_id ON member_tasks USING btree (member_id);
CREATE UNIQUE INDEX index_member_tasks_on_member_id_and_project_id ON member_tasks USING btree (member_id, project_id);
......@@ -28228,8 +28211,6 @@ ALTER INDEX index_issue_stage_events_project_duration ATTACH PARTITION gitlab_pa
ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_006f943df6;
ALTER INDEX index_loose_foreign_keys_deleted_records_for_partitioned_query ATTACH PARTITION gitlab_partitions_static.index_01e3390fac;
ALTER INDEX index_merge_request_stage_events_project_duration ATTACH PARTITION gitlab_partitions_static.index_02749b504c;
ALTER INDEX index_merge_request_stage_events_group_duration ATTACH PARTITION gitlab_partitions_static.index_0287f5ba09;
......@@ -28738,8 +28719,6 @@ ALTER INDEX index_issue_stage_events_project_duration ATTACH PARTITION gitlab_pa
ALTER INDEX index_issue_stage_events_group_in_progress_duration ATTACH PARTITION gitlab_partitions_static.index_ff8741d8d7;
ALTER INDEX loose_foreign_keys_deleted_records_pkey ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1_pkey;
ALTER INDEX index_product_analytics_events_experimental_project_and_time ATTACH PARTITION gitlab_partitions_static.product_analytics_events_expe_project_id_collector_tstamp_idx10;
ALTER INDEX index_product_analytics_events_experimental_project_and_time ATTACH PARTITION gitlab_partitions_static.product_analytics_events_expe_project_id_collector_tstamp_idx11;
......@@ -47,11 +47,16 @@ RSpec.describe Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers do
record_to_be_deleted.delete
expect(LooseForeignKeys::DeletedRecord.count).to eq(1)
deleted_record = LooseForeignKeys::DeletedRecord.all.first
arel_table = LooseForeignKeys::DeletedRecord.arel_table
deleted_record = LooseForeignKeys::DeletedRecord
.select(arel_table[Arel.star], arel_table[:partition].as('partition_number')) # aliasing the ignored partition column to partition_number
.all
.first
expect(deleted_record.primary_key_value).to eq(record_to_be_deleted.id)
expect(deleted_record.fully_qualified_table_name).to eq('public._test_loose_fk_test_table')
expect(deleted_record.partition).to eq(1)
expect(deleted_record.partition_number).to eq(1)
end
it 'stores multiple record deletions' do
......
......@@ -5,31 +5,148 @@ require 'spec_helper'
RSpec.describe LooseForeignKeys::DeletedRecord, type: :model do
let_it_be(:table) { 'public.projects' }
let_it_be(:deleted_record_1) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 5) }
let_it_be(:deleted_record_2) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) }
let_it_be(:deleted_record_3) { described_class.create!(partition: 1, fully_qualified_table_name: 'public.other_table', primary_key_value: 3) }
let_it_be(:deleted_record_4) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) } # duplicate
describe 'class methods' do
let_it_be(:deleted_record_1) { described_class.create!(fully_qualified_table_name: table, primary_key_value: 5) }
let_it_be(:deleted_record_2) { described_class.create!(fully_qualified_table_name: table, primary_key_value: 1) }
let_it_be(:deleted_record_3) { described_class.create!(fully_qualified_table_name: 'public.other_table', primary_key_value: 3) }
let_it_be(:deleted_record_4) { described_class.create!(fully_qualified_table_name: table, primary_key_value: 1) } # duplicate
describe '.load_batch_for_table' do
it 'loads records and orders them by creation date' do
records = described_class.load_batch_for_table(table, 10)
describe '.load_batch_for_table' do
it 'loads records and orders them by creation date' do
records = described_class.load_batch_for_table(table, 10)
expect(records).to eq([deleted_record_1, deleted_record_2, deleted_record_4])
expect(records).to eq([deleted_record_1, deleted_record_2, deleted_record_4])
end
it 'supports configurable batch size' do
records = described_class.load_batch_for_table(table, 2)
expect(records).to eq([deleted_record_1, deleted_record_2])
end
end
it 'supports configurable batch size' do
records = described_class.load_batch_for_table(table, 2)
describe '.mark_records_processed' do
it 'updates all records' do
records = described_class.load_batch_for_table(table, 10)
described_class.mark_records_processed(records)
expect(records).to eq([deleted_record_1, deleted_record_2])
expect(described_class.status_pending.count).to eq(1)
expect(described_class.status_processed.count).to eq(3)
end
end
end
describe '.mark_records_processed' do
it 'updates all records' do
described_class.mark_records_processed([deleted_record_1, deleted_record_2, deleted_record_4])
describe 'sliding_list partitioning' do
let(:connection) { described_class.connection }
let(:partition_manager) { Gitlab::Database::Partitioning::PartitionManager.new(described_class) }
describe 'next_partition_if callback' do
let(:active_partition) { described_class.partitioning_strategy.active_partition.value }
subject(:value) { described_class.partitioning_strategy.next_partition_if.call(active_partition) }
context 'when the partition is empty' do
it { is_expected.to eq(false) }
end
context 'when the partition has records' do
before do
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 1, status: :processed)
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 2, status: :pending)
end
it { is_expected.to eq(false) }
end
context 'when the first record of the partition is older than PARTITION_DURATION' do
before do
described_class.create!(
fully_qualified_table_name: 'public.table',
primary_key_value: 1,
created_at: (described_class::PARTITION_DURATION + 1.day).ago)
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 2)
end
it { is_expected.to eq(true) }
context 'when the lfk_automatic_partition_creation FF is off' do
before do
stub_feature_flags(lfk_automatic_partition_creation: false)
end
it { is_expected.to eq(false) }
end
end
end
describe 'detach_partition_if callback' do
let(:active_partition) { described_class.partitioning_strategy.active_partition.value }
subject(:value) { described_class.partitioning_strategy.detach_partition_if.call(active_partition) }
context 'when the partition contains unprocessed records' do
before do
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 1, status: :processed)
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 2, status: :pending)
end
it { is_expected.to eq(false) }
end
context 'when the partition contains only processed records' do
before do
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 1, status: :processed)
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 2, status: :processed)
end
it { is_expected.to eq(true) }
context 'when the lfk_automatic_partition_dropping FF is off' do
before do
stub_feature_flags(lfk_automatic_partition_dropping: false)
end
it { is_expected.to eq(false) }
end
end
end
describe 'the behavior of the strategy' do
it 'moves records to new partitions as time passes', :freeze_time do
# We start with partition 1
expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1])
# it's not a day old yet so no new partitions are created
partition_manager.sync_partitions
expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1])
# add one record so the next partition will be created
described_class.create!(fully_qualified_table_name: 'public.table', primary_key_value: 1)
# after traveling forward a day
travel(described_class::PARTITION_DURATION + 1.second)
# a new partition is created
partition_manager.sync_partitions
expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([1, 2])
# and we can insert to the new partition
expect { described_class.create!(fully_qualified_table_name: table, primary_key_value: 5) }.not_to raise_error
# after processing old records
LooseForeignKeys::DeletedRecord.for_partition(1).update_all(status: :processed)
partition_manager.sync_partitions
# the old one is removed
expect(described_class.partitioning_strategy.current_partitions.map(&:value)).to eq([2])
expect(described_class.status_pending.count).to eq(1)
expect(described_class.status_processed.count).to eq(3)
# and we only have the newly created partition left.
expect(described_class.count).to eq(1)
end
end
end
end
......@@ -90,7 +90,7 @@ RSpec.describe LooseForeignKeys::BatchCleanerService do
described_class.new(parent_table: '_test_loose_fk_parent_table',
loose_foreign_key_definitions: loose_foreign_key_definitions,
deleted_parent_records: LooseForeignKeys::DeletedRecord.status_pending.all
deleted_parent_records: LooseForeignKeys::DeletedRecord.load_batch_for_table('public._test_loose_fk_parent_table', 100)
).execute
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment