Commit 7c5506ec authored by Andreas Brandl's avatar Andreas Brandl

Merge branch 'stomlinson/identify-partitions-to-drop' into 'master'

Partition manager identifies partitions to detach

See merge request gitlab-org/gitlab!65093
parents 74159ded 01210db7
......@@ -10,12 +10,12 @@ module PartitionedTable
monthly: Gitlab::Database::Partitioning::MonthlyStrategy
}.freeze
def partitioned_by(partitioning_key, strategy:)
def partitioned_by(partitioning_key, strategy:, **kwargs)
strategy_class = PARTITIONING_STRATEGIES[strategy.to_sym] || raise(ArgumentError, "Unknown partitioning strategy: #{strategy}")
@partitioning_strategy = strategy_class.new(self, partitioning_key)
@partitioning_strategy = strategy_class.new(self, partitioning_key, **kwargs)
Gitlab::Database::Partitioning::PartitionCreator.register(self)
Gitlab::Database::Partitioning::PartitionManager.register(self)
end
end
end
......@@ -9,7 +9,7 @@ class WebHookLog < ApplicationRecord
self.primary_key = :id
partitioned_by :created_at, strategy: :monthly
partitioned_by :created_at, strategy: :monthly, retain_for: 3.months
belongs_to :web_hook
......
......@@ -247,6 +247,15 @@
:idempotent: true
:tags:
- :exclude_from_kubernetes
- :name: cronjob:database_partition_management
:worker_name: Database::PartitionManagementWorker
:feature_category: :database
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:environments_auto_stop_cron
:worker_name: Environments::AutoStopCronWorker
:feature_category: :continuous_delivery
......
# frozen_string_literal: true
module Database
class PartitionManagementWorker
include ApplicationWorker
sidekiq_options retry: 3
include CronjobQueue # rubocop:disable Scalability/CronWorkerContext
feature_category :database
idempotent!
def perform
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions
ensure
Gitlab::Database::Partitioning::PartitionMonitoring.new.report_metrics
end
end
end
......@@ -10,8 +10,7 @@ class PartitionCreationWorker
idempotent!
def perform
Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions
ensure
Gitlab::Database::Partitioning::PartitionMonitoring.new.report_metrics
# This worker has been removed in favor of Database::PartitionManagementWorker
Database::PartitionManagementWorker.new.perform
end
end
---
name: partition_pruning_dry_run
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65093
rollout_issue_url:
milestone: '14.1'
type: development
group: group::database
default_enabled: false
......@@ -540,9 +540,9 @@ Settings.cron_jobs['authorized_project_update_periodic_recalculate_worker']['job
Settings.cron_jobs['update_container_registry_info_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['update_container_registry_info_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['update_container_registry_info_worker']['job_class'] = 'UpdateContainerRegistryInfoWorker'
Settings.cron_jobs['postgres_dynamic_partitions_creator'] ||= Settingslogic.new({})
Settings.cron_jobs['postgres_dynamic_partitions_creator']['cron'] ||= '21 */6 * * *'
Settings.cron_jobs['postgres_dynamic_partitions_creator']['job_class'] ||= 'PartitionCreationWorker'
Settings.cron_jobs['postgres_dynamic_partitions_manager'] ||= Settingslogic.new({})
Settings.cron_jobs['postgres_dynamic_partitions_manager']['cron'] ||= '21 */6 * * *'
Settings.cron_jobs['postgres_dynamic_partitions_manager']['job_class'] ||= 'Database::PartitionManagementWorker'
Settings.cron_jobs['ci_platform_metrics_update_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['cron'] ||= '47 9 * * *'
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['job_class'] = 'CiPlatformMetricsUpdateCronWorker'
......
......@@ -3,15 +3,15 @@
# Make sure we have loaded partitioned models here
# (even with eager loading disabled).
Gitlab::Database::Partitioning::PartitionCreator.register(AuditEvent)
Gitlab::Database::Partitioning::PartitionCreator.register(WebHookLog)
Gitlab::Database::Partitioning::PartitionManager.register(AuditEvent)
Gitlab::Database::Partitioning::PartitionManager.register(WebHookLog)
if Gitlab.ee?
Gitlab::Database::Partitioning::PartitionCreator.register(IncidentManagement::PendingEscalations::Alert)
Gitlab::Database::Partitioning::PartitionManager.register(IncidentManagement::PendingEscalations::Alert)
end
begin
Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions unless ENV['DISABLE_POSTGRES_PARTITION_CREATION_ON_STARTUP']
rescue ActiveRecord::ActiveRecordError, PG::Error
# ignore - happens when Rake tasks yet have to create a database, e.g. for testing
end
......@@ -4,16 +4,17 @@ module Gitlab
module Database
module Partitioning
class MonthlyStrategy
attr_reader :model, :partitioning_key
attr_reader :model, :partitioning_key, :retain_for
# We create this many partitions in the future
HEADROOM = 6.months
delegate :table_name, to: :model
def initialize(model, partitioning_key)
def initialize(model, partitioning_key, retain_for: nil)
@model = model
@partitioning_key = partitioning_key
@retain_for = retain_for
end
def current_partitions
......@@ -27,13 +28,21 @@ module Gitlab
desired_partitions - current_partitions
end
def extra_partitions
current_partitions - desired_partitions
end
private
def desired_partitions
[].tap do |parts|
min_date, max_date = relevant_range
if pruning_old_partitions? && min_date <= oldest_active_date
min_date = oldest_active_date.beginning_of_month
else
parts << partition_for(upper_bound: min_date)
end
while min_date < max_date
next_date = min_date.next_month
......@@ -52,13 +61,17 @@ module Gitlab
# to start from MINVALUE to a specific date `x`. The range returned
# does not include the range of the first, half-unbounded partition.
def relevant_range
if first_partition = current_partitions.min
if (first_partition = current_partitions.min)
# Case 1: First partition starts with MINVALUE, i.e. from is nil -> start with first real partition
# Case 2: Rather unexpectedly, first partition does not start with MINVALUE, i.e. from is not nil
# In this case, use first partition beginning as a start
min_date = first_partition.from || first_partition.to
end
if pruning_old_partitions?
min_date ||= oldest_active_date
end
# In case we don't have a partition yet
min_date ||= Date.today
min_date = min_date.beginning_of_month
......@@ -72,6 +85,14 @@ module Gitlab
TimePartition.new(table_name, lower_bound, upper_bound)
end
def pruning_old_partitions?
Feature.enabled?(:partition_pruning_dry_run) && retain_for.present?
end
def oldest_active_date
(Date.today - retain_for).beginning_of_month
end
def connection
ActiveRecord::Base.connection
end
......
......@@ -3,7 +3,7 @@
module Gitlab
module Database
module Partitioning
class PartitionCreator
class PartitionManager
def self.register(model)
raise ArgumentError, "Only models with a #partitioning_strategy can be registered." unless model.respond_to?(:partitioning_strategy)
......@@ -15,7 +15,7 @@ module Gitlab
end
LEASE_TIMEOUT = 1.minute
LEASE_KEY = 'database_partition_creation_%s'
MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
attr_reader :models
......@@ -23,23 +23,25 @@ module Gitlab
@models = models
end
def create_partitions
def sync_partitions
Gitlab::AppLogger.info("Checking state of dynamic postgres partitions")
models.each do |model|
# Double-checking before getting the lease:
# The prevailing situation is no missing partitions
next if missing_partitions(model).empty?
# The prevailing situation is no missing partitions and no extra partitions
next if missing_partitions(model).empty? && extra_partitions(model).empty?
only_with_exclusive_lease(model) do
only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do
partitions_to_create = missing_partitions(model)
create(partitions_to_create) unless partitions_to_create.empty?
next if partitions_to_create.empty?
create(model, partitions_to_create)
if Feature.enabled?(:partition_pruning_dry_run)
partitions_to_detach = extra_partitions(model)
detach(partitions_to_detach) unless partitions_to_detach.empty?
end
end
rescue StandardError => e
Gitlab::AppLogger.error("Failed to create partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
Gitlab::AppLogger.error("Failed to create / detach partition(s) for #{model.table_name}: #{e.class}: #{e.message}")
end
end
......@@ -51,15 +53,22 @@ module Gitlab
model.partitioning_strategy.missing_partitions
end
def only_with_exclusive_lease(model)
lease = Gitlab::ExclusiveLease.new(LEASE_KEY % model.table_name, timeout: LEASE_TIMEOUT)
def extra_partitions(model)
return [] unless Feature.enabled?(:partition_pruning_dry_run)
return [] unless connection.table_exists?(model.table_name)
model.partitioning_strategy.extra_partitions
end
def only_with_exclusive_lease(model, lease_key:)
lease = Gitlab::ExclusiveLease.new(lease_key % model.table_name, timeout: LEASE_TIMEOUT)
yield if lease.try_obtain
ensure
lease&.cancel
end
def create(model, partitions)
def create(partitions)
connection.transaction do
with_lock_retries do
partitions.each do |partition|
......@@ -71,6 +80,18 @@ module Gitlab
end
end
def detach(partitions)
connection.transaction do
with_lock_retries do
partitions.each { |p| detach_one_partition(p) }
end
end
end
def detach_one_partition(partition)
Gitlab::AppLogger.info("Planning to detach #{partition.partition_name} for table #{partition.table}")
end
def with_lock_retries(&block)
Gitlab::Database::WithLockRetries.new(
klass: self.class,
......
......@@ -6,7 +6,7 @@ module Gitlab
class PartitionMonitoring
attr_reader :models
def initialize(models = PartitionCreator.models)
def initialize(models = PartitionManager.models)
@models = models
end
......
......@@ -118,7 +118,7 @@ namespace :gitlab do
desc 'Create missing dynamic database partitions'
task create_dynamic_partitions: :environment do
Gitlab::Database::Partitioning::PartitionCreator.new.create_partitions
Gitlab::Database::Partitioning::PartitionManager.new.sync_partitions
end
# This is targeted towards deploys and upgrades of GitLab.
......
......@@ -71,6 +71,18 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
model.create!(created_at: Date.parse('2020-06-15'))
end
context 'when pruning partitions before June 2020' do
subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }
it 'does not include the missing partition from May 2020 because it would be dropped' do
expect(subject).not_to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01'))
end
it 'detects the missing partition for 1 month ago (July 2020)' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-07-01', '2020-08-01'))
end
end
it 'detects the gap and the missing partition in May 2020' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01'))
end
......@@ -108,6 +120,19 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
SQL
end
context 'when pruning partitions before June 2020' do
subject { described_class.new(model, partitioning_key, retain_for: 1.month).missing_partitions }
it 'detects exactly the set of partitions from June 2020 to March 2021' do
months = %w[2020-07-01 2020-08-01 2020-09-01 2020-10-01 2020-11-01 2020-12-01 2021-01-01 2021-02-01 2021-03-01]
expected = months[..-2].zip(months.drop(1)).map do |(from, to)|
Gitlab::Database::Partitioning::TimePartition.new(model.table_name, from, to)
end
expect(subject).to match_array(expected)
end
end
it 'detects the missing catch-all partition at the beginning' do
expect(subject).to include(Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-08-01'))
end
......@@ -150,4 +175,100 @@ RSpec.describe Gitlab::Database::Partitioning::MonthlyStrategy do
end
end
end
describe '#extra_partitions' do
let(:model) do
Class.new(ActiveRecord::Base) do
self.table_name = 'partitioned_test'
self.primary_key = :id
end
end
let(:partitioning_key) { :created_at }
let(:table_name) { :partitioned_test }
around do |example|
travel_to(Date.parse('2020-08-22')) { example.run }
end
describe 'with existing partitions' do
before do
ActiveRecord::Base.connection.execute(<<~SQL)
CREATE TABLE #{table_name}
(id serial not null, created_at timestamptz not null, PRIMARY KEY (id, created_at))
PARTITION BY RANGE (created_at);
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_000000
PARTITION OF #{table_name}
FOR VALUES FROM (MINVALUE) TO ('2020-05-01');
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202005
PARTITION OF #{table_name}
FOR VALUES FROM ('2020-05-01') TO ('2020-06-01');
CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.partitioned_test_202006
PARTITION OF #{table_name}
FOR VALUES FROM ('2020-06-01') TO ('2020-07-01')
SQL
end
context 'without a time retention policy' do
subject { described_class.new(model, partitioning_key).extra_partitions }
it 'has no extra partitions to prune' do
expect(subject).to eq([])
end
end
context 'with a time retention policy that excludes no partitions' do
subject { described_class.new(model, partitioning_key, retain_for: 4.months).extra_partitions }
it 'has no extra partitions to prune' do
expect(subject).to eq([])
end
end
context 'with a time retention policy of 3 months' do
subject { described_class.new(model, partitioning_key, retain_for: 3.months).extra_partitions }
it 'prunes the unbounded partition ending 2020-05-01' do
min_value_to_may = Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-05-01',
partition_name: 'partitioned_test_000000')
expect(subject).to contain_exactly(min_value_to_may)
end
context 'when the feature flag is toggled off' do
before do
stub_feature_flags(partition_pruning_dry_run: false)
end
it 'is empty' do
expect(subject).to eq([])
end
end
end
context 'with a time retention policy of 2 months' do
subject { described_class.new(model, partitioning_key, retain_for: 2.months).extra_partitions }
it 'prunes the unbounded partition and the partition for May-June' do
expect(subject).to contain_exactly(
Gitlab::Database::Partitioning::TimePartition.new(model.table_name, nil, '2020-05-01', partition_name: 'partitioned_test_000000'),
Gitlab::Database::Partitioning::TimePartition.new(model.table_name, '2020-05-01', '2020-06-01', partition_name: 'partitioned_test_202005')
)
end
context 'when the feature flag is toggled off' do
before do
stub_feature_flags(partition_pruning_dry_run: false)
end
it 'is empty' do
expect(subject).to eq([])
end
end
end
end
end
end
......@@ -2,8 +2,9 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
RSpec.describe Gitlab::Database::Partitioning::PartitionManager do
include Database::PartitioningHelpers
include Database::TableSchemaHelpers
include ExclusiveLeaseHelpers
describe '.register' do
......@@ -14,12 +15,12 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
end
end
describe '#create_partitions (mocked)' do
subject { described_class.new(models).create_partitions }
context 'creating partitions (mocked)' do
subject(:sync_partitions) { described_class.new(models).sync_partitions }
let(:models) { [model] }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table) }
let(:partitioning_strategy) { double(missing_partitions: partitions) }
let(:partitioning_strategy) { double(missing_partitions: partitions, extra_partitions: []) }
let(:table) { "some_table" }
before do
......@@ -27,7 +28,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
allow(ActiveRecord::Base.connection).to receive(:table_exists?).with(table).and_return(true)
allow(ActiveRecord::Base.connection).to receive(:execute).and_call_original
stub_exclusive_lease(described_class::LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT)
stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT)
end
let(:partitions) do
......@@ -41,7 +42,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.first.to_sql)
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.second.to_sql)
subject
sync_partitions
end
context 'error handling with 2 models' do
......@@ -52,21 +53,21 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
]
end
let(:strategy1) { double('strategy1', missing_partitions: nil) }
let(:strategy2) { double('strategy2', missing_partitions: partitions) }
let(:strategy1) { double('strategy1', missing_partitions: nil, extra_partitions: []) }
let(:strategy2) { double('strategy2', missing_partitions: partitions, extra_partitions: []) }
it 'still creates partitions for the second table' do
expect(strategy1).to receive(:missing_partitions).and_raise('this should never happen (tm)')
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.first.to_sql)
expect(ActiveRecord::Base.connection).to receive(:execute).with(partitions.second.to_sql)
subject
sync_partitions
end
end
end
describe '#create_partitions' do
subject { described_class.new([my_model]).create_partitions }
context 'creating partitions' do
subject(:sync_partitions) { described_class.new([my_model]).sync_partitions }
let(:connection) { ActiveRecord::Base.connection }
let(:my_model) do
......@@ -88,9 +89,73 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionCreator do
end
it 'creates partitions' do
expect { subject }.to change { find_partitions(my_model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA).size }.from(0)
expect { sync_partitions }.to change { find_partitions(my_model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA).size }.from(0)
end
end
context 'detaching partitions (mocked)' do
subject(:sync_partitions) { manager.sync_partitions }
let(:manager) { described_class.new(models) }
let(:models) { [model] }
let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table)}
let(:partitioning_strategy) { double(extra_partitions: extra_partitions, missing_partitions: []) }
let(:table) { "foo" }
before do
allow(ActiveRecord::Base.connection).to receive(:table_exists?).and_call_original
allow(ActiveRecord::Base.connection).to receive(:table_exists?).with(table).and_return(true)
stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT)
end
let(:extra_partitions) do
[
instance_double(Gitlab::Database::Partitioning::TimePartition, table: table, partition_name: 'foo1'),
instance_double(Gitlab::Database::Partitioning::TimePartition, table: table, partition_name: 'foo2')
]
end
context 'with the partition_pruning_dry_run feature flag enabled' do
before do
stub_feature_flags(partition_pruning_dry_run: true)
end
it 'detaches each extra partition' do
extra_partitions.each { |p| expect(manager).to receive(:detach_one_partition).with(p) }
subject
sync_partitions
end
context 'error handling' do
let(:models) do
[
double(partitioning_strategy: error_strategy, table_name: table),
model
]
end
let(:error_strategy) { double(extra_partitions: nil, missing_partitions: []) }
it 'still drops partitions for the other model' do
expect(error_strategy).to receive(:extra_partitions).and_raise('injected error!')
extra_partitions.each { |p| expect(manager).to receive(:detach_one_partition).with(p) }
sync_partitions
end
end
end
context 'with the partition_pruning_dry_run feature flag disabled' do
before do
stub_feature_flags(partition_pruning_dry_run: false)
end
it 'returns immediately' do
expect(manager).not_to receive(:detach)
sync_partitions
end
end
end
end
......@@ -14,6 +14,16 @@ RSpec.describe PartitionedTable do
end
end
context 'with keyword arguments passed to the strategy' do
subject { my_class.partitioned_by(key, strategy: :monthly, retain_for: 3.months) }
it 'passes the keyword arguments to the strategy' do
expect(Gitlab::Database::Partitioning::MonthlyStrategy).to receive(:new).with(my_class, key, retain_for: 3.months).and_call_original
subject
end
end
it 'assigns the MonthlyStrategy as the partitioning strategy' do
subject
......@@ -27,7 +37,7 @@ RSpec.describe PartitionedTable do
end
it 'registers itself with the PartitionCreator' do
expect(Gitlab::Database::Partitioning::PartitionCreator).to receive(:register).with(my_class)
expect(Gitlab::Database::Partitioning::PartitionManager).to receive(:register).with(my_class)
subject
end
......
# frozen_string_literal: true
require "spec_helper"
RSpec.describe Database::PartitionManagementWorker do
describe '#perform' do
subject { described_class.new.perform }
let(:manager) { instance_double('PartitionManager', sync_partitions: nil) }
let(:monitoring) { instance_double('PartitionMonitoring', report_metrics: nil) }
before do
allow(Gitlab::Database::Partitioning::PartitionManager).to receive(:new).and_return(manager)
allow(Gitlab::Database::Partitioning::PartitionMonitoring).to receive(:new).and_return(monitoring)
end
it 'delegates to PartitionManager' do
expect(manager).to receive(:sync_partitions)
subject
end
it 'reports partition metrics' do
expect(monitoring).to receive(:report_metrics)
subject
end
end
end
# frozen_string_literal: true
require "spec_helper"
#
require 'spec_helper'
RSpec.describe PartitionCreationWorker do
describe '#perform' do
subject { described_class.new.perform }
let(:creator) { instance_double('PartitionCreator', create_partitions: nil) }
let(:monitoring) { instance_double('PartitionMonitoring', report_metrics: nil) }
before do
allow(Gitlab::Database::Partitioning::PartitionCreator).to receive(:new).and_return(creator)
allow(Gitlab::Database::Partitioning::PartitionMonitoring).to receive(:new).and_return(monitoring)
end
it 'delegates to PartitionCreator' do
expect(creator).to receive(:create_partitions)
let(:management_worker) { double }
subject
end
it 'reports partition metrics' do
expect(monitoring).to receive(:report_metrics)
describe '#perform' do
it 'forwards to the Database::PartitionManagementWorker' do
expect(Database::PartitionManagementWorker).to receive(:new).and_return(management_worker)
expect(management_worker).to receive(:perform)
subject
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment