Commit a7aafeee authored by Andreas Brandl

Merge branch 'ab/reindexing-strategy' into 'master'

Adjust reindexing strategy

See merge request gitlab-org/gitlab!65655
parents ed63cf22 291ff03f
# frozen_string_literal: true
# Recreates the `postgres_indexes` view so that it exposes an additional
# `type` column, populated from `pg_am.amname` for each index.
#
# NOTE(review): both view definitions join `pg_indexes` on the index name
# alone (no schema condition). If the same index name exists in more than one
# of the selected schemas this looks like it would yield duplicate rows —
# confirm, and address in a follow-up migration if so.
class AddIndexTypeToPostgresIndexesView < ActiveRecord::Migration[6.1]
  # Drops the current view and recreates it including the `type` column.
  def up
    execute(view_with_type_sql)
  end

  # Drops the current view and recreates it without the `type` column.
  def down
    execute(view_without_type_sql)
  end

  private

  # SQL for the new view definition; joins `pg_am` to select `a.amname AS type`.
  def view_with_type_sql
    <<~SQL
      DROP VIEW IF EXISTS postgres_indexes;
      CREATE VIEW postgres_indexes AS
      SELECT (pg_namespace.nspname::text || '.'::text) || i.relname::text AS identifier,
      pg_index.indexrelid,
      pg_namespace.nspname AS schema,
      i.relname AS name,
      pg_indexes.tablename,
      a.amname AS type,
      pg_index.indisunique AS "unique",
      pg_index.indisvalid AS valid_index,
      i.relispartition AS partitioned,
      pg_index.indisexclusion AS exclusion,
      pg_index.indexprs IS NOT NULL AS expression,
      pg_index.indpred IS NOT NULL AS partial,
      pg_indexes.indexdef AS definition,
      pg_relation_size(i.oid::regclass) AS ondisk_size_bytes
      FROM pg_index
      JOIN pg_class i ON i.oid = pg_index.indexrelid
      JOIN pg_namespace ON i.relnamespace = pg_namespace.oid
      JOIN pg_indexes ON i.relname = pg_indexes.indexname
      JOIN pg_am a ON i.relam = a.oid
      WHERE pg_namespace.nspname <> 'pg_catalog'::name AND (pg_namespace.nspname = ANY (ARRAY["current_schema"(), 'gitlab_partitions_dynamic'::name, 'gitlab_partitions_static'::name]));
    SQL
  end

  # SQL for the view definition used on rollback; has no `pg_am` join and no
  # `type` column.
  def view_without_type_sql
    <<~SQL
      DROP VIEW IF EXISTS postgres_indexes;
      CREATE VIEW postgres_indexes AS
      SELECT (((pg_namespace.nspname)::text || '.'::text) || (pg_class.relname)::text) AS identifier,
      pg_index.indexrelid,
      pg_namespace.nspname AS schema,
      pg_class.relname AS name,
      pg_indexes.tablename,
      pg_index.indisunique AS "unique",
      pg_index.indisvalid AS valid_index,
      pg_class.relispartition AS partitioned,
      pg_index.indisexclusion AS exclusion,
      (pg_index.indexprs IS NOT NULL) AS expression,
      (pg_index.indpred IS NOT NULL) AS partial,
      pg_indexes.indexdef AS definition,
      pg_relation_size((pg_class.oid)::regclass) AS ondisk_size_bytes
      FROM (((pg_index
      JOIN pg_class ON ((pg_class.oid = pg_index.indexrelid)))
      JOIN pg_namespace ON ((pg_class.relnamespace = pg_namespace.oid)))
      JOIN pg_indexes ON ((pg_class.relname = pg_indexes.indexname)))
      WHERE ((pg_namespace.nspname <> 'pg_catalog'::name) AND (pg_namespace.nspname = ANY (ARRAY["current_schema"(), 'gitlab_partitions_dynamic'::name, 'gitlab_partitions_static'::name])));
    SQL
  end
end
45ec2dd6113d112050a1ac062064950fa18b3b5903a9fd60234e9e9fa48c7070
\ No newline at end of file
......@@ -16583,23 +16583,25 @@ END AS attrelname
ORDER BY relation_stats.nspname, relation_stats.tblname, relation_stats.idxname;
CREATE VIEW postgres_indexes AS
SELECT (((pg_namespace.nspname)::text || '.'::text) || (pg_class.relname)::text) AS identifier,
SELECT (((pg_namespace.nspname)::text || '.'::text) || (i.relname)::text) AS identifier,
pg_index.indexrelid,
pg_namespace.nspname AS schema,
pg_class.relname AS name,
i.relname AS name,
pg_indexes.tablename,
a.amname AS type,
pg_index.indisunique AS "unique",
pg_index.indisvalid AS valid_index,
pg_class.relispartition AS partitioned,
i.relispartition AS partitioned,
pg_index.indisexclusion AS exclusion,
(pg_index.indexprs IS NOT NULL) AS expression,
(pg_index.indpred IS NOT NULL) AS partial,
pg_indexes.indexdef AS definition,
pg_relation_size((pg_class.oid)::regclass) AS ondisk_size_bytes
FROM (((pg_index
JOIN pg_class ON ((pg_class.oid = pg_index.indexrelid)))
JOIN pg_namespace ON ((pg_class.relnamespace = pg_namespace.oid)))
JOIN pg_indexes ON ((pg_class.relname = pg_indexes.indexname)))
pg_relation_size((i.oid)::regclass) AS ondisk_size_bytes
FROM ((((pg_index
JOIN pg_class i ON ((i.oid = pg_index.indexrelid)))
JOIN pg_namespace ON ((i.relnamespace = pg_namespace.oid)))
JOIN pg_indexes ON ((i.relname = pg_indexes.indexname)))
JOIN pg_am a ON ((i.relam = a.oid)))
WHERE ((pg_namespace.nspname <> 'pg_catalog'::name) AND (pg_namespace.nspname = ANY (ARRAY["current_schema"(), 'gitlab_partitions_dynamic'::name, 'gitlab_partitions_static'::name])));
CREATE VIEW postgres_partitioned_tables AS
......@@ -7,6 +7,7 @@ module Gitlab
self.table_name = 'postgres_indexes'
self.primary_key = 'identifier'
self.inheritance_column = :_type_disabled
has_one :bloat_estimate, class_name: 'Gitlab::Database::PostgresIndexBloatEstimate', foreign_key: :identifier
has_many :reindexing_actions, class_name: 'Gitlab::Database::Reindexing::ReindexAction', foreign_key: :index_identifier
......@@ -22,10 +23,10 @@ module Gitlab
# is defined on a table that is not partitioned.
#
# Deprecated: Switch to scope .reindexing_support
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false, expression: false)}
scope :regular, -> { where(unique: false, partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES)}
# Indexes for reindexing with PG12
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false) }
scope :reindexing_support, -> { where(partitioned: false, exclusion: false, expression: false, type: Gitlab::Database::Reindexing::SUPPORTED_TYPES) }
scope :not_match, ->(regex) { where("name !~ ?", regex)}
......@@ -43,6 +44,10 @@ module Gitlab
strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 }
end
# Ratio of the estimated bloat (`bloat_size`) to the index's on-disk size
# (`ondisk_size_bytes`).
#
# Returns a Float. When the on-disk size is zero or nil, plain division
# would produce NaN or Infinity, which cannot be sorted reliably or compared
# against a threshold, so 0.0 is returned instead.
def relative_bloat_level
  size = ondisk_size_bytes.to_f
  return 0.0 if size.zero?

  bloat_size / size
end
# String representation of the index: returns its `name`.
def to_s
name
end
......
......@@ -6,6 +6,8 @@ module Gitlab
# Number of indexes to reindex per invocation
DEFAULT_INDEXES_PER_INVOCATION = 2
SUPPORTED_TYPES = %w(btree gist).freeze
# candidate_indexes: Array of Gitlab::Database::PostgresIndex
def self.perform(candidate_indexes, how_many: DEFAULT_INDEXES_PER_INVOCATION)
IndexSelection.new(candidate_indexes).take(how_many).each do |index|
......
......@@ -6,6 +6,12 @@ module Gitlab
class IndexSelection
include Enumerable
# Only reindex indexes with a relative bloat level (bloat estimate / size) higher than this
MINIMUM_RELATIVE_BLOAT = 0.2
# Only consider indexes with a total ondisk size in this range (before reindexing)
INDEX_SIZE_RANGE = (1.gigabyte..100.gigabyte).freeze
delegate :each, to: :indexes
def initialize(candidates)
......@@ -24,11 +30,12 @@ module Gitlab
# we force a N+1 pattern here and estimate bloat on a per-index
# basis.
@indexes ||= filter_candidates.sort_by(&:bloat_size).reverse
end
def filter_candidates
candidates.not_recently_reindexed
@indexes ||= candidates
.not_recently_reindexed
.where(ondisk_size_bytes: INDEX_SIZE_RANGE)
.sort_by(&:relative_bloat_level) # forced N+1
.reverse
.select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT }
end
end
end
......
......@@ -10,7 +10,7 @@ module Gitlab
enum state: { started: 0, finished: 1, failed: 2 }
# Amount of time to consider a previous reindexing *recent*
RECENT_THRESHOLD = 7.days
RECENT_THRESHOLD = 10.days
scope :recent, -> { where(state: :finished).where('action_end > ?', Time.zone.now - RECENT_THRESHOLD) }
......
......@@ -61,7 +61,8 @@ module Gitlab
index: index.identifier,
table: index.tablename,
estimated_bloat_bytes: bloat_size,
index_size_before_bytes: ondisk_size_before
index_size_before_bytes: ondisk_size_before,
relative_bloat_level: index.relative_bloat_level
)
duration = Benchmark.realtime do
......@@ -77,6 +78,7 @@ module Gitlab
estimated_bloat_bytes: bloat_size,
index_size_before_bytes: ondisk_size_before,
index_size_after_bytes: index.ondisk_size_bytes,
relative_bloat_level: index.relative_bloat_level,
duration_s: duration.round(2)
)
end
......
......@@ -38,6 +38,12 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it 'only non-expression indexes' do
expect(described_class.regular).to all(have_attributes(expression: false))
end
it 'only btree and gist indexes' do
types = described_class.regular.map(&:type).uniq
expect(types & %w(btree gist)).to eq(types)
end
end
describe '.reindexing_support' do
......@@ -52,6 +58,12 @@ RSpec.describe Gitlab::Database::PostgresIndex do
it 'only non-expression indexes' do
expect(described_class.reindexing_support).to all(have_attributes(expression: false))
end
it 'only btree and gist indexes' do
types = described_class.reindexing_support.map(&:type).uniq
expect(types & %w(btree gist)).to eq(types)
end
end
describe '.not_match' do
......@@ -85,6 +97,16 @@ RSpec.describe Gitlab::Database::PostgresIndex do
end
end
describe '#relative_bloat_level' do
subject { build(:postgres_index, bloat_estimate: bloat_estimate, ondisk_size_bytes: 1024) }
let(:bloat_estimate) { build(:postgres_index_bloat_estimate, bloat_size: 256) }
it 'calculates the relative bloat level' do
expect(subject.relative_bloat_level).to eq(0.25)
end
end
describe '#unique?' do
it 'returns true for a unique index' do
expect(find('public.bar_key')).to be_unique
......
......@@ -10,20 +10,50 @@ RSpec.describe Gitlab::Database::Reindexing::IndexSelection do
before do
swapout_view_for_table(:postgres_index_bloat_estimates)
swapout_view_for_table(:postgres_indexes)
create_list(:postgres_index, 10, ondisk_size_bytes: 10.gigabytes).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 2.gigabyte * (i + 1))
end
end
# Spec helper: runs the given raw SQL string on the `ActiveRecord::Base`
# connection and returns whatever the connection's `execute` returns.
def execute(sql)
ActiveRecord::Base.connection.execute(sql)
end
it 'orders by highest bloat first' do
create_list(:postgres_index, 10).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
end
it 'orders by highest relative bloat first' do
expected = Gitlab::Database::PostgresIndex.all.sort_by(&:relative_bloat_level).reverse.map(&:name)
expect(subject.map(&:name)).to eq(expected)
end
it 'excludes indexes with a relative bloat level below 20%' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 10.gigabytes),
bloat_size_bytes: 1.9.gigabyte # 19% relative index bloat
)
expected = Gitlab::Database::PostgresIndexBloatEstimate.order(bloat_size_bytes: :desc).map(&:index)
expect(subject).not_to include(excluded.index)
end
it 'excludes indexes smaller than 1 GB ondisk size' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 0.99.gigabytes),
bloat_size_bytes: 0.8.gigabyte
)
expect(subject).not_to include(excluded.index)
end
it 'excludes indexes larger than 100 GB ondisk size' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 101.gigabytes),
bloat_size_bytes: 25.gigabyte
)
expect(subject).to eq(expected)
expect(subject).not_to include(excluded.index)
end
context 'with time frozen' do
......@@ -31,20 +61,17 @@ RSpec.describe Gitlab::Database::Reindexing::IndexSelection do
freeze_time { example.run }
end
it 'does not return indexes with reindex action in the last 7 days' do
not_recently_reindexed = create_list(:postgres_index, 2).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
create(:reindex_action, index: index, action_end: Time.zone.now - 7.days - 1.minute)
it 'does not return indexes with reindex action in the last 10 days' do
not_recently_reindexed = Gitlab::Database::PostgresIndex.all.each do |index|
create(:reindex_action, index: index, action_end: Time.zone.now - 10.days - 1.minute)
end
create_list(:postgres_index, 2).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
create_list(:postgres_index, 10, ondisk_size_bytes: 10.gigabytes).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 2.gigabyte * (i + 1))
create(:reindex_action, index: index, action_end: Time.zone.now)
end
expected = Gitlab::Database::PostgresIndexBloatEstimate.where(identifier: not_recently_reindexed.map(&:identifier)).map(&:index).map(&:identifier).sort
expect(subject.map(&:identifier).sort).to eq(expected)
expect(subject.map(&:name).sort).to eq(not_recently_reindexed.map(&:name).sort)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment