Commit 2d15b97d authored by Andreas Brandl

Expand index selection strategy for reindexing

We change strategy to select indexes for reindexing:

1. Consider relative bloat levels instead of absolute bloat
1. Exclude GIN indexes (risk for GitLab.com)
1. Exclude indexes with relative bloat level < 20%
1. Exclude indexes < 1 GB ondisk size (not worth it for now)
1. Exclude indexes > 100 GB ondisk size (risk for GitLab.com)
1. Don't reindex an index twice within 10 days

Together with adding support for unique indexes in
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64695, this
aims to distribute reindexing actions better across indexes and exclude
the risky ones.

Relates to:

- https://gitlab.com/gitlab-org/gitlab/-/issues/335295
- https://gitlab.com/gitlab-org/gitlab/-/issues/335214
- https://gitlab.com/gitlab-org/gitlab/-/issues/335211

Related discussion in:

- https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5069#note_618889225
parent 1f3e6ecf
......@@ -44,6 +44,10 @@ module Gitlab
strong_memoize(:bloat_size) { bloat_estimate&.bloat_size || 0 }
end
# Ratio of the estimated bloat to the index's ondisk size.
#
# The size is converted to a Float before dividing, so the result is a
# fraction (e.g. 256 bytes of bloat on a 1024-byte index yields 0.25)
# instead of a truncated integer quotient.
def relative_bloat_level
  bloat = bloat_size
  total_size = ondisk_size_bytes.to_f

  bloat / total_size
end
# String representation of the index: returns its name.
def to_s
name
end
......
......@@ -6,6 +6,12 @@ module Gitlab
class IndexSelection
include Enumerable
# Only reindex indexes with a relative bloat level (bloat estimate / size) higher than this
MINIMUM_RELATIVE_BLOAT = 0.2
# Only consider indexes with a total ondisk size in this range (before reindexing)
INDEX_SIZE_RANGE = (1.gigabyte..100.gigabyte).freeze
delegate :each, to: :indexes
def initialize(candidates)
......@@ -24,11 +30,12 @@ module Gitlab
# we force a N+1 pattern here and estimate bloat on a per-index
# basis.
@indexes ||= filter_candidates.sort_by(&:bloat_size).reverse
end
# Narrows the candidate indexes down to the ones selected for reindexing.
#
# NOTE(review): in this flattened diff view the bare
# `candidates.not_recently_reindexed` expression directly below is evaluated
# and its result discarded — it looks like the pre-change body of this
# method; confirm against the real file.
def filter_candidates
candidates.not_recently_reindexed
# Memoized selection: apply the `not_recently_reindexed` scope, keep only
# indexes whose ondisk size lies within INDEX_SIZE_RANGE, order them by
# relative bloat level (highest first) and drop everything below
# MINIMUM_RELATIVE_BLOAT.
@indexes ||= candidates
.not_recently_reindexed
.where(ondisk_size_bytes: INDEX_SIZE_RANGE)
.sort_by(&:relative_bloat_level) # forced N+1
.reverse
.select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT }
end
end
end
......
......@@ -97,6 +97,16 @@ RSpec.describe Gitlab::Database::PostgresIndex do
end
end
describe '#relative_bloat_level' do
  # 256 bytes of estimated bloat on a 1024-byte index => 25% relative bloat.
  let(:bloat_estimate) { build(:postgres_index_bloat_estimate, bloat_size: 256) }

  subject(:index) do
    build(:postgres_index, bloat_estimate: bloat_estimate, ondisk_size_bytes: 1024)
  end

  it 'calculates the relative bloat level' do
    ratio = index.relative_bloat_level

    expect(ratio).to eq(0.25)
  end
end
describe '#unique?' do
it 'returns true for a unique index' do
expect(find('public.bar_key')).to be_unique
......
......@@ -10,20 +10,50 @@ RSpec.describe Gitlab::Database::Reindexing::IndexSelection do
before do
swapout_view_for_table(:postgres_index_bloat_estimates)
swapout_view_for_table(:postgres_indexes)
create_list(:postgres_index, 10, ondisk_size_bytes: 10.gigabytes).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 2.gigabyte * (i + 1))
end
end
# Spec helper: runs the given raw SQL on the ActiveRecord base connection
# and returns whatever the connection's `execute` returns.
def execute(sql)
  connection = ActiveRecord::Base.connection
  connection.execute(sql)
end
it 'orders by highest bloat first' do
create_list(:postgres_index, 10).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
it 'orders by highest relative bloat first' do
expected = Gitlab::Database::PostgresIndex.all.sort_by(&:relative_bloat_level).reverse.map(&:name)
expect(subject.map(&:name)).to eq(expected)
end
it 'excludes indexes with a relative bloat level below 20%' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 10.gigabytes),
bloat_size_bytes: 1.9.gigabyte # 19% relative index bloat
)
expect(subject).not_to include(excluded.index)
end
expected = Gitlab::Database::PostgresIndexBloatEstimate.order(bloat_size_bytes: :desc).map(&:index)
it 'excludes indexes smaller than 1 GB ondisk size' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 0.99.gigabytes),
bloat_size_bytes: 0.8.gigabyte
)
expect(subject).to eq(expected)
expect(subject).not_to include(excluded.index)
end
it 'excludes indexes larger than 100 GB ondisk size' do
excluded = create(
:postgres_index_bloat_estimate,
index: create(:postgres_index, ondisk_size_bytes: 101.gigabytes),
bloat_size_bytes: 25.gigabyte
)
expect(subject).not_to include(excluded.index)
end
context 'with time frozen' do
......@@ -31,20 +61,17 @@ RSpec.describe Gitlab::Database::Reindexing::IndexSelection do
freeze_time { example.run }
end
it 'does not return indexes with reindex action in the last 7 days' do
not_recently_reindexed = create_list(:postgres_index, 2).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
create(:reindex_action, index: index, action_end: Time.zone.now - 7.days - 1.minute)
it 'does not return indexes with reindex action in the last 10 days' do
not_recently_reindexed = Gitlab::Database::PostgresIndex.all.each do |index|
create(:reindex_action, index: index, action_end: Time.zone.now - 10.days - 1.minute)
end
create_list(:postgres_index, 2).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i)
create_list(:postgres_index, 10, ondisk_size_bytes: 10.gigabytes).each_with_index do |index, i|
create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 2.gigabyte * (i + 1))
create(:reindex_action, index: index, action_end: Time.zone.now)
end
expected = Gitlab::Database::PostgresIndexBloatEstimate.where(identifier: not_recently_reindexed.map(&:identifier)).map(&:index).map(&:identifier).sort
expect(subject.map(&:identifier).sort).to eq(expected)
expect(subject.map(&:name).sort).to eq(not_recently_reindexed.map(&:name).sort)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment