Commit 77f65fac authored by Yannis Roussos's avatar Yannis Roussos

Merge branch 'ab/track-reltuples' into 'master'

Keep track of total tuple count at creation

See merge request gitlab-org/gitlab!58675
parents ff6a7e96 4fbf2f42
---
title: Track total_tuple_count for batched migrations
merge_request: 58675
author:
type: other
# frozen_string_literal: true
# Schema migration: adds a nullable bigint `total_tuple_count` column to
# `batched_background_migrations`; rolling back drops the column again.
class AddTotalTupleCountToBatchedMigrations < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  # Adds the new column.
  def up
    add_column(:batched_background_migrations, :total_tuple_count, :bigint)
  end

  # Drops the column added by #up.
  def down
    remove_column(:batched_background_migrations, :total_tuple_count)
  end
end
# frozen_string_literal: true
# Data migration: fills in `total_tuple_count` on every existing batched
# background migration record, using the estimate that
# Gitlab::Database::PgClass reports for the record's table.
#
# The backfill only runs where #should_run? is true; elsewhere both
# directions are no-ops.
class BackfillTotalTupleCountForBatchedMigrations < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  # Stores the current tuple estimate for each record. The estimate is nil
  # (and nil is stored) when no pg_class row is found for the table or when
  # the row has no usable estimate.
  def up
    return unless should_run?

    batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration

    # `total_tuple_count` is a freshly added column. If the model cached its
    # column list before that column existed, the attribute would be unknown
    # to it, so force the column information to be reloaded first.
    batched_migrations.reset_column_information

    # Iterate in batches rather than loading every record into memory at once.
    batched_migrations.find_each do |migration|
      total_tuple_count = Gitlab::Database::PgClass.for_table(migration.table_name)&.cardinality_estimate

      migration.update(total_tuple_count: total_tuple_count)
    end
  end

  # Clears the backfilled values again.
  def down
    return unless should_run?

    Gitlab::Database::BackgroundMigration::BatchedMigration.update_all(total_tuple_count: nil)
  end

  private

  # True in development/test environments and on GitLab.com.
  def should_run?
    Gitlab.dev_or_test_env? || Gitlab.com?
  end
end
b984ddc5765b6f906a3a2046005e8ad45bab360b095eee7e68c44a8ca5534679
\ No newline at end of file
64011f8d0bcb293bcd36125a52897901421dd18d96129f1514ea360b558f4294
\ No newline at end of file
......@@ -9830,6 +9830,7 @@ CREATE TABLE batched_background_migrations (
table_name text NOT NULL,
column_name text NOT NULL,
job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL,
total_tuple_count bigint,
CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)),
CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)),
CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)),
......@@ -3,10 +3,6 @@
module Gitlab
module Database
module Count
class PgClass < ActiveRecord::Base
self.table_name = 'pg_class'
end
# This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables.
#
# Specifically, it relies on the column reltuples in said table. An additional
......@@ -74,7 +70,7 @@ module Gitlab
def get_statistics(table_names, check_statistics: true)
time = 6.hours.ago
query = PgClass.joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
query = ::Gitlab::Database::PgClass.joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
.where(relname: table_names)
.where('schemaname = current_schema()')
.select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
......
......@@ -190,6 +190,10 @@ module Gitlab
migration_status = batch_max_value.nil? ? :finished : :active
batch_max_value ||= batch_min_value
# We keep track of the estimated number of tuples to reason later
# about the overall progress of a migration.
total_tuple_count = Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
Gitlab::Database::BackgroundMigration::BatchedMigration.create!(
job_class_name: job_class_name,
table_name: batch_table_name,
......@@ -201,7 +205,8 @@ module Gitlab
batch_size: batch_size,
sub_batch_size: sub_batch_size,
job_arguments: job_arguments,
status: migration_status)
status: migration_status,
total_tuple_count: total_tuple_count)
end
def perform_background_migration_inline?
......
# frozen_string_literal: true
module Gitlab
  module Database
    # ActiveRecord model mapped onto the `pg_class` table.
    class PgClass < ActiveRecord::Base
      self.table_name = 'pg_class'

      class << self
        # Looks up the pg_class row whose relname equals +relname+, joined
        # against pg_stat_user_tables and restricted to the current schema.
        #
        # @param relname [String, Symbol] name of the relation to look up
        # @return [PgClass, nil] the matching row, or nil when there is none
        def for_table(relname)
          with_stats = joins("LEFT JOIN pg_stat_user_tables ON pg_stat_user_tables.relid = pg_class.oid")
          in_current_schema = with_stats.where('schemaname = current_schema()')

          in_current_schema.find_by(relname: relname)
        end
      end

      # Integer form of `reltuples`.
      #
      # @return [Integer, nil] the estimate, or nil when it is below 1
      def cardinality_estimate
        estimate = reltuples.to_i

        estimate if estimate >= 1
      end
    end
  end
end
......@@ -9,5 +9,6 @@ FactoryBot.define do
job_class_name { 'CopyColumnUsingBackgroundMigrationJob' }
table_name { :events }
column_name { :id }
total_tuple_count { 10_000 }
end
end
......@@ -263,7 +263,15 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do
end
describe '#queue_batched_background_migration' do
let(:pgclass_info) { instance_double('Gitlab::Database::PgClass', cardinality_estimate: 42) }
before do
allow(Gitlab::Database::PgClass).to receive(:for_table).and_call_original
end
it 'creates the database record for the migration' do
expect(Gitlab::Database::PgClass).to receive(:for_table).with(:projects).and_return(pgclass_info)
expect do
model.queue_batched_background_migration(
'MyJobClass',
......@@ -288,7 +296,8 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do
batch_size: 100,
sub_batch_size: 10,
job_arguments: %w[],
status: 'active')
status: 'active',
total_tuple_count: pgclass_info.cardinality_estimate)
end
context 'when the job interval is lower than the minimum' do
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for Gitlab::Database::PgClass: the estimate derived from reltuples
# and the lookup of a pg_class row by table name.
RSpec.describe Gitlab::Database::PgClass, type: :model do
  describe '#cardinality_estimate' do
    subject(:estimate) { described_class.new(reltuples: reltuples).cardinality_estimate }

    context 'when no information is available' do
      let(:reltuples) { 0.0 }

      it 'returns nil for the estimate' do
        expect(estimate).to be_nil
      end
    end

    context 'with reltuples available' do
      let(:reltuples) { 42.0 }

      it 'returns the reltuples for the estimate' do
        expect(estimate).to eq(42)
      end
    end
  end

  describe '.for_table' do
    subject(:pg_class) { described_class.for_table(relname) }

    let(:relname) { :projects }

    it 'returns PgClass for this table' do
      expect(pg_class).to be_a(described_class)
    end

    it 'matches the relname' do
      expect(pg_class.relname).to eq(relname.to_s)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20210406144743_backfill_total_tuple_count_for_batched_migrations.rb')
# Migration spec: running the backfill must store the estimate reported by
# Gitlab::Database::PgClass on the existing batched migration record.
RSpec.describe BackfillTotalTupleCountForBatchedMigrations, :migration, schema: 20210406140057 do
  let_it_be(:table_name) { 'projects' }
  let_it_be(:migrations) { table(:batched_background_migrations) }

  # A pre-existing record that has no tuple count yet.
  let_it_be(:migration) do
    attributes = {
      created_at: Time.now,
      updated_at: Time.now,
      min_value: 1,
      max_value: 10_000,
      batch_size: 1_000,
      sub_batch_size: 100,
      interval: 120,
      status: 0,
      job_class_name: 'Foo',
      table_name: table_name,
      column_name: :id,
      total_tuple_count: nil
    }

    migrations.create!(attributes)
  end

  describe '#up' do
    # Stand-in for the PgClass row returned by the lookup.
    let(:estimate) { double('estimate', cardinality_estimate: 42) }

    before do
      expect(Gitlab::Database::PgClass).to receive(:for_table).with(table_name).and_return(estimate)
    end

    it 'updates total_tuple_count attribute' do
      migrate!

      migrations.all.each do |record|
        expect(record.total_tuple_count).to eq(estimate.cardinality_estimate)
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment