Commit e646b94a authored by Nick Thomas's avatar Nick Thomas

Fix MR diff file counts for some historic data

Some old diffs in the database for GitLab.com are much bigger than
current diff limits permit - one example has around 45K records. The
`files_count` value isn't *that* important, so we can work around this
without more schema changes by using SMALLINT_MAX as a sentinel value.
parent ad844825
......@@ -17,6 +17,10 @@ class MergeRequestDiff < ApplicationRecord
# diffs to external storage
EXTERNAL_DIFF_CUTOFF = 7.days.freeze
# The files_count column is a 2-byte signed integer. Look up the true value
# from the database if this sentinel is seen
FILES_COUNT_SENTINEL = 2**15 - 1
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
......@@ -202,6 +206,17 @@ class MergeRequestDiff < ApplicationRecord
end
end
def files_count
db_value = read_attribute(:files_count)
case db_value
when nil, FILES_COUNT_SENTINEL
merge_request_diff_files.count
else
db_value
end
end
# This method will rely on repository branch sha
# in case start_commit_sha is nil. Its necesarry for old merge request diff
# created before version 8.4 to work
......@@ -423,7 +438,7 @@ class MergeRequestDiff < ApplicationRecord
# external storage. If external storage isn't an option for this diff, the
# method is a no-op.
def migrate_files_to_external_storage!
return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0
return if stored_externally? || !use_external_diff? || files_count == 0
rows = build_merge_request_diff_files(merge_request_diff_files)
rows = build_external_merge_request_diff_files(rows)
......@@ -449,7 +464,7 @@ class MergeRequestDiff < ApplicationRecord
# If this diff isn't in external storage, the method is a no-op.
def migrate_files_to_database!
return unless stored_externally?
return if merge_request_diff_files.count == 0
return if files_count == 0
rows = convert_external_diffs_to_database
......@@ -666,7 +681,7 @@ class MergeRequestDiff < ApplicationRecord
def set_count_columns
update_columns(
commits_count: merge_request_diff_commits.size,
files_count: merge_request_diff_files.size
files_count: [FILES_COUNT_SENTINEL, merge_request_diff_files.size].min
)
end
......
---
title: Fix MR diff file counts for some historic data
merge_request: 41676
author:
type: fixed
......@@ -4,13 +4,18 @@ module Gitlab
module BackgroundMigration
# Sets the MergeRequestDiff#files_count value for old rows
class SetMergeRequestDiffFilesCount
COUNT_SUBQUERY = <<~SQL
files_count = (
SELECT count(*)
FROM merge_request_diff_files
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id
)
SQL
# Some historic data has a *lot* of files. Apply a sentinel to these cases
FILES_COUNT_SENTINEL = 2**15 - 1
def self.count_subquery
<<~SQL
files_count = (
SELECT LEAST(#{FILES_COUNT_SENTINEL}, count(*))
FROM merge_request_diff_files
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id
)
SQL
end
class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation
include EachBatch
......@@ -20,7 +25,7 @@ module Gitlab
def perform(start_id, end_id)
MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation|
relation.update_all(COUNT_SUBQUERY)
relation.update_all(self.class.count_subquery)
end
end
end
......
......@@ -13,11 +13,11 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
let(:project) { projects.create!(namespace_id: namespace.id) }
let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) }
it 'fills the files_count column' do
empty_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
filled_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
let!(:empty_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
let!(:filled_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
3.times do |n|
let!(:filled_diff_files) do
1.upto(3).map do |n|
merge_request_diff_files.create!(
merge_request_diff_id: filled_diff.id,
relative_order: n,
......@@ -31,10 +31,21 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
new_path: ''
)
end
end
it 'fills the files_count column' do
described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(3)
end
it 'uses the sentinel value if the actual count is too high' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", filled_diff_files.size - 1)
described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(described_class::FILES_COUNT_SENTINEL)
end
end
......@@ -293,6 +293,7 @@ RSpec.describe MergeRequestDiff do
it 'does nothing with an empty diff' do
stub_external_diffs_setting(enabled: true)
MergeRequestDiffFile.where(merge_request_diff_id: diff.id).delete_all
diff.save! # update files_count
expect(diff).not_to receive(:update!)
......@@ -675,8 +676,39 @@ RSpec.describe MergeRequestDiff do
end
describe '#files_count' do
it 'returns number of diff files' do
expect(diff_with_commits.files_count).to eq(diff_with_commits.merge_request_diff_files.count)
let_it_be(:merge_request) { create(:merge_request) }
let(:diff) { merge_request.merge_request_diff }
let(:actual_count) { diff.merge_request_diff_files.count }
it 'is set by default' do
expect(diff.read_attribute(:files_count)).to eq(actual_count)
end
it 'is set to the sentinel value if the actual value exceeds it' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", actual_count - 1)
diff.save! # update the files_count column with the stub in place
expect(diff.read_attribute(:files_count)).to eq(described_class::FILES_COUNT_SENTINEL)
end
it 'uses the cached count if present' do
diff.update_columns(files_count: actual_count + 1)
expect(diff.files_count).to eq(actual_count + 1)
end
it 'uses the actual count if nil' do
diff.update_columns(files_count: nil)
expect(diff.files_count).to eq(actual_count)
end
it 'uses the actual count if overflown' do
diff.update_columns(files_count: described_class::FILES_COUNT_SENTINEL)
expect(diff.files_count).to eq(actual_count)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment