Commit e646b94a authored by Nick Thomas's avatar Nick Thomas

Fix MR diff file counts for some historic data

Some old diffs in the database for GitLab.com are much bigger than
current diff limits permit - one example has around 45K records. The
`files_count` value isn't *that* important, so we can work around this
without more schema changes by using SMALLINT_MAX as a sentinel value.
parent ad844825
...@@ -17,6 +17,10 @@ class MergeRequestDiff < ApplicationRecord ...@@ -17,6 +17,10 @@ class MergeRequestDiff < ApplicationRecord
# diffs to external storage # diffs to external storage
EXTERNAL_DIFF_CUTOFF = 7.days.freeze EXTERNAL_DIFF_CUTOFF = 7.days.freeze
# The files_count column is a 2-byte signed integer. Look up the true value
# from the database if this sentinel is seen
FILES_COUNT_SENTINEL = 2**15 - 1
belongs_to :merge_request belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff manual_inverse_association :merge_request, :merge_request_diff
...@@ -202,6 +206,17 @@ class MergeRequestDiff < ApplicationRecord ...@@ -202,6 +206,17 @@ class MergeRequestDiff < ApplicationRecord
end end
end end
def files_count
db_value = read_attribute(:files_count)
case db_value
when nil, FILES_COUNT_SENTINEL
merge_request_diff_files.count
else
db_value
end
end
# This method will rely on repository branch sha # This method will rely on repository branch sha
# in case start_commit_sha is nil. Its necesarry for old merge request diff # in case start_commit_sha is nil. Its necesarry for old merge request diff
# created before version 8.4 to work # created before version 8.4 to work
...@@ -423,7 +438,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -423,7 +438,7 @@ class MergeRequestDiff < ApplicationRecord
# external storage. If external storage isn't an option for this diff, the # external storage. If external storage isn't an option for this diff, the
# method is a no-op. # method is a no-op.
def migrate_files_to_external_storage! def migrate_files_to_external_storage!
return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0 return if stored_externally? || !use_external_diff? || files_count == 0
rows = build_merge_request_diff_files(merge_request_diff_files) rows = build_merge_request_diff_files(merge_request_diff_files)
rows = build_external_merge_request_diff_files(rows) rows = build_external_merge_request_diff_files(rows)
...@@ -449,7 +464,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -449,7 +464,7 @@ class MergeRequestDiff < ApplicationRecord
# If this diff isn't in external storage, the method is a no-op. # If this diff isn't in external storage, the method is a no-op.
def migrate_files_to_database! def migrate_files_to_database!
return unless stored_externally? return unless stored_externally?
return if merge_request_diff_files.count == 0 return if files_count == 0
rows = convert_external_diffs_to_database rows = convert_external_diffs_to_database
...@@ -666,7 +681,7 @@ class MergeRequestDiff < ApplicationRecord ...@@ -666,7 +681,7 @@ class MergeRequestDiff < ApplicationRecord
def set_count_columns def set_count_columns
update_columns( update_columns(
commits_count: merge_request_diff_commits.size, commits_count: merge_request_diff_commits.size,
files_count: merge_request_diff_files.size files_count: [FILES_COUNT_SENTINEL, merge_request_diff_files.size].min
) )
end end
......
---
title: Fix MR diff file counts for some historic data
merge_request: 41676
author:
type: fixed
...@@ -4,13 +4,18 @@ module Gitlab ...@@ -4,13 +4,18 @@ module Gitlab
module BackgroundMigration module BackgroundMigration
# Sets the MergeRequestDiff#files_count value for old rows # Sets the MergeRequestDiff#files_count value for old rows
class SetMergeRequestDiffFilesCount class SetMergeRequestDiffFilesCount
COUNT_SUBQUERY = <<~SQL # Some historic data has a *lot* of files. Apply a sentinel to these cases
files_count = ( FILES_COUNT_SENTINEL = 2**15 - 1
SELECT count(*)
FROM merge_request_diff_files def self.count_subquery
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id <<~SQL
) files_count = (
SQL SELECT LEAST(#{FILES_COUNT_SENTINEL}, count(*))
FROM merge_request_diff_files
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id
)
SQL
end
class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation
include EachBatch include EachBatch
...@@ -20,7 +25,7 @@ module Gitlab ...@@ -20,7 +25,7 @@ module Gitlab
def perform(start_id, end_id) def perform(start_id, end_id)
MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation| MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation|
relation.update_all(COUNT_SUBQUERY) relation.update_all(self.class.count_subquery)
end end
end end
end end
......
...@@ -13,11 +13,11 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem ...@@ -13,11 +13,11 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
let(:project) { projects.create!(namespace_id: namespace.id) } let(:project) { projects.create!(namespace_id: namespace.id) }
let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) } let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) }
it 'fills the files_count column' do let!(:empty_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
empty_diff = merge_request_diffs.create!(merge_request_id: merge_request.id) let!(:filled_diff) { merge_request_diffs.create!(merge_request_id: merge_request.id) }
filled_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
3.times do |n| let!(:filled_diff_files) do
1.upto(3).map do |n|
merge_request_diff_files.create!( merge_request_diff_files.create!(
merge_request_diff_id: filled_diff.id, merge_request_diff_id: filled_diff.id,
relative_order: n, relative_order: n,
...@@ -31,10 +31,21 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem ...@@ -31,10 +31,21 @@ RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schem
new_path: '' new_path: ''
) )
end end
end
it 'fills the files_count column' do
described_class.new.perform(empty_diff.id, filled_diff.id) described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0) expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(3) expect(filled_diff.reload.files_count).to eq(3)
end end
it 'uses the sentinel value if the actual count is too high' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", filled_diff_files.size - 1)
described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(described_class::FILES_COUNT_SENTINEL)
end
end end
...@@ -293,6 +293,7 @@ RSpec.describe MergeRequestDiff do ...@@ -293,6 +293,7 @@ RSpec.describe MergeRequestDiff do
it 'does nothing with an empty diff' do it 'does nothing with an empty diff' do
stub_external_diffs_setting(enabled: true) stub_external_diffs_setting(enabled: true)
MergeRequestDiffFile.where(merge_request_diff_id: diff.id).delete_all MergeRequestDiffFile.where(merge_request_diff_id: diff.id).delete_all
diff.save! # update files_count
expect(diff).not_to receive(:update!) expect(diff).not_to receive(:update!)
...@@ -675,8 +676,39 @@ RSpec.describe MergeRequestDiff do ...@@ -675,8 +676,39 @@ RSpec.describe MergeRequestDiff do
end end
describe '#files_count' do describe '#files_count' do
it 'returns number of diff files' do let_it_be(:merge_request) { create(:merge_request) }
expect(diff_with_commits.files_count).to eq(diff_with_commits.merge_request_diff_files.count)
let(:diff) { merge_request.merge_request_diff }
let(:actual_count) { diff.merge_request_diff_files.count }
it 'is set by default' do
expect(diff.read_attribute(:files_count)).to eq(actual_count)
end
it 'is set to the sentinel value if the actual value exceeds it' do
stub_const("#{described_class}::FILES_COUNT_SENTINEL", actual_count - 1)
diff.save! # update the files_count column with the stub in place
expect(diff.read_attribute(:files_count)).to eq(described_class::FILES_COUNT_SENTINEL)
end
it 'uses the cached count if present' do
diff.update_columns(files_count: actual_count + 1)
expect(diff.files_count).to eq(actual_count + 1)
end
it 'uses the actual count if nil' do
diff.update_columns(files_count: nil)
expect(diff.files_count).to eq(actual_count)
end
it 'uses the actual count if overflown' do
diff.update_columns(files_count: described_class::FILES_COUNT_SENTINEL)
expect(diff.files_count).to eq(actual_count)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment