Commit 71774e4d authored by Markus Koller's avatar Markus Koller

Merge branch 'pks-file-size-check-bulk-load-new-blobs' into 'master'

push_rules: Implement bulk-checking of file sizes

See merge request gitlab-org/gitlab!69449
parents 0955f541 8a5681f2
---
name: new_blobs_via_list_blobs
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/68935
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/339637
milestone: '14.3'
type: development
group: group::gitaly
default_enabled: false
...@@ -13,20 +13,16 @@ module EE ...@@ -13,20 +13,16 @@ module EE
logger.log_timed(LOG_MESSAGE) do logger.log_timed(LOG_MESSAGE) do
max_file_size = push_rule.max_file_size max_file_size = push_rule.max_file_size
changes_access.changes.each do |change| newrevs = changes_access.changes.map { |change| change[:newrev] }
newrev = change[:newrev]
next if newrev.blank? || ::Gitlab::Git.blank_ref?(newrev) blobs = project.repository.new_blobs(newrevs, dynamic_timeout: logger.time_left)
blobs = project.repository.new_blobs(newrev, dynamic_timeout: logger.time_left) large_blob = blobs.find do |blob|
::Gitlab::Utils.bytes_to_megabytes(blob.size) > max_file_size
large_blob = blobs.find do |blob| end
::Gitlab::Utils.bytes_to_megabytes(blob.size) > max_file_size
end
if large_blob if large_blob
raise ::Gitlab::GitAccess::ForbiddenError, %Q{File "#{large_blob.path}" is larger than the allowed size of #{max_file_size} MB} raise ::Gitlab::GitAccess::ForbiddenError, %Q{File "#{large_blob.path}" is larger than the allowed size of #{max_file_size} MB}
end
end end
end end
end end
......
...@@ -360,18 +360,17 @@ module Gitlab ...@@ -360,18 +360,17 @@ module Gitlab
end end
end end
def new_blobs(newrev, dynamic_timeout: nil) def new_blobs(newrevs, dynamic_timeout: nil)
return [] if newrev.blank? || newrev == ::Gitlab::Git::BLANK_SHA newrevs = Array.wrap(newrevs).reject { |rev| rev.blank? || rev == ::Gitlab::Git::BLANK_SHA }
return [] if newrevs.empty?
strong_memoize("new_blobs_#{newrev}") do newrevs = newrevs.uniq.sort
if Feature.enabled?(:new_blobs_via_list_blobs)
blobs(['--not', '--all', '--not', newrev], with_paths: true, dynamic_timeout: dynamic_timeout) @new_blobs ||= Hash.new do |h, revs|
else h[revs] = blobs(['--not', '--all', '--not'] + newrevs, with_paths: true, dynamic_timeout: dynamic_timeout)
wrapped_gitaly_errors do
gitaly_ref_client.list_new_blobs(newrev, REV_LIST_COMMIT_LIMIT, dynamic_timeout: dynamic_timeout)
end
end
end end
@new_blobs[newrevs]
end end
# List blobs reachable via a set of revisions. Supports the # List blobs reachable via a set of revisions. Supports the
......
...@@ -939,15 +939,20 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -939,15 +939,20 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
describe '#new_blobs' do describe '#new_blobs' do
let(:repository) { mutable_repository } let(:repository) { mutable_repository }
let(:repository_rugged) { mutable_repository_rugged } let(:repository_rugged) { mutable_repository_rugged }
let(:new_blob) do let(:blob) { create_blob('This is a new blob') }
repository_rugged.write('This is a new blob', :blob) let(:commit) { create_commit('nested/new-blob.txt' => blob) }
def create_blob(content)
repository_rugged.write(content, :blob)
end end
let(:new_commit) do def create_commit(blobs)
author = { name: 'Test User', email: 'mail@example.com', time: Time.now } author = { name: 'Test User', email: 'mail@example.com', time: Time.now }
index = repository_rugged.index index = repository_rugged.index
index.add(path: 'nested/new-blob.txt', oid: new_blob, mode: 0100644) blobs.each do |path, oid|
index.add(path: path, oid: oid, mode: 0100644)
end
Rugged::Commit.create(repository_rugged, Rugged::Commit.create(repository_rugged,
author: author, author: author,
...@@ -957,52 +962,131 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -957,52 +962,131 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
tree: index.write_tree(repository_rugged)) tree: index.write_tree(repository_rugged))
end end
subject { repository.new_blobs(new_commit).to_a } subject { repository.new_blobs(newrevs).to_a }
context 'with :new_blobs_via_list_blobs enabled' do shared_examples '#new_blobs with revisions' do
before do before do
stub_feature_flags(new_blobs_via_list_blobs: true)
expect_next_instance_of(Gitlab::GitalyClient::BlobService) do |service| expect_next_instance_of(Gitlab::GitalyClient::BlobService) do |service|
expect(service) expect(service)
.to receive(:list_blobs) .to receive(:list_blobs)
.with(['--not', '--all', '--not', new_commit], .with(expected_newrevs,
limit: Gitlab::Git::Repository::REV_LIST_COMMIT_LIMIT, limit: Gitlab::Git::Repository::REV_LIST_COMMIT_LIMIT,
with_paths: true, with_paths: true,
dynamic_timeout: nil) dynamic_timeout: nil)
.once
.and_call_original .and_call_original
end end
end end
it 'enumerates new blobs' do it 'enumerates new blobs' do
expect(subject).to match_array( expect(subject).to match_array(expected_blobs)
[have_attributes(class: Gitlab::Git::Blob, id: new_blob, path: 'nested/new-blob.txt', size: 18)] end
)
it 'memoizes results' do
expect(subject).to match_array(expected_blobs)
expect(subject).to match_array(expected_blobs)
end end
end end
context 'with :new_blobs_via_list_blobs disabled' do context 'with a single revision' do
before do let(:newrevs) { commit }
stub_feature_flags(new_blobs_via_list_blobs: false) let(:expected_newrevs) { ['--not', '--all', '--not', newrevs] }
let(:expected_blobs) do
[have_attributes(class: Gitlab::Git::Blob, id: blob, path: 'nested/new-blob.txt', size: 18)]
end
expect_next_instance_of(Gitlab::GitalyClient::RefService) do |service| it_behaves_like '#new_blobs with revisions'
expect(service) end
.to receive(:list_new_blobs)
.with(new_commit, context 'with a single-entry array' do
Gitlab::Git::Repository::REV_LIST_COMMIT_LIMIT, let(:newrevs) { [commit] }
dynamic_timeout: nil) let(:expected_newrevs) { ['--not', '--all', '--not'] + newrevs }
.and_call_original let(:expected_blobs) do
end [have_attributes(class: Gitlab::Git::Blob, id: blob, path: 'nested/new-blob.txt', size: 18)]
end end
it 'enumerates new blobs' do it_behaves_like '#new_blobs with revisions'
expect(subject).to match_array([Gitaly::NewBlobObject.new( end
size: 18,
oid: new_blob, context 'with multiple revisions' do
path: "nested/new-blob.txt" let(:another_blob) { create_blob('Another blob') }
)]) let(:newrevs) { [commit, create_commit('another_path.txt' => another_blob)] }
let(:expected_newrevs) { ['--not', '--all', '--not'] + newrevs.sort }
let(:expected_blobs) do
[
have_attributes(class: Gitlab::Git::Blob, id: blob, path: 'nested/new-blob.txt', size: 18),
have_attributes(class: Gitlab::Git::Blob, id: another_blob, path: 'another_path.txt', size: 12)
]
end
it_behaves_like '#new_blobs with revisions'
end
context 'with partially blank revisions' do
let(:newrevs) { [nil, commit, Gitlab::Git::BLANK_SHA] }
let(:expected_newrevs) { ['--not', '--all', '--not', commit] }
let(:expected_blobs) do
[
have_attributes(class: Gitlab::Git::Blob, id: blob, path: 'nested/new-blob.txt', size: 18)
]
end
it_behaves_like '#new_blobs with revisions'
end
context 'with repeated revisions' do
let(:newrevs) { [commit, commit, commit] }
let(:expected_newrevs) { ['--not', '--all', '--not', commit] }
let(:expected_blobs) do
[
have_attributes(class: Gitlab::Git::Blob, id: blob, path: 'nested/new-blob.txt', size: 18)
]
end
it_behaves_like '#new_blobs with revisions'
end
context 'with preexisting commits' do
let(:newrevs) { ['refs/heads/master'] }
let(:expected_newrevs) { ['--not', '--all', '--not'] + newrevs }
let(:expected_blobs) { [] }
it_behaves_like '#new_blobs with revisions'
end
shared_examples '#new_blobs without revisions' do
before do
expect(Gitlab::GitalyClient::BlobService).not_to receive(:new)
end
it 'returns an empty array' do
expect(subject).to eq([])
end end
end end
context 'with a single nil newrev' do
let(:newrevs) { nil }
it_behaves_like '#new_blobs without revisions'
end
context 'with a single zero newrev' do
let(:newrevs) { Gitlab::Git::BLANK_SHA }
it_behaves_like '#new_blobs without revisions'
end
context 'with an empty array' do
let(:newrevs) { [] }
it_behaves_like '#new_blobs without revisions'
end
context 'with array containing only empty refs' do
let(:newrevs) { [nil, Gitlab::Git::BLANK_SHA] }
it_behaves_like '#new_blobs without revisions'
end
end end
describe '#new_commits' do describe '#new_commits' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment