Commit 6f9172db authored by Igor Drozdov

Merge branch 'pks-git-access-batched-blob-enumeration' into 'master'

Use batched new blobs check [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!64503
parents 194630ff f73f0c7b
......@@ -480,7 +480,7 @@ end
gem 'spamcheck', '~> 0.1.0'
# Gitaly GRPC protocol definitions
gem 'gitaly', '~> 14.0.0.pre.rc2'
gem 'gitaly', '~> 14.1.0.pre.rc1'
# KAS GRPC protocol definitions
gem 'kas-grpc', '~> 0.0.2'
......
......@@ -454,7 +454,7 @@ GEM
rails (>= 3.2.0)
git (1.7.0)
rchardet (~> 1.8)
gitaly (14.0.0.pre.rc2)
gitaly (14.1.0.pre.rc1)
grpc (~> 1.0)
github-markup (1.7.0)
gitlab (4.16.1)
......@@ -1483,7 +1483,7 @@ DEPENDENCIES
gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
gitaly (~> 14.0.0.pre.rc2)
gitaly (~> 14.1.0.pre.rc1)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-dangerfiles (~> 2.1.2)
......
---
name: git_access_batched_changes_size
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64503
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/334130
milestone: '14.1'
type: development
group: group::gitaly
default_enabled: false
......@@ -370,6 +370,20 @@ module Gitlab
end
end
# Enumerate the blobs reachable from +revisions+. The pseudo-revisions
# `--not` and `--all` are supported. Blank entries and the blank SHA are
# dropped first; when nothing is left an empty array is returned without
# contacting the blob client.
#
# The request runs with the smaller of GitalyClient.medium_timeout and
# +dynamic_timeout+ when the latter is given, and with the medium timeout
# otherwise.
def blobs(revisions, dynamic_timeout: nil)
  usable_revisions = revisions.select do |revision|
    !revision.blank? && revision != ::Gitlab::Git::BLANK_SHA
  end
  return [] if usable_revisions.blank?

  wrapped_gitaly_errors do
    gitaly_blob_client.list_blobs(
      usable_revisions,
      limit: REV_LIST_COMMIT_LIMIT,
      dynamic_timeout: dynamic_timeout
    )
  end
end
def count_commits(options)
options = process_count_commits_options(options.dup)
......
......@@ -498,13 +498,23 @@ module Gitlab
end
# Sums the blob sizes (`sum(&:size)`) for the :newrev of every entry in
# changes_list and hands the total to check_size_against_limit.
#
# When the git_access_batched_changes_size feature flag is enabled for the
# project, the sizes come from a single repository.blobs call over
# ['--not', '--all', '--not'] followed by all :newrev values; otherwise
# repository.new_blobs is called once per change and the sizes are accumulated.
#
# NOTE(review): this hunk appears to show removed and added diff lines
# interleaved (changes_size is assigned twice up front and
# check_size_against_limit is called in two places) - confirm against the
# actual file before relying on the exact statement order below.
def check_changes_size
changes_size = 0
changes_size =
if Feature.enabled?(:git_access_batched_changes_size, project, default_enabled: :yaml)
revs = ['--not', '--all', '--not']
revs += changes_list.map { |change| change[:newrev] }
changes_list.each do |change|
changes_size += repository.new_blobs(change[:newrev]).sum(&:size)
repository.blobs(revs).sum(&:size)
else
changes_size = 0
check_size_against_limit(changes_size)
end
changes_list.each do |change|
changes_size += repository.new_blobs(change[:newrev]).sum(&:size)
end
changes_size
end
check_size_against_limit(changes_size)
end
def check_size_against_limit(size)
......
......@@ -19,6 +19,25 @@ module Gitlab
consume_blob_response(response)
end
# Issue a ListBlobs request for +revisions+ (a single revision or a list of
# them) and wrap the response in a ListBlobsAdapter fed into a BlobsStitcher.
#
# +limit+ and +bytes_limit+ are forwarded to the request unchanged. The call
# uses GitalyClient.medium_timeout, lowered to +dynamic_timeout+ when that is
# given and smaller.
def list_blobs(revisions, limit: 0, bytes_limit: 0, dynamic_timeout: nil)
  request = Gitaly::ListBlobsRequest.new(
    repository: @gitaly_repo,
    revisions: Array.wrap(revisions),
    limit: limit,
    bytes_limit: bytes_limit
  )

  # Start from the medium timeout and only ever shrink it.
  timeout = GitalyClient.medium_timeout
  timeout = [dynamic_timeout, timeout].min if dynamic_timeout

  stream = GitalyClient.call(@gitaly_repo.storage_name, :blob_service, :list_blobs, request, timeout: timeout)
  adapter = GitalyClient::ListBlobsAdapter.new(stream)

  GitalyClient::BlobsStitcher.new(adapter)
end
def batch_lfs_pointers(blob_ids)
return [] if blob_ids.empty?
......
......@@ -35,8 +35,8 @@ module Gitlab
Gitlab::Git::Blob.new(
id: blob_data[:oid],
mode: blob_data[:mode].to_s(8),
name: File.basename(blob_data[:path]),
mode: blob_data[:mode]&.to_s(8),
name: blob_data[:path] && File.basename(blob_data[:path]),
path: blob_data[:path],
size: blob_data[:size],
commit_id: blob_data[:revision],
......
# frozen_string_literal: true
module Gitlab
  module GitalyClient
    # Presents a stream of RPC response messages as one flat sequence of blob
    # entries: every message's `blobs` are yielded in order, one at a time.
    class ListBlobsAdapter
      include Enumerable

      # rpc_response - an object responding to `each` that yields messages,
      #                each of which responds to `blobs`.
      def initialize(rpc_response)
        @rpc_response = rpc_response
      end

      # Yields every blob entry of every response message.
      #
      # Returns an Enumerator when called without a block, matching the usual
      # Enumerable contract, instead of raising LocalJumpError on the first
      # entry.
      def each
        return enum_for(:each) unless block_given?

        @rpc_response.each do |msg|
          msg.blobs.each do |blob|
            yield blob
          end
        end
      end
    end
  end
end
......@@ -869,6 +869,71 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
end
end
describe '#blobs' do
  let_it_be(:commit_oid) { '4b4918a572fa86f9771e5ba40fbd48e1eb03e2c6' }

  # Including contexts must define `revisions` and `expected_blobs`.
  shared_examples 'a blob enumeration' do
    it 'enumerates blobs' do
      enumerated = repository.blobs(revisions).to_a

      expect(enumerated.size).to eq(expected_blobs)

      enumerated.each do |entry|
        expect(entry.data).to be_empty
        expect(entry.id.size).to be(40)
      end
    end
  end

  context 'single revision' do
    let(:expected_blobs) { 53 }
    let(:revisions) { [commit_oid] }

    it_behaves_like 'a blob enumeration'
  end

  context 'multiple revisions' do
    let(:expected_blobs) { 1 }
    let(:revisions) { ["^#{commit_oid}~", commit_oid] }

    it_behaves_like 'a blob enumeration'
  end

  context 'pseudo revisions' do
    let(:expected_blobs) { 0 }
    let(:revisions) { ['master', '--not', '--all'] }

    it_behaves_like 'a blob enumeration'
  end

  context 'blank revisions' do
    let(:expected_blobs) { 0 }
    let(:revisions) { [::Gitlab::Git::BLANK_SHA] }

    # Only the blank SHA is given, so the blob service must not be called.
    before do
      expect_any_instance_of(Gitlab::GitalyClient::BlobService)
        .not_to receive(:list_blobs)
    end

    it_behaves_like 'a blob enumeration'
  end

  context 'partially blank revisions' do
    let(:expected_blobs) { 53 }
    let(:revisions) { [::Gitlab::Git::BLANK_SHA, commit_oid] }

    # The blank SHA must be filtered out before the request is made.
    before do
      expect_next_instance_of(Gitlab::GitalyClient::BlobService) do |blob_service|
        expect(blob_service)
          .to receive(:list_blobs)
          .with([commit_oid], kind_of(Hash))
          .and_call_original
      end
    end

    it_behaves_like 'a blob enumeration'
  end
end
describe '#count_commits_between' do
subject { repository.count_commits_between('feature', 'master') }
......
......@@ -384,11 +384,12 @@ RSpec.describe Gitlab::GitAccessSnippet do
it_behaves_like 'a push to repository to make it over the limit'
end
context 'when GIT_OBJECT_DIRECTORY_RELATIVE env var is not set' do
shared_examples_for 'a change with GIT_OBJECT_DIRECTORY_RELATIVE env var unset' do
let(:change_size) { 200 }
before do
allow(snippet.repository).to receive(:new_blobs).and_return(
stub_feature_flags(git_access_batched_changes_size: batched)
allow(snippet.repository).to receive(expected_call).and_return(
[double(:blob, size: change_size)]
)
end
......@@ -397,6 +398,20 @@ RSpec.describe Gitlab::GitAccessSnippet do
it_behaves_like 'a push to repository below the limit'
it_behaves_like 'a push to repository to make it over the limit'
end
# Flag on: the shared examples stub `blobs` on the snippet repository.
context 'when batched computation is enabled' do
  let(:expected_call) { :blobs }
  let(:batched) { true }

  it_behaves_like 'a change with GIT_OBJECT_DIRECTORY_RELATIVE env var unset'
end
# Flag off: the shared examples stub `new_blobs` on the snippet repository.
context 'when batched computation is disabled' do
  let(:expected_call) { :new_blobs }
  let(:batched) { false }

  it_behaves_like 'a change with GIT_OBJECT_DIRECTORY_RELATIVE env var unset'
end
end
describe 'HEAD realignment' do
......
......@@ -88,4 +88,104 @@ RSpec.describe Gitlab::GitalyClient::BlobService do
subject
end
end
describe '#list_blobs' do
  # Defaults; individual contexts override `revisions`, `limit` and
  # `bytes_limit` as needed. `expected_params` is what the outgoing request
  # is matched against.
  let(:limit) { 0 }
  let(:bytes_limit) { 0 }
  let(:expected_params) { { revisions: revisions, limit: limit, bytes_limit: bytes_limit } }

  before do
    ::Gitlab::GitalyClient.clear_stubs!
  end

  subject { client.list_blobs(revisions, limit: limit, bytes_limit: bytes_limit) }

  context 'with a single revision' do
    let(:revisions) { ['master'] }

    it 'sends a list_blobs message' do
      expect_next_instance_of(Gitaly::BlobService::Stub) do |service|
        expect(service)
          .to receive(:list_blobs)
          .with(gitaly_request_with_params(expected_params), kind_of(Hash))
          .and_return([])
      end

      subject
    end
  end

  context 'with multiple revisions' do
    let(:revisions) { ['master', '--not', '--all'] }

    it 'sends a list_blobs message' do
      expect_next_instance_of(Gitaly::BlobService::Stub) do |service|
        expect(service)
          .to receive(:list_blobs)
          .with(gitaly_request_with_params(expected_params), kind_of(Hash))
          .and_return([])
      end

      subject
    end
  end

  context 'with multiple revisions and limits' do
    let(:revisions) { ['master', '--not', '--all'] }
    let(:limit) { 10 }
    # Previously misspelled as `bytes_lmit`, which left `bytes_limit` at its
    # default of 0 so a non-zero byte limit was never exercised.
    let(:bytes_limit) { 1024 }

    it 'sends a list_blobs message' do
      expect_next_instance_of(Gitaly::BlobService::Stub) do |service|
        expect(service)
          .to receive(:list_blobs)
          .with(gitaly_request_with_params(expected_params), kind_of(Hash))
          .and_return([])
      end

      subject
    end
  end

  context 'with split contents' do
    let(:revisions) { ['master'] }

    # The stubbed response spreads blob data over several entries and
    # messages; entries without an oid continue the preceding blob's data, as
    # the expectations on the three resulting blobs show.
    it 'sends a list_blobs message', :aggregate_failures do
      expect_next_instance_of(Gitaly::BlobService::Stub) do |service|
        expect(service)
          .to receive(:list_blobs)
          .with(gitaly_request_with_params(expected_params), kind_of(Hash))
          .and_return([
            Gitaly::ListBlobsResponse.new(blobs: [
              Gitaly::ListBlobsResponse::Blob.new(oid: "012345", size: 8, data: "0x01"),
              Gitaly::ListBlobsResponse::Blob.new(data: "23")
            ]),
            Gitaly::ListBlobsResponse.new(blobs: [
              Gitaly::ListBlobsResponse::Blob.new(data: "45"),
              Gitaly::ListBlobsResponse::Blob.new(oid: "56", size: 4, data: "0x5"),
              Gitaly::ListBlobsResponse::Blob.new(data: "6")
            ]),
            Gitaly::ListBlobsResponse.new(blobs: [
              Gitaly::ListBlobsResponse::Blob.new(oid: "78", size: 4, data: "0x78")
            ])
          ])
      end

      blobs = subject.to_a
      expect(blobs.size).to be(3)

      expect(blobs[0].id).to eq('012345')
      expect(blobs[0].size).to eq(8)
      expect(blobs[0].data).to eq('0x012345')

      expect(blobs[1].id).to eq('56')
      expect(blobs[1].size).to eq(4)
      expect(blobs[1].data).to eq('0x56')

      expect(blobs[2].id).to eq('78')
      expect(blobs[2].size).to eq(4)
      expect(blobs[2].data).to eq('0x78')
    end
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment