Commit 6ffb030b authored by Robert Speicher

Merge branch 'feature/migrate-blob-batch-to-gitaly' into 'master'

Migrate Git::Blob.batch to Gitaly

Closes #37445 and gitaly#985

See merge request gitlab-org/gitlab-ce!16928
parents 1d698113 dabc703a
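
In short: this merge bumps gitaly-proto to 0.84.0 and reroutes Gitlab::Git::Blob.batch. When the :list_blobs_by_sha_path feature is enabled, the per-blob Gitaly lookups that used to be wrapped in allow_n_plus_1_calls are replaced by a single GetBlobs RPC issued through a new BlobService#get_blobs client method, and the streamed response is reassembled into Gitlab::Git::Blob objects by a new BlobsStitcher class. The caller-facing API does not change; as a minimal sketch (not part of this commit; the repository, revisions and paths are illustrative):

    # Sketch only: assumes `repository` is an already-initialized Gitlab::Git::Repository.
    blob_references = [
      ['master', 'README.md'],
      ['master', 'Gemfile']
    ]

    blobs = Gitlab::Git::Blob.batch(repository, blob_references, blob_size_limit: 1024 * 1024)

    blobs.each do |blob|
      # Each result is a Gitlab::Git::Blob with id, path, size, data, etc.
      puts "#{blob.path} (#{blob.size} bytes)"
    end
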
@@ -410,7 +410,7 @@ group :ed25519 do
end
# Gitaly GRPC client
-gem 'gitaly-proto', '~> 0.83.0', require: 'gitaly'
+gem 'gitaly-proto', '~> 0.84.0', require: 'gitaly'
# Locked until https://github.com/google/protobuf/issues/4210 is closed
gem 'google-protobuf', '= 3.5.1'
......
@@ -285,7 +285,7 @@ GEM
po_to_json (>= 1.0.0)
rails (>= 3.2.0)
gherkin-ruby (0.3.2)
-gitaly-proto (0.83.0)
+gitaly-proto (0.84.0)
google-protobuf (~> 3.1)
grpc (~> 1.0)
github-linguist (4.7.6)
@@ -1056,7 +1056,7 @@ DEPENDENCIES
gettext (~> 3.2.2)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.2.0)
-gitaly-proto (~> 0.83.0)
+gitaly-proto (~> 0.84.0)
github-linguist (~> 4.7.0)
gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-markup (~> 1.6.2)
......
@@ -53,11 +53,7 @@ module Gitlab
        def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
          Gitlab::GitalyClient.migrate(:list_blobs_by_sha_path) do |is_enabled|
            if is_enabled
-             Gitlab::GitalyClient.allow_n_plus_1_calls do
-               blob_references.map do |sha, path|
-                 find_by_gitaly(repository, sha, path, limit: blob_size_limit)
-               end
-             end
+             repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a
            else
              blob_references.map do |sha, path|
                find_by_rugged(repository, sha, path, limit: blob_size_limit)
......
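
Worth noting: the removed allow_n_plus_1_calls wrapper existed because the old Gitaly path resolved every [sha, path] pair with its own round trip (find_by_gitaly fetches one blob per request). With repository.gitaly_blob_client.get_blobs, all requested blobs are fetched over a single GetBlobs RPC, so the N+1 exemption is no longer needed. The Rugged branch is untouched and still serves requests while the :list_blobs_by_sha_path flag is disabled.
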
module Gitlab
  module GitalyClient
    class BlobService
      include Gitlab::EncodingHelper

      def initialize(repository)
        @gitaly_repo = repository.gitaly_repository
      end
@@ -54,6 +56,30 @@ module Gitlab
end
end
end
      def get_blobs(revision_paths, limit = -1)
        return [] if revision_paths.empty?

        revision_paths.map! do |rev, path|
          Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
        end

        request = Gitaly::GetBlobsRequest.new(
          repository: @gitaly_repo,
          revision_paths: revision_paths,
          limit: limit
        )

        response = GitalyClient.call(
          @gitaly_repo.storage_name,
          :blob_service,
          :get_blobs,
          request,
          timeout: GitalyClient.default_timeout
        )

        GitalyClient::BlobsStitcher.new(response)
      end
    end
  end
end
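
Because get_blobs hands back the response wrapped in a BlobsStitcher (below) rather than an Array, callers enumerate it or call .to_a to materialize the blobs; that is exactly what the Gitlab::Git::Blob.batch change above does. A minimal consuming sketch (variable names illustrative):

    # One GetBlobs RPC covers every [revision, path] pair; the stitcher
    # reassembles the streamed chunks into Gitlab::Git::Blob objects.
    blobs = repository.gitaly_blob_client.get_blobs(blob_references, blob_size_limit).to_a

As the specs further down exercise, the limit argument controls how much data comes back per blob: a positive limit truncates the data, 0 returns metadata only, and a negative limit returns everything regardless of MAX_DATA_DISPLAY_SIZE.
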
module Gitlab
  module GitalyClient
    class BlobsStitcher
      include Enumerable

      def initialize(rpc_response)
        @rpc_response = rpc_response
      end

      def each
        current_blob_data = nil

        @rpc_response.each do |msg|
          if msg.oid.blank? && msg.data.blank?
            next
          elsif msg.oid.present?
            yield new_blob(current_blob_data) if current_blob_data

            current_blob_data = msg.to_h.slice(:oid, :path, :size, :revision, :mode)
            current_blob_data[:data] = msg.data.dup
          else
            current_blob_data[:data] << msg.data
          end
        end

        yield new_blob(current_blob_data) if current_blob_data
      end

      private

      def new_blob(blob_data)
        Gitlab::Git::Blob.new(
          id: blob_data[:oid],
          mode: blob_data[:mode].to_s(8),
          name: File.basename(blob_data[:path]),
          path: blob_data[:path],
          size: blob_data[:size],
          commit_id: blob_data[:revision],
          data: blob_data[:data],
          binary: Gitlab::Git::Blob.binary?(blob_data[:data])
        )
      end
    end
  end
end
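
Judging from the stitcher above and its spec below, each found blob arrives as a header message carrying oid, path, size, revision, mode and the first chunk of data; any follow-up messages for the same blob carry only additional data, and a message whose oid and data are both blank stands for a requested path that does not exist and is skipped. BlobsStitcher walks the stream, concatenates the chunks, and yields one fully populated Gitlab::Git::Blob per header, so consumers never see the chunking.
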
@@ -178,67 +178,77 @@ describe Gitlab::Git::Blob, seed_helper: true do
end
  describe '.batch' do
    shared_examples 'loading blobs in batch' do
      let(:blob_references) do
        [
          [SeedRepo::Commit::ID, "files/ruby/popen.rb"],
          [SeedRepo::Commit::ID, 'six']
        ]
      end

      subject { described_class.batch(repository, blob_references) }

      it { expect(subject.size).to eq(blob_references.size) }

      context 'first blob' do
        let(:blob) { subject[0] }

        it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) }
        it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) }
        it { expect(blob.path).to eq("files/ruby/popen.rb") }
        it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) }
        it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) }
        it { expect(blob.size).to eq(669) }
        it { expect(blob.mode).to eq("100644") }
      end

      context 'second blob' do
        let(:blob) { subject[1] }

        it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') }
        it { expect(blob.data).to eq('') }
        it 'does not mark the blob as binary' do
          expect(blob).not_to be_binary
        end
      end

      context 'limiting' do
        subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) }

        context 'positive' do
          let(:blob_size_limit) { 10 }

          it { expect(subject.first.data.size).to eq(10) }
        end

        context 'zero' do
          let(:blob_size_limit) { 0 }

          it 'only loads the metadata' do
            expect(subject.first.size).not_to be(0)
            expect(subject.first.data).to eq('')
          end
        end

        context 'negative' do
          let(:blob_size_limit) { -1 }

          it 'ignores MAX_DATA_DISPLAY_SIZE' do
            stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)

            expect(subject.first.data.size).to eq(669)
          end
        end
      end
    end

    context 'when Gitaly list_blobs_by_sha_path feature is enabled' do
      it_behaves_like 'loading blobs in batch'
    end

    context 'when Gitaly list_blobs_by_sha_path feature is disabled', :disable_gitaly do
      it_behaves_like 'loading blobs in batch'
    end
  end

  describe '.batch_lfs_pointers' do
......
require 'spec_helper'

describe Gitlab::GitalyClient::BlobsStitcher do
  describe 'enumeration' do
    it 'combines segregated blob messages together' do
      messages = [
        OpenStruct.new(oid: 'abcdef1', path: 'path/to/file', size: 1642, revision: 'f00ba7', mode: 0100644, data: "first-line\n"),
        OpenStruct.new(oid: '', data: 'second-line'),
        OpenStruct.new(oid: '', data: '', revision: 'f00ba7', path: 'path/to/non-existent/file'),
        OpenStruct.new(oid: 'abcdef2', path: 'path/to/another-file', size: 2461, revision: 'f00ba8', mode: 0100644, data: "GIF87a\x90\x01".b)
      ]

      blobs = described_class.new(messages).to_a

      expect(blobs.size).to be(2)

      expect(blobs[0].id).to eq('abcdef1')
      expect(blobs[0].mode).to eq('100644')
      expect(blobs[0].name).to eq('file')
      expect(blobs[0].path).to eq('path/to/file')
      expect(blobs[0].size).to eq(1642)
      expect(blobs[0].commit_id).to eq('f00ba7')
      expect(blobs[0].data).to eq("first-line\nsecond-line")
      expect(blobs[0].binary?).to be false

      expect(blobs[1].id).to eq('abcdef2')
      expect(blobs[1].mode).to eq('100644')
      expect(blobs[1].name).to eq('another-file')
      expect(blobs[1].path).to eq('path/to/another-file')
      expect(blobs[1].size).to eq(2461)
      expect(blobs[1].commit_id).to eq('f00ba8')
      expect(blobs[1].data).to eq("GIF87a\x90\x01".b)
      expect(blobs[1].binary?).to be true
    end
  end
end