Commit 91f0a077 authored by Igor Drozdov

Improve raw blobs downloading

This commit improves blob downloading in two ways:

- When we send a blob via Workhorse, we don't need to download the
file first, so we can set the limit to zero and download only the
metadata

- When we serve a file via the API, we shouldn't load an unlimited
amount of data; we should apply a limit there

Changelog: performance
parent fd05a579
......@@ -19,7 +19,7 @@ class Projects::RawController < Projects::ApplicationController
feature_category :source_code_management
def show
@blob = @repository.blob_at(@ref, @path)
@blob = @repository.blob_at(@ref, @path, limit: Gitlab::Git::Blob::LFS_POINTER_MAX_SIZE)
send_blob(@repository, @blob, inline: (params[:inline] != 'false'), allow_caching: Guest.can?(:download_code, @project))
end
......
......@@ -502,8 +502,8 @@ class Repository
end
end
def blob_at(sha, path)
Blob.decorate(raw_repository.blob_at(sha, path), container)
def blob_at(sha, path, limit: Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE)
Blob.decorate(raw_repository.blob_at(sha, path, limit: limit), container)
rescue Gitlab::Git::Repository::NoRepository
nil
end
......
......@@ -29,14 +29,13 @@ module API
not_found!
end
def assign_blob_vars!
def assign_blob_vars!(limit:)
authorize! :download_code, user_project
@repo = user_project.repository
begin
@blob = Gitlab::Git::Blob.raw(@repo, params[:sha])
@blob.load_all_data!(@repo)
@blob = Gitlab::Git::Blob.raw(@repo, params[:sha], limit: limit)
rescue StandardError
not_found! 'Blob'
end
......@@ -71,7 +70,8 @@ module API
requires :sha, type: String, desc: 'The commit hash'
end
get ':id/repository/blobs/:sha/raw' do
assign_blob_vars!
# Loading only the metadata is enough to ask Workhorse to load the whole blob
assign_blob_vars!(limit: 0)
no_cache_headers
......@@ -83,7 +83,7 @@ module API
requires :sha, type: String, desc: 'The commit hash'
end
get ':id/repository/blobs/:sha' do
assign_blob_vars!
assign_blob_vars!(limit: -1)
{
size: @blob.size,
......
......@@ -77,8 +77,8 @@ module Gitlab
end
end
def raw(repository, sha)
repository.gitaly_blob_client.get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
def raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE)
repository.gitaly_blob_client.get_blob(oid: sha, limit: limit)
end
# Returns an array of Blob instances, specified in blob_references as
......
......@@ -827,8 +827,8 @@ module Gitlab
end
end
def blob_at(sha, path)
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
def blob_at(sha, path, limit: Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE)
Gitlab::Git::Blob.find(self, sha, path, limit: limit) unless Gitlab::Git.blank_ref?(sha)
end
# Items should be of format [[commit_id, path], [commit_id1, path1]]
......
......@@ -33,15 +33,25 @@ RSpec.describe Projects::RawController do
end
context 'regular filename' do
let(:filepath) { 'master/README.md' }
let(:filepath) { 'master/CONTRIBUTING.md' }
it 'delivers ASCII file' do
allow(Gitlab::Workhorse).to receive(:send_git_blob).and_call_original
subject
expect(response).to have_gitlab_http_status(:ok)
expect(response.header['Content-Type']).to eq('text/plain; charset=utf-8')
expect(response.header[Gitlab::Workhorse::DETECT_HEADER]).to eq 'true'
expect(response.header[Gitlab::Workhorse::SEND_DATA_HEADER]).to start_with('git-blob:')
expect(Gitlab::Workhorse).to have_received(:send_git_blob) do |repository, blob|
expected_blob = project.repository.blob_at('master', 'CONTRIBUTING.md')
expect(repository).to eq(project.repository)
expect(blob.id).to eq(expected_blob.id)
expect(blob).to be_truncated
end
end
it_behaves_like 'project cache control headers'
......
......@@ -107,13 +107,18 @@ RSpec.describe API::Repositories do
shared_examples_for 'repository blob' do
it 'returns blob attributes as json' do
stub_const("Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE", 5)
get api(route, current_user)
expect(response).to have_gitlab_http_status(:ok)
expect(json_response['size']).to eq(111)
expect(json_response['encoding']).to eq("base64")
expect(Base64.decode64(json_response['content']).lines.first).to eq("class Commit\n")
expect(json_response['sha']).to eq(sample_blob.oid)
content = Base64.decode64(json_response['content'])
expect(content.lines.first).to eq("class Commit\n")
expect(content).to eq(project.repository.gitaly_blob_client.get_blob(oid: sample_blob.oid, limit: -1).data)
end
context 'when sha does not exist' do
......@@ -164,7 +169,10 @@ RSpec.describe API::Repositories do
shared_examples_for 'repository raw blob' do
it 'returns the repository raw blob' do
expect(Gitlab::Workhorse).to receive(:send_git_blob)
expect(Gitlab::Workhorse).to receive(:send_git_blob) do |_, blob|
expect(blob.id).to eq(sample_blob.oid)
expect(blob.loaded_size).to eq(0)
end
get api(route, current_user)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment