Commit fe539c83 authored by John Cai's avatar John Cai

Call GetBlobs RPC in relative link filter

To fix the N+1 in RelativeLinkFilter, call the GetBlobs RPC to get all
of the blob types first, instead of doing a gitaly call per link
parent e8790766
---
title: Use GetBlobs RPC for uri type
merge_request: 16824
author:
type: performance
......@@ -20,16 +20,12 @@ module Banzai
def call
return doc if context[:system_note]
@uri_types = {}
clear_memoization(:linkable_files)
doc.search('a:not(.gfm)').each do |el|
process_link_attr el.attribute('href')
end
load_uri_types
doc.css('img, video').each do |el|
process_link_attr el.attribute('src')
process_link_attr el.attribute('data-src')
linkable_attributes.each do |attr|
process_link_attr(attr)
end
doc
......@@ -37,16 +33,81 @@ module Banzai
protected
def load_uri_types
return unless linkable_files?
return {} unless repository
clear_memoization(:linkable_attributes)
@uri_types = request_path.present? ? get_uri_types([request_path]) : {}
paths = linkable_attributes.flat_map do |attr|
[get_uri(attr).to_s, relative_file_path(get_uri(attr))]
end
paths.reject!(&:blank?)
paths.uniq!
@uri_types.merge!(get_uri_types(paths))
end
def linkable_files?
strong_memoize(:linkable_files) do
context[:project_wiki].nil? && repository.try(:exists?) && !repository.empty?
end
end
def process_link_attr(html_attr)
return if html_attr.blank?
return if html_attr.value.start_with?('//')
def linkable_attributes
strong_memoize(:linkable_attributes) do
attrs = []
attrs += doc.search('a:not(.gfm)').map do |el|
el.attribute('href')
end
attrs += doc.search('img, video').flat_map do |el|
[el.attribute('src'), el.attribute('data-src')]
end
attrs.reject do |attr|
attr.blank? || attr.value.start_with?('//')
end
end
end
def get_uri_types(paths)
return {} if paths.empty?
uri_types = Hash[paths.collect { |name| [name, nil] }]
get_blob_types(paths).each do |name, type|
if type == :blob
blob = ::Blob.decorate(Gitlab::Git::Blob.new(name: name), project)
uri_types[name] = blob.image? || blob.video? ? :raw : :blob
else
uri_types[name] = type
end
end
uri_types
end
def get_blob_types(paths)
revision_paths = paths.collect do |path|
[current_commit.sha, path.chomp("/")]
end
Gitlab::GitalyClient::BlobService.new(repository).get_blob_types(revision_paths, 1)
end
def get_uri(html_attr)
uri = URI(html_attr.value)
uri if uri.relative? && uri.path.present?
rescue URI::Error, Addressable::URI::InvalidURIError
end
def process_link_attr(html_attr)
if html_attr.value.start_with?('/uploads/')
process_link_to_upload_attr(html_attr)
elsif linkable_files? && repo_visible_to_user?
......@@ -81,6 +142,7 @@ module Banzai
def process_link_to_repository_attr(html_attr)
uri = URI(html_attr.value)
if uri.relative? && uri.path.present?
html_attr.value = rebuild_relative_uri(uri).to_s
end
......@@ -89,7 +151,7 @@ module Banzai
end
def rebuild_relative_uri(uri)
file_path = relative_file_path(uri)
file_path = nested_file_path_if_exists(uri)
uri.path = [
relative_url_root,
......@@ -102,13 +164,29 @@ module Banzai
uri
end
def relative_file_path(uri)
path = Addressable::URI.unescape(uri.path).delete("\0")
request_path = Addressable::URI.unescape(context[:requested_path])
nested_path = build_relative_path(path, request_path)
def nested_file_path_if_exists(uri)
path = cleaned_file_path(uri)
nested_path = relative_file_path(uri)
file_exists?(nested_path) ? nested_path : path
end
def cleaned_file_path(uri)
Addressable::URI.unescape(uri.path).delete("\0").chomp("/")
end
def relative_file_path(uri)
return if uri.nil?
build_relative_path(cleaned_file_path(uri), request_path)
end
def request_path
return unless context[:requested_path]
Addressable::URI.unescape(context[:requested_path]).chomp("/")
end
# Convert a relative path into its correct location based on the currently
# requested path
#
......@@ -136,6 +214,7 @@ module Banzai
return path[1..-1] if path.start_with?('/')
parts = request_path.split('/')
parts.pop if uri_type(request_path) != :tree
path.sub!(%r{\A\./}, '')
......@@ -149,14 +228,11 @@ module Banzai
end
def file_exists?(path)
path.present? && !!uri_type(path)
path.present? && uri_type(path).present?
end
def uri_type(path)
# https://gitlab.com/gitlab-org/gitlab-foss/issues/58657
Gitlab::GitalyClient.allow_n_plus_1_calls do
@uri_types[path] ||= current_commit.uri_type(path)
end
@uri_types[path] == :unknown ? "" : @uri_types[path]
end
def current_commit
......
......@@ -76,6 +76,30 @@ module Gitlab
GitalyClient::BlobsStitcher.new(response)
end
def get_blob_types(revision_paths, limit = -1)
return {} if revision_paths.empty?
request_revision_paths = revision_paths.map do |rev, path|
Gitaly::GetBlobsRequest::RevisionPath.new(revision: rev, path: encode_binary(path))
end
request = Gitaly::GetBlobsRequest.new(
repository: @gitaly_repo,
revision_paths: request_revision_paths,
limit: limit
)
response = GitalyClient.call(
@gitaly_repo.storage_name,
:blob_service,
:get_blobs,
request,
timeout: GitalyClient.fast_timeout
)
map_blob_types(response)
end
def get_new_lfs_pointers(revision, limit, not_in, dynamic_timeout = nil)
request = Gitaly::GetNewLFSPointersRequest.new(
repository: @gitaly_repo,
......@@ -132,6 +156,16 @@ module Gitlab
end
end
end
def map_blob_types(response)
types = {}
response.each do |msg|
types[msg.path.dup.force_encoding('utf-8')] = msg.type.downcase
end
types
end
end
end
end
......@@ -3,6 +3,9 @@
require 'spec_helper'
describe Banzai::Filter::RelativeLinkFilter do
include GitHelpers
include RepoHelpers
def filter(doc, contexts = {})
contexts.reverse_merge!({
commit: commit,
......@@ -34,6 +37,12 @@ describe Banzai::Filter::RelativeLinkFilter do
%(<div>#{element}</div>)
end
def allow_gitaly_n_plus_1
Gitlab::GitalyClient.allow_n_plus_1_calls do
yield
end
end
let(:project) { create(:project, :repository, :public) }
let(:user) { create(:user) }
let(:group) { nil }
......@@ -44,6 +53,19 @@ describe Banzai::Filter::RelativeLinkFilter do
let(:requested_path) { '/' }
let(:only_path) { true }
it 'does not trigger a gitaly n+1', :request_store do
raw_doc = ""
allow_gitaly_n_plus_1 do
30.times do |i|
create_file_in_repo(project, ref, ref, "new_file_#{i}", "x" )
raw_doc += link("new_file_#{i}")
end
end
expect { filter(raw_doc) }.to change { Gitlab::GitalyClient.get_request_count }.by(2)
end
shared_examples :preserve_unchanged do
it 'does not modify any relative URL in anchor' do
doc = filter(link('README.md'))
......@@ -244,7 +266,8 @@ describe Banzai::Filter::RelativeLinkFilter do
end
context 'when ref name contains special chars' do
let(:ref) {'mark#\'@],+;-._/#@!$&()+down'}
let(:ref) { 'mark#\'@],+;-._/#@!$&()+down' }
let(:path) { 'files/images/logo-black.png' }
it 'correctly escapes the ref' do
# Addressable won't escape the '#', so we do this manually
......@@ -252,8 +275,9 @@ describe Banzai::Filter::RelativeLinkFilter do
# Stub this method so the branch doesn't actually need to be in the repo
allow_any_instance_of(described_class).to receive(:uri_type).and_return(:raw)
allow_any_instance_of(described_class).to receive(:get_uri_types).and_return({ path: :tree })
doc = filter(link('files/images/logo-black.png'))
doc = filter(link(path))
expect(doc.at_css('a')['href'])
.to eq "/#{project_path}/raw/#{ref_escaped}/files/images/logo-black.png"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment