Commit f8731fa3 authored by Mark Chao's avatar Mark Chao

ES: Wrap blob search results as FoundBlob

Extract common logic between blob and commit search.
Internalize highlight for searches.

Assign project to search results, which can be
passed to FoundBlob, where it is used for
policy checks.
parent 7b84e864
......@@ -20,6 +20,16 @@ module Elastic
results
end
# @return [Kaminari::PaginatableArray]
def elastic_search_as_found_blob(query, page: 1, per: 20, options: {})
# Highlight is required for parse_search_result to locate relevant line
options = options.merge(highlight: true)
elastic_search_and_wrap(query, type: es_type, page: page, per: per, options: options) do |result, project|
::Gitlab::Elastic::SearchResults.parse_search_result(result, project)
end
end
private
def extract_repository_ids(options)
......@@ -165,6 +175,56 @@ module Elastic
total_count: res.size
}
end
# Wrap returned results into GitLab model objects and paginate
#
# @return [Kaminari::PaginatableArray]
def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, &blk)
type = type.to_s
response = elastic_search(
query,
type: type,
page: page,
per: per,
options: options
)[type.pluralize.to_sym][:results]
items, total_count = yield_each_search_result(response, type, &blk)
# Before "map" we had a paginated array so we need to recover it
offset = per * ((page || 1) - 1)
Kaminari.paginate_array(items, total_count: total_count, limit: per, offset: offset)
end
def yield_each_search_result(response, type)
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map { |result| project_id_for_commit_or_blob(result, type) }.uniq
projects = Project.with_route.id_in(project_ids).index_by(&:id)
total_count = response.total_count
items = response.map do |result|
project_id = project_id_for_commit_or_blob(result, type)
project = projects[project_id]
if project.nil? || project.pending_delete?
total_count -= 1
next
end
yield(result, project)
end
# Remove results for deleted projects
items.compact!
[items, total_count]
end
# Indexed commit does not include project_id
def project_id_for_commit_or_blob(result, type)
result.dig('_source', 'project_id') || result.dig('_source', type, 'rid').to_i
end
end
end
end
......@@ -16,10 +16,18 @@ module Elastic
end
def elastic_search(query, type: :all, page: 1, per: 20, options: {})
options[:repository_id] = repository_id if options[:repository_id].nil?
options = repository_specific_options(options)
self.class.elastic_search(query, type: type, page: page, per: per, options: options)
end
# @return [Kaminari::PaginatableArray]
def elastic_search_as_found_blob(query, page: 1, per: 20, options: {})
options = repository_specific_options(options)
self.class.elastic_search_as_found_blob(query, page: page, per: per, options: options)
end
def delete_index_for_commits_and_blobs(wiki: false)
types =
if wiki
......@@ -62,6 +70,14 @@ module Elastic
def repository_id
raise NotImplementedError
end
def repository_specific_options(options)
if options[:repository_id].nil?
options = options.merge(repository_id: repository_id)
end
options
end
end
end
end
......@@ -9,30 +9,9 @@ module Elastic
'blob'
end
# @return [Kaminari::PaginatableArray]
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {})
response = elastic_search(
query,
type: :commit,
page: page,
per: per_page,
options: options
)[:commits][:results]
response_count = response.total_count
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq
projects = Project.with_route.id_in(project_ids).index_by(&:id)
commits = response.map do |result|
project_id = result["_source"]["commit"]["rid"].to_i
project = projects[project_id]
if project.nil? || project.pending_delete?
response_count -= 1
next
end
elastic_search_and_wrap(query, type: :commit, page: page, per: per_page, options: options) do |result, project|
raw_commit = Gitlab::Git::Commit.new(
project.repository.raw,
prepare_commit(result['_source']['commit']),
......@@ -40,13 +19,6 @@ module Elastic
)
Commit.new(raw_commit, project)
end
# Remove results for deleted projects
commits.compact!
# Before "map" we had a paginated array so we need to recover it
offset = per_page * ((page || 1) - 1)
Kaminari.paginate_array(commits, total_count: response_count, limit: per_page, offset: offset)
end
private
......
......@@ -112,7 +112,7 @@ module Gitlab
false
end
def self.parse_search_result(result)
def self.parse_search_result(result, project)
ref = result["_source"]["blob"]["commit_sha"]
path = result["_source"]["blob"]["path"]
extname = File.extname(path)
......@@ -156,6 +156,7 @@ module Gitlab
ref: ref,
startline: from + 1,
data: data.join,
project: project,
project_id: project_id
)
end
......
# frozen_string_literal: true
require 'spec_helper'
describe Elastic::Latest::GitClassProxy do
let_it_be(:project) { create(:project, :repository) }
let(:included_class) { Elastic::Latest::RepositoryClassProxy }
subject { included_class.new(project.repository) }
describe '#elastic_search_as_found_blob', :elastic do
before do
stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true)
Gitlab::Elastic::Indexer.new(project).run
Gitlab::Elastic::Helper.refresh_index
end
it 'returns FoundBlob', :sidekiq_inline do
results = subject.elastic_search_as_found_blob('def popen')
expect(results).not_to be_empty
expect(results).to all(be_a(Gitlab::Search::FoundBlob))
result = results.first
expect(result.ref).to eq('b83d6e391c22777fca1ed3012fce84f633d7fed0')
expect(result.path).to eq('files/ruby/popen.rb')
expect(result.startline).to eq(2)
expect(result.data).to include('Popen')
expect(result.project).to eq(project)
end
end
end
......@@ -51,6 +51,33 @@ describe Elastic::Latest::GitInstanceProxy do
end
end
describe '#elastic_search_as_found_blob' do
let(:params) do
{
page: 2,
per: 30,
options: { foo: :bar }
}
end
it 'provides repository_id if not provided' do
expected_params = params.deep_dup
expected_params[:options][:repository_id] = project.id
expect(subject.class).to receive(:elastic_search_as_found_blob).with('foo', expected_params)
subject.elastic_search_as_found_blob('foo', params)
end
it 'uses provided repository_id' do
params[:options][:repository_id] = 42
expect(subject.class).to receive(:elastic_search_as_found_blob).with('foo', params)
subject.elastic_search_as_found_blob('foo', params)
end
end
describe '#delete_index_for_commits_and_blobs' do
let(:write_targets) { [double(:write_target_1), double(:write_target_2)] }
let(:read_target) { double(:read_target) }
......
......@@ -67,6 +67,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
end
describe 'parse_search_result' do
let(:project) { double(:project) }
let(:blob) do
{
'blob' => {
......@@ -78,11 +79,12 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
end
it 'returns an unhighlighted blob when no highlight data is present' do
parsed = described_class.parse_search_result('_source' => blob)
parsed = described_class.parse_search_result({ '_source' => blob }, project)
expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob)
expect(parsed).to have_attributes(
startline: 1,
project: project,
data: "foo\n"
)
end
......@@ -95,7 +97,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
}
}
parsed = described_class.parse_search_result(result)
parsed = described_class.parse_search_result(result, project)
expect(parsed).to be_kind_of(::Gitlab::Search::FoundBlob)
expect(parsed).to have_attributes(
......@@ -104,6 +106,7 @@ describe Gitlab::Elastic::SearchResults, :elastic, :sidekiq_might_not_need_inlin
basename: 'path/file',
ref: 'sha',
startline: 2,
project: project,
data: "bar\n"
)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment