Commit 58bfd733 authored by Jan Provaznik's avatar Jan Provaznik

Optimized file search to work without limits

* removed 100 limit on file search results because we
  load all results anyway
* expensive processing (parsing match content, utf encoding)
  is done only for selected page in paginated output
parent 00acef43
- project = find_project_for_result_blob(blob) - project = find_project_for_result_blob(blob)
- return unless project - return unless project
- file_name, blob = parse_search_result(blob) - blob = parse_search_result(blob)
- blob_link = project_blob_path(project, tree_join(blob.ref, file_name)) - blob_link = project_blob_path(project, tree_join(blob.ref, blob.filename))
= render partial: 'search/results/blob_data', locals: { blob: blob, project: project, file_name: file_name, blob_link: blob_link } = render partial: 'search/results/blob_data', locals: { blob: blob, project: project, file_name: blob.filename, blob_link: blob_link }
- project = find_project_for_result_blob(wiki_blob) - project = find_project_for_result_blob(wiki_blob)
- file_name, wiki_blob = parse_search_result(wiki_blob) - wiki_blob = parse_search_result(wiki_blob)
- wiki_blob_link = project_wiki_path(project, wiki_blob.basename) - wiki_blob_link = project_wiki_path(project, wiki_blob.basename)
= render partial: 'search/results/blob_data', locals: { blob: wiki_blob, project: project, file_name: file_name, blob_link: wiki_blob_link } = render partial: 'search/results/blob_data', locals: { blob: wiki_blob, project: project, file_name: wiki_blob.filename, blob_link: wiki_blob_link }
---
title: Remove limit of 100 when searching repository code.
merge_request: 8671
author:
type: fixed
...@@ -722,6 +722,17 @@ Example response: ...@@ -722,6 +722,17 @@ Example response:
### Scope: wiki_blobs ### Scope: wiki_blobs
Wiki blobs searches are performed on both filenames and contents. Search
results:
- Found in filenames are displayed before results found in contents.
- May contain multiple matches for the same blob because the search string
might be found in both the filename and content, and matches of the different
types are displayed separately.
- May contain multiple matches for the same blob because the search string
  might appear multiple times in the content.
```bash ```bash
curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=wiki_blobs&search=bye curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=wiki_blobs&search=bye
``` ```
...@@ -783,6 +794,15 @@ Filters are available for this scope: ...@@ -783,6 +794,15 @@ Filters are available for this scope:
to use a filter simply include it in your query like so: `a query filename:some_name*`. to use a filter simply include it in your query like so: `a query filename:some_name*`.
Blobs searches are performed on both filenames and contents. Search results:
- Found in filenames are displayed before results found in contents.
- May contain multiple matches for the same blob because the search string
might be found in both the filename and content, and matches of the different
types are displayed separately.
- May contain multiple matches for the same blob because the search string
  might appear multiple times in the content.
You may use wildcards (`*`) to use glob matching. You may use wildcards (`*`) to use glob matching.
```bash ```bash
......
...@@ -35,12 +35,7 @@ module API ...@@ -35,12 +35,7 @@ module API
end end
def process_results(results) def process_results(results)
case params[:scope] paginate(results)
when 'blobs', 'wiki_blobs'
paginate(results).map { |blob| blob[1] }
else
paginate(results)
end
end end
def snippets? def snippets?
......
...@@ -4,8 +4,6 @@ ...@@ -4,8 +4,6 @@
# the result is joined and sorted by file name # the result is joined and sorted by file name
module Gitlab module Gitlab
class FileFinder class FileFinder
BATCH_SIZE = 100
attr_reader :project, :ref attr_reader :project, :ref
delegate :repository, to: :project delegate :repository, to: :project
...@@ -16,60 +14,35 @@ module Gitlab ...@@ -16,60 +14,35 @@ module Gitlab
end end
def find(query) def find(query)
query = Gitlab::Search::Query.new(query) do query = Gitlab::Search::Query.new(query, encode_binary: true) do
filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i } filter :filename, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}$/i }
filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i } filter :path, matcher: ->(filter, blob) { blob.binary_filename =~ /#{filter[:regex_value]}/i }
filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i } filter :extension, matcher: ->(filter, blob) { blob.binary_filename =~ /\.#{filter[:regex_value]}$/i }
end end
by_content = find_by_content(query.term) files = find_by_filename(query.term) + find_by_content(query.term)
already_found = Set.new(by_content.map(&:filename))
by_filename = find_by_filename(query.term, except: already_found)
files = (by_content + by_filename) files = query.filter_results(files) if query.filters.any?
.sort_by(&:filename)
query.filter_results(files).map { |blob| [blob.filename, blob] } files
end end
private private
def find_by_content(query) def find_by_content(query)
results = repository.search_files_by_content(query, ref).first(BATCH_SIZE) repository.search_files_by_content(query, ref).map do |result|
results.map { |result| Gitlab::ProjectSearchResults.parse_search_result(result, project) } Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository)
end
def find_by_filename(query, except: [])
filenames = search_filenames(query, except)
blobs(filenames).map do |blob|
Gitlab::SearchResults::FoundBlob.new(
id: blob.id,
filename: blob.path,
basename: File.basename(blob.path, File.extname(blob.path)),
ref: ref,
startline: 1,
data: blob.data,
project: project
)
end end
end end
def search_filenames(query, except) def find_by_filename(query)
filenames = repository.search_files_by_name(query, ref).first(BATCH_SIZE) search_filenames(query).map do |filename|
Gitlab::Search::FoundBlob.new(blob_filename: filename, project: project, ref: ref, repository: repository)
filenames.delete_if { |filename| except.include?(filename) } unless except.empty? end
filenames
end
def blob_refs(filenames)
filenames.map { |filename| [ref, filename] }
end end
def blobs(filenames) def search_filenames(query)
Gitlab::Git::Blob.batch(repository, blob_refs(filenames), blob_size_limit: 1024) repository.search_files_by_name(query, ref)
end end
end end
end end
...@@ -17,9 +17,9 @@ module Gitlab ...@@ -17,9 +17,9 @@ module Gitlab
when 'notes' when 'notes'
notes.page(page).per(per_page) notes.page(page).per(per_page)
when 'blobs' when 'blobs'
Kaminari.paginate_array(blobs).page(page).per(per_page) paginated_blobs(blobs, page)
when 'wiki_blobs' when 'wiki_blobs'
Kaminari.paginate_array(wiki_blobs).page(page).per(per_page) paginated_blobs(wiki_blobs, page)
when 'commits' when 'commits'
Kaminari.paginate_array(commits).page(page).per(per_page) Kaminari.paginate_array(commits).page(page).per(per_page)
else else
...@@ -55,37 +55,6 @@ module Gitlab ...@@ -55,37 +55,6 @@ module Gitlab
@commits_count ||= commits.count @commits_count ||= commits.count
end end
def self.parse_search_result(result, project = nil)
ref = nil
filename = nil
basename = nil
data = []
startline = 0
result.each_line.each_with_index do |line, index|
prefix ||= line.match(/^(?<ref>[^:]*):(?<filename>[^\x00]*)\x00(?<startline>\d+)\x00/)&.tap do |matches|
ref = matches[:ref]
filename = matches[:filename]
startline = matches[:startline]
startline = startline.to_i - index
extname = Regexp.escape(File.extname(filename))
basename = filename.sub(/#{extname}$/, '')
end
data << line.sub(prefix.to_s, '')
end
FoundBlob.new(
filename: filename,
basename: basename,
ref: ref,
startline: startline,
data: data.join,
project: project
)
end
def single_commit_result? def single_commit_result?
return false if commits_count != 1 return false if commits_count != 1
...@@ -97,6 +66,14 @@ module Gitlab ...@@ -97,6 +66,14 @@ module Gitlab
private private
def paginated_blobs(blobs, page)
results = Kaminari.paginate_array(blobs).page(page).per(per_page)
Gitlab::Search::FoundBlob.preload_blobs(results)
results
end
def blobs def blobs
return [] unless Ability.allowed?(@current_user, :download_code, @project) return [] unless Ability.allowed?(@current_user, :download_code, @project)
......
# frozen_string_literal: true
module Gitlab
module Search
class FoundBlob
include EncodingHelper
include Presentable
include BlobLanguageFromGitAttributes
include Gitlab::Utils::StrongMemoize
attr_reader :project, :content_match, :blob_filename
FILENAME_REGEXP = /\A(?<ref>[^:]*):(?<filename>[^\x00]*)\x00/.freeze
CONTENT_REGEXP = /^(?<ref>[^:]*):(?<filename>[^\x00]*)\x00(?<startline>\d+)\x00/.freeze
def self.preload_blobs(blobs)
to_fetch = blobs.select { |blob| blob.is_a?(self) && blob.blob_filename }
to_fetch.each { |blob| blob.fetch_blob }
end
def initialize(opts = {})
@id = opts.fetch(:id, nil)
@binary_filename = opts.fetch(:filename, nil)
@binary_basename = opts.fetch(:basename, nil)
@ref = opts.fetch(:ref, nil)
@startline = opts.fetch(:startline, nil)
@binary_data = opts.fetch(:data, nil)
@per_page = opts.fetch(:per_page, 20)
@project = opts.fetch(:project, nil)
# Some caller does not have project object (e.g. elastic search),
# yet they can trigger many calls in one go,
# causing duplicated queries.
# Allow those to just pass project_id instead.
@project_id = opts.fetch(:project_id, nil)
@content_match = opts.fetch(:content_match, nil)
@blob_filename = opts.fetch(:blob_filename, nil)
@repository = opts.fetch(:repository, nil)
end
def id
@id ||= parsed_content[:id]
end
def ref
@ref ||= parsed_content[:ref]
end
def startline
@startline ||= parsed_content[:startline]
end
# binary_filename is used for running filters on all matches,
# for grepped results (which use content_match), we get
# filename from the beginning of the grepped result which is faster
# then parsing whole snippet
def binary_filename
@binary_filename ||= content_match ? search_result_filename : parsed_content[:binary_filename]
end
def filename
@filename ||= encode_utf8(@binary_filename || parsed_content[:binary_filename])
end
def basename
@basename ||= encode_utf8(@binary_basename || parsed_content[:binary_basename])
end
def data
@data ||= encode_utf8(@binary_data || parsed_content[:binary_data])
end
def path
filename
end
def project_id
@project_id || @project&.id
end
def present
super(presenter_class: BlobPresenter)
end
def fetch_blob
path = [ref, blob_filename]
missing_blob = { binary_filename: blob_filename }
BatchLoader.for(path).batch(default_value: missing_blob) do |refs, loader|
Gitlab::Git::Blob.batch(repository, refs, blob_size_limit: 1024).each do |blob|
# if the blob couldn't be fetched for some reason,
# show at least the blob filename
data = {
id: blob.id,
binary_filename: blob.path,
binary_basename: File.basename(blob.path, File.extname(blob.path)),
ref: ref,
startline: 1,
binary_data: blob.data,
project: project
}
loader.call([ref, blob.path], data)
end
end
end
private
def search_result_filename
content_match.match(FILENAME_REGEXP) { |matches| matches[:filename] }
end
def parsed_content
strong_memoize(:parsed_content) do
if content_match
parse_search_result
elsif blob_filename
fetch_blob
else
{}
end
end
end
def parse_search_result
ref = nil
filename = nil
basename = nil
data = []
startline = 0
content_match.each_line.each_with_index do |line, index|
prefix ||= line.match(CONTENT_REGEXP)&.tap do |matches|
ref = matches[:ref]
filename = matches[:filename]
startline = matches[:startline]
startline = startline.to_i - index
extname = Regexp.escape(File.extname(filename))
basename = filename.sub(/#{extname}$/, '')
end
data << line.sub(prefix.to_s, '')
end
{
binary_filename: filename,
binary_basename: basename,
ref: ref,
startline: startline,
binary_data: data.join,
project: project
}
end
def repository
@repository ||= project.repository
end
end
end
end
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
module Gitlab module Gitlab
module Search module Search
class Query < SimpleDelegator class Query < SimpleDelegator
include EncodingHelper
def initialize(query, filter_opts = {}, &block) def initialize(query, filter_opts = {}, &block)
@raw_query = query.dup @raw_query = query.dup
@filters = [] @filters = []
...@@ -50,7 +52,9 @@ module Gitlab ...@@ -50,7 +52,9 @@ module Gitlab
end end
def parse_filter(filter, input) def parse_filter(filter, input)
filter[:parser].call(input) result = filter[:parser].call(input)
@filter_options[:encode_binary] ? encode_binary(result) : result
end end
end end
end end
......
...@@ -2,42 +2,6 @@ ...@@ -2,42 +2,6 @@
module Gitlab module Gitlab
class SearchResults class SearchResults
class FoundBlob
include EncodingHelper
include Presentable
include BlobLanguageFromGitAttributes
attr_reader :id, :filename, :basename, :ref, :startline, :data, :project
def initialize(opts = {})
@id = opts.fetch(:id, nil)
@filename = encode_utf8(opts.fetch(:filename, nil))
@basename = encode_utf8(opts.fetch(:basename, nil))
@ref = opts.fetch(:ref, nil)
@startline = opts.fetch(:startline, nil)
@data = encode_utf8(opts.fetch(:data, nil))
@per_page = opts.fetch(:per_page, 20)
@project = opts.fetch(:project, nil)
# Some caller does not have project object (e.g. elastic search),
# yet they can trigger many calls in one go,
# causing duplicated queries.
# Allow those to just pass project_id instead.
@project_id = opts.fetch(:project_id, nil)
end
def path
filename
end
def project_id
@project_id || @project&.id
end
def present
super(presenter_class: BlobPresenter)
end
end
attr_reader :current_user, :query, :per_page attr_reader :current_user, :query, :per_page
# Limit search results by passed projects # Limit search results by passed projects
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
module Gitlab module Gitlab
class WikiFileFinder < FileFinder class WikiFileFinder < FileFinder
BATCH_SIZE = 100
attr_reader :repository attr_reader :repository
def initialize(project, ref) def initialize(project, ref)
...@@ -12,13 +14,11 @@ module Gitlab ...@@ -12,13 +14,11 @@ module Gitlab
private private
def search_filenames(query, except) def search_filenames(query)
safe_query = Regexp.escape(query.tr(' ', '-')) safe_query = Regexp.escape(query.tr(' ', '-'))
safe_query = Regexp.new(safe_query, Regexp::IGNORECASE) safe_query = Regexp.new(safe_query, Regexp::IGNORECASE)
filenames = repository.ls_files(ref) filenames = repository.ls_files(ref)
filenames.delete_if { |filename| except.include?(filename) } unless except.empty?
filenames.grep(safe_query).first(BATCH_SIZE) filenames.grep(safe_query).first(BATCH_SIZE)
end end
end end
......
...@@ -54,11 +54,18 @@ describe Gitlab::ProjectSearchResults do ...@@ -54,11 +54,18 @@ describe Gitlab::ProjectSearchResults do
end end
it 'finds by name' do it 'finds by name' do
expect(results.map(&:first)).to include(expected_file_by_name) expect(results.map(&:filename)).to include(expected_file_by_name)
end
it "loads all blobs for filename matches in single batch" do
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expected = project.repository.search_files_by_name(query, 'master')
expect(results.map(&:filename)).to include(*expected)
end end
it 'finds by content' do it 'finds by content' do
blob = results.select { |result| result.first == expected_file_by_content }.flatten.last blob = results.select { |result| result.filename == expected_file_by_content }.flatten.last
expect(blob.filename).to eq(expected_file_by_content) expect(blob.filename).to eq(expected_file_by_content)
end end
...@@ -122,126 +129,6 @@ describe Gitlab::ProjectSearchResults do ...@@ -122,126 +129,6 @@ describe Gitlab::ProjectSearchResults do
let(:blob_type) { 'blobs' } let(:blob_type) { 'blobs' }
let(:entity) { project } let(:entity) { project }
end end
describe 'parsing results' do
let(:results) { project.repository.search_files_by_content('feature', 'master') }
let(:search_result) { results.first }
subject { described_class.parse_search_result(search_result) }
it "returns a valid FoundBlob" do
is_expected.to be_an Gitlab::SearchResults::FoundBlob
expect(subject.id).to be_nil
expect(subject.path).to eq('CHANGELOG')
expect(subject.filename).to eq('CHANGELOG')
expect(subject.basename).to eq('CHANGELOG')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(188)
expect(subject.data.lines[2]).to eq(" - Feature: Replace teams with group membership\n")
end
context 'when the matching filename contains a colon' do
let(:search_result) { "master:testdata/project::function1.yaml\x001\x00---\n" }
it 'returns a valid FoundBlob' do
expect(subject.filename).to eq('testdata/project::function1.yaml')
expect(subject.basename).to eq('testdata/project::function1')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq("---\n")
end
end
context 'when the matching content contains a number surrounded by colons' do
let(:search_result) { "master:testdata/foo.txt\x001\x00blah:9:blah" }
it 'returns a valid FoundBlob' do
expect(subject.filename).to eq('testdata/foo.txt')
expect(subject.basename).to eq('testdata/foo')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq('blah:9:blah')
end
end
context 'when the matching content contains multiple null bytes' do
let(:search_result) { "master:testdata/foo.txt\x001\x00blah\x001\x00foo" }
it 'returns a valid FoundBlob' do
expect(subject.filename).to eq('testdata/foo.txt')
expect(subject.basename).to eq('testdata/foo')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq("blah\x001\x00foo")
end
end
context 'when the search result ends with an empty line' do
let(:results) { project.repository.search_files_by_content('Role models', 'master') }
it 'returns a valid FoundBlob that ends with an empty line' do
expect(subject.filename).to eq('files/markdown/ruby-style-guide.md')
expect(subject.basename).to eq('files/markdown/ruby-style-guide')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq("# Prelude\n\n> Role models are important. <br/>\n> -- Officer Alex J. Murphy / RoboCop\n\n")
end
end
context 'when the search returns non-ASCII data' do
context 'with UTF-8' do
let(:results) { project.repository.search_files_by_content('файл', 'master') }
it 'returns results as UTF-8' do
expect(subject.filename).to eq('encoding/russian.rb')
expect(subject.basename).to eq('encoding/russian')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq("Хороший файл\n")
end
end
context 'with UTF-8 in the filename' do
let(:results) { project.repository.search_files_by_content('webhook', 'master') }
it 'returns results as UTF-8' do
expect(subject.filename).to eq('encoding/テスト.txt')
expect(subject.basename).to eq('encoding/テスト')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(3)
expect(subject.data).to include('WebHookの確認')
end
end
context 'with ISO-8859-1' do
let(:search_result) { "master:encoding/iso8859.txt\x001\x00\xC4\xFC\nmaster:encoding/iso8859.txt\x002\x00\nmaster:encoding/iso8859.txt\x003\x00foo\n".force_encoding(Encoding::ASCII_8BIT) }
it 'returns results as UTF-8' do
expect(subject.filename).to eq('encoding/iso8859.txt')
expect(subject.basename).to eq('encoding/iso8859')
expect(subject.ref).to eq('master')
expect(subject.startline).to eq(1)
expect(subject.data).to eq("Äü\n\nfoo\n")
end
end
end
context "when filename has extension" do
let(:search_result) { "master:CONTRIBUTE.md\x005\x00- [Contribute to GitLab](#contribute-to-gitlab)\n" }
it { expect(subject.path).to eq('CONTRIBUTE.md') }
it { expect(subject.filename).to eq('CONTRIBUTE.md') }
it { expect(subject.basename).to eq('CONTRIBUTE') }
end
context "when file under directory" do
let(:search_result) { "master:a/b/c.md\x005\x00a b c\n" }
it { expect(subject.path).to eq('a/b/c.md') }
it { expect(subject.filename).to eq('a/b/c.md') }
it { expect(subject.basename).to eq('a/b/c') }
end
end
end end
describe 'wiki search' do describe 'wiki search' do
......
# coding: utf-8
require 'spec_helper'

# Specs for Gitlab::Search::FoundBlob: raw search-result parsing (prefix
# format "ref:filename\x00startline\x00data"), lazy/once-only parsing, and
# UTF-8 re-encoding of non-ASCII filenames and content.
describe Gitlab::Search::FoundBlob do
  describe 'parsing results' do
    let(:project) { create(:project, :public, :repository) }
    let(:results) { project.repository.search_files_by_content('feature', 'master') }
    let(:search_result) { results.first }

    subject { described_class.new(content_match: search_result, project: project) }

    it "returns a valid FoundBlob" do
      is_expected.to be_an described_class
      expect(subject.id).to be_nil
      expect(subject.path).to eq('CHANGELOG')
      expect(subject.filename).to eq('CHANGELOG')
      expect(subject.basename).to eq('CHANGELOG')
      expect(subject.ref).to eq('master')
      expect(subject.startline).to eq(188)
      expect(subject.data.lines[2]).to eq(" - Feature: Replace teams with group membership\n")
    end

    # binary_filename only reads the result prefix, so it must not trigger
    # full parsing of the match content.
    it "doesn't parses content if not needed" do
      expect(subject).not_to receive(:parse_search_result)

      expect(subject.project_id).to eq(project.id)
      expect(subject.binary_filename).to eq('CHANGELOG')
    end

    # Parsing is memoized: multiple attribute reads share one parse.
    it "parses content only once when needed" do
      expect(subject).to receive(:parse_search_result).once.and_call_original

      expect(subject.filename).to eq('CHANGELOG')
      expect(subject.startline).to eq(188)
    end

    context 'when the matching filename contains a colon' do
      let(:search_result) { "master:testdata/project::function1.yaml\x001\x00---\n" }

      it 'returns a valid FoundBlob' do
        expect(subject.filename).to eq('testdata/project::function1.yaml')
        expect(subject.basename).to eq('testdata/project::function1')
        expect(subject.ref).to eq('master')
        expect(subject.startline).to eq(1)
        expect(subject.data).to eq("---\n")
      end
    end

    context 'when the matching content contains a number surrounded by colons' do
      let(:search_result) { "master:testdata/foo.txt\x001\x00blah:9:blah" }

      it 'returns a valid FoundBlob' do
        expect(subject.filename).to eq('testdata/foo.txt')
        expect(subject.basename).to eq('testdata/foo')
        expect(subject.ref).to eq('master')
        expect(subject.startline).to eq(1)
        expect(subject.data).to eq('blah:9:blah')
      end
    end

    context 'when the matching content contains multiple null bytes' do
      let(:search_result) { "master:testdata/foo.txt\x001\x00blah\x001\x00foo" }

      it 'returns a valid FoundBlob' do
        expect(subject.filename).to eq('testdata/foo.txt')
        expect(subject.basename).to eq('testdata/foo')
        expect(subject.ref).to eq('master')
        expect(subject.startline).to eq(1)
        expect(subject.data).to eq("blah\x001\x00foo")
      end
    end

    context 'when the search result ends with an empty line' do
      let(:results) { project.repository.search_files_by_content('Role models', 'master') }

      it 'returns a valid FoundBlob that ends with an empty line' do
        expect(subject.filename).to eq('files/markdown/ruby-style-guide.md')
        expect(subject.basename).to eq('files/markdown/ruby-style-guide')
        expect(subject.ref).to eq('master')
        expect(subject.startline).to eq(1)
        expect(subject.data).to eq("# Prelude\n\n> Role models are important. <br/>\n> -- Officer Alex J. Murphy / RoboCop\n\n")
      end
    end

    context 'when the search returns non-ASCII data' do
      context 'with UTF-8' do
        let(:results) { project.repository.search_files_by_content('файл', 'master') }

        it 'returns results as UTF-8' do
          expect(subject.filename).to eq('encoding/russian.rb')
          expect(subject.basename).to eq('encoding/russian')
          expect(subject.ref).to eq('master')
          expect(subject.startline).to eq(1)
          expect(subject.data).to eq("Хороший файл\n")
        end
      end

      context 'with UTF-8 in the filename' do
        let(:results) { project.repository.search_files_by_content('webhook', 'master') }

        it 'returns results as UTF-8' do
          expect(subject.filename).to eq('encoding/テスト.txt')
          expect(subject.basename).to eq('encoding/テスト')
          expect(subject.ref).to eq('master')
          expect(subject.startline).to eq(3)
          expect(subject.data).to include('WebHookの確認')
        end
      end

      context 'with ISO-8859-1' do
        let(:search_result) { "master:encoding/iso8859.txt\x001\x00\xC4\xFC\nmaster:encoding/iso8859.txt\x002\x00\nmaster:encoding/iso8859.txt\x003\x00foo\n".force_encoding(Encoding::ASCII_8BIT) }

        it 'returns results as UTF-8' do
          expect(subject.filename).to eq('encoding/iso8859.txt')
          expect(subject.basename).to eq('encoding/iso8859')
          expect(subject.ref).to eq('master')
          expect(subject.startline).to eq(1)
          expect(subject.data).to eq("Äü\n\nfoo\n")
        end
      end
    end

    context "when filename has extension" do
      let(:search_result) { "master:CONTRIBUTE.md\x005\x00- [Contribute to GitLab](#contribute-to-gitlab)\n" }

      it { expect(subject.path).to eq('CONTRIBUTE.md') }
      it { expect(subject.filename).to eq('CONTRIBUTE.md') }
      it { expect(subject.basename).to eq('CONTRIBUTE') }
    end

    context "when file under directory" do
      let(:search_result) { "master:a/b/c.md\x005\x00a b c\n" }

      it { expect(subject.path).to eq('a/b/c.md') }
      it { expect(subject.filename).to eq('a/b/c.md') }
      it { expect(subject.basename).to eq('a/b/c') }
    end
  end
end
...@@ -3,18 +3,19 @@ shared_examples 'file finder' do ...@@ -3,18 +3,19 @@ shared_examples 'file finder' do
let(:search_results) { subject.find(query) } let(:search_results) { subject.find(query) }
it 'finds by name' do it 'finds by name' do
filename, blob = search_results.find { |_, blob| blob.filename == expected_file_by_name } blob = search_results.find { |blob| blob.filename == expected_file_by_name }
expect(filename).to eq(expected_file_by_name)
expect(blob).to be_a(Gitlab::SearchResults::FoundBlob) expect(blob.filename).to eq(expected_file_by_name)
expect(blob).to be_a(Gitlab::Search::FoundBlob)
expect(blob.ref).to eq(subject.ref) expect(blob.ref).to eq(subject.ref)
expect(blob.data).not_to be_empty expect(blob.data).not_to be_empty
end end
it 'finds by content' do it 'finds by content' do
filename, blob = search_results.find { |_, blob| blob.filename == expected_file_by_content } blob = search_results.find { |blob| blob.filename == expected_file_by_content }
expect(filename).to eq(expected_file_by_content) expect(blob.filename).to eq(expected_file_by_content)
expect(blob).to be_a(Gitlab::SearchResults::FoundBlob) expect(blob).to be_a(Gitlab::Search::FoundBlob)
expect(blob.ref).to eq(subject.ref) expect(blob.ref).to eq(subject.ref)
expect(blob.data).not_to be_empty expect(blob.data).not_to be_empty
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment