Commit 3f2833b9 authored by Sean McGivern's avatar Sean McGivern

Merge branch '5024-filename-search' into 'master'

Add filters to blob search with and without Elasticsearch

Closes #5024

See merge request gitlab-org/gitlab-ee!5590
parents c094a742 54e0996f
...@@ -345,6 +345,15 @@ Example response: ...@@ -345,6 +345,15 @@ Example response:
This scope is available only if [Elasticsearch](../integration/elasticsearch.md) is enabled. This scope is available only if [Elasticsearch](../integration/elasticsearch.md) is enabled.
Filters are available for this scope:
- filename
- path
- extension
To use a filter, include it in your query like so: `a query filename:some_name*`.
You may use wildcards (`*`) for glob matching.
```bash ```bash
curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/search?scope=blobs&search=installation curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/search?scope=blobs&search=installation
``` ```
...@@ -649,6 +658,15 @@ Example response: ...@@ -649,6 +658,15 @@ Example response:
This scope is available only if [Elasticsearch](../integration/elasticsearch.md) is enabled. This scope is available only if [Elasticsearch](../integration/elasticsearch.md) is enabled.
Filters are available for this scope:
- filename
- path
- extension
To use a filter, include it in your query like so: `a query filename:some_name*`.
You may use wildcards (`*`) for glob matching.
```bash ```bash
curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/groups/6/search?scope=blobs&search=installation curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/groups/6/search?scope=blobs&search=installation
``` ```
...@@ -946,6 +964,15 @@ Example response: ...@@ -946,6 +964,15 @@ Example response:
### Scope: blobs ### Scope: blobs
Filters are available for this scope:
- filename
- path
- extension
To use a filter, include it in your query like so: `a query filename:some_name*`.
You may use wildcards (`*`) for glob matching.
```bash ```bash
curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation
``` ```
......
---
title: Add filename filtering to code search with Elasticsearch
merge_request: 5590
author:
type: added
module EE
  module Gitlab
    module Search
      # Elasticsearch-specific additions for a parsed search query.
      #
      # The host object must respond to `filters` with a collection of filter
      # hashes; the keys read here are :type, :field, :name and :value.
      module ParsedQuery
        # Builds one Elasticsearch clause per parsed filter.
        #
        # object - prefix for the document field names (e.g. :blob)
        #
        # Returns an Array of Hashes shaped like
        # { <type> => { "<object>.<field>" => <value> } }.
        def elasticsearch_filters(object)
          filters.map { |filter| prepare_for_elasticsearch(object, filter) }
        end

        private

        # Converts a single filter hash into an Elasticsearch clause.
        # The clause type falls back to :wildcard and the field name falls
        # back to the filter's own name when they are not given.
        def prepare_for_elasticsearch(object, filter)
          clause_type = filter[:type] || :wildcard
          target_field = filter[:field] || filter[:name]

          { clause_type => { "#{object}.#{target_field}" => filter[:value] } }
        end
      end
    end
  end
end
...@@ -468,12 +468,18 @@ module Elasticsearch ...@@ -468,12 +468,18 @@ module Elasticsearch
def search_blob(query, type: :all, page: 1, per: 20, options: {}) def search_blob(query, type: :all, page: 1, per: 20, options: {})
page ||= 1 page ||= 1
query = ::Gitlab::Search::Query.new(query) do
filter :filename, field: :file_name
filter :path, parser: ->(input) { "*#{input.downcase}*" }
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
end
query_hash = { query_hash = {
query: { query: {
bool: { bool: {
must: { must: {
simple_query_string: { simple_query_string: {
query: query, query: query.term,
default_operator: :and, default_operator: :and,
fields: %w[blob.content blob.file_name] fields: %w[blob.content blob.file_name]
} }
...@@ -485,6 +491,8 @@ module Elasticsearch ...@@ -485,6 +491,8 @@ module Elasticsearch
from: per * (page - 1) from: per * (page - 1)
} }
query_hash[:query][:bool][:filter] += query.elasticsearch_filters(:blob)
if options[:repository_id] if options[:repository_id]
query_hash[:query][:bool][:filter] << { query_hash[:query][:bool][:filter] << {
terms: { terms: {
......
...@@ -23,6 +23,15 @@ describe Repository, :elastic do ...@@ -23,6 +23,15 @@ describe Repository, :elastic do
expect(project.repository.search('initial')[:commits][:total_count]).to eq(1) expect(project.repository.search('initial')[:commits][:total_count]).to eq(1)
end end
it 'can filter blobs' do
  project = create :project, :repository
  index!(project)

  # Total number of blob hits reported for a repository search.
  blob_total = ->(query) { project.repository.search(query)[:blobs][:total_count] }

  expect(blob_total.call('def | popen filename:test')).to eq(1)
  expect(blob_total.call('def | popen path:ruby')).to eq(4)
  expect(blob_total.call('def | popen extension:md')).to eq(1)
end
def search_and_check!(on, query, type:, per: 1000) def search_and_check!(on, query, type:, per: 1000)
results = on.search(query, type: type, per: per)["#{type}s".to_sym][:results] results = on.search(query, type: type, per: per)["#{type}s".to_sym][:results]
......
...@@ -78,6 +78,36 @@ describe API::Search do ...@@ -78,6 +78,36 @@ describe API::Search do
end end
it_behaves_like 'response is correct', schema: 'public_api/v4/blobs' it_behaves_like 'response is correct', schema: 'public_api/v4/blobs'
context 'filters' do
  # Issues a project-scoped blob search request with the given search string.
  def search_project_blobs(term)
    get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: term
  end

  it 'by filename' do
    search_project_blobs('mon filename:PROCESS.md')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(1)
    expect(json_response.first['filename']).to eq('PROCESS.md')
  end

  it 'by path' do
    search_project_blobs('mon path:markdown')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(1)
    # Every returned blob must live under a markdown/ directory.
    json_response.each { |file| expect(file['filename']).to match(%r[/markdown/]) }
  end

  it 'by extension' do
    search_project_blobs('mon extension:md')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(3)
    # Every returned blob must end in .md.
    json_response.each { |file| expect(file['filename']).to match(/\A.+\.md\z/) }
  end
end
end end
end end
......
...@@ -14,14 +14,21 @@ module Gitlab ...@@ -14,14 +14,21 @@ module Gitlab
end end
def find(query) def find(query)
by_content = find_by_content(query) query = Gitlab::Search::Query.new(query) do
filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}$/i }
filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i }
filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}$/i }
end
by_content = find_by_content(query.term)
already_found = Set.new(by_content.map(&:filename)) already_found = Set.new(by_content.map(&:filename))
by_filename = find_by_filename(query, except: already_found) by_filename = find_by_filename(query.term, except: already_found)
(by_content + by_filename) files = (by_content + by_filename)
.sort_by(&:filename) .sort_by(&:filename)
.map { |blob| [blob.filename, blob] }
query.filter_results(files).map { |blob| [blob.filename, blob] }
end end
private private
......
module Gitlab
  module Search
    # A search string after filter extraction: the remaining free-text term
    # plus the filter hashes that were found in the query.
    class ParsedQuery
      prepend EE::Gitlab::Search::ParsedQuery

      attr_reader :term, :filters

      # term    - the search text left over once filters were removed
      # filters - collection of filter hashes; a filter may carry a :matcher
      #           callable taking (filter, result)
      def initialize(term, filters)
        @term = term
        @filters = filters
      end

      # Keeps only the results accepted by every filter that has a :matcher.
      # Filters without a matcher are ignored.
      #
      # results - an Enumerable of candidate results
      #
      # Returns an Array. When no filter has a matcher, `all?` on the empty
      # list is true for each result, so every result is kept; no separate
      # guard is needed for that case.
      def filter_results(results)
        with_matcher = @filters.reject { |filter| filter[:matcher].nil? }

        results.select do |result|
          with_matcher.all? { |filter| filter[:matcher].call(filter, result) }
        end
      end
    end
  end
end
module Gitlab
  module Search
    # Parses a raw search string into a free-text term plus declared filters.
    #
    # Filters are declared in the block given to `new`, which is evaluated
    # against the instance:
    #
    #   Query.new('foo filename:bar*') { filter :filename }
    #
    # Once built, method calls are delegated to the resulting
    # Gitlab::Search::ParsedQuery.
    class Query < SimpleDelegator
      # query       - the raw search string
      # filter_opts - overrides for the filter defaults (:default_parser)
      # block       - filter declarations, evaluated with the instance as self
      def initialize(query, filter_opts = {}, &block)
        @raw_query = query.dup
        @filters = []
        @filter_options = { default_parser: :downcase.to_proc }.merge(filter_opts)

        # The block is captured explicitly, so test it directly.
        self.instance_eval(&block) if block

        @query = Gitlab::Search::ParsedQuery.new(*extract_filters)
        # set the ParsedQuery as our default delegator thanks to SimpleDelegator
        super(@query)
      end

      private

      # Declares a filter called `name`. Extra attributes (e.g. :parser,
      # :field, :matcher) are stored on the filter hash as given.
      def filter(name, **attributes)
        definition = { parser: @filter_options[:default_parser], name: name }.merge(attributes)

        @filters << definition
      end

      # Overrides the defaults applied to filters declared afterwards.
      def filter_options(**options)
        @filter_options.merge!(options)
      end

      # Splits the raw query into the remaining term and the filters found.
      #
      # For each declared filter the first whitespace-separated word starting
      # with "<name>:" is used. Words with nothing after the colon are left
      # in the term untouched.
      #
      # Returns [term, filters].
      def extract_filters
        words = @raw_query.split
        fragments = []

        filters = @filters.each_with_object([]) do |filter, parsed_filters|
          prefix = "#{filter[:name]}:"
          match = words.find { |word| word.start_with?(prefix) }
          next unless match

          # Strip only the "<name>:" prefix so that colons inside the value
          # (e.g. `path:a:b`) are preserved.
          input = match[prefix.length..-1]
          next if input.empty?

          filter[:value] = parse_filter(filter, input)
          # `*` in the value is a glob wildcard: escape everything else and
          # turn the escaped star into a lazy "match anything".
          filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?')
          fragments << match
          parsed_filters << filter
        end

        [(words - fragments).join(' '), filters]
      end

      # Runs the filter's parser over the raw filter input.
      def parse_filter(filter, input)
        filter[:parser].call(input)
      end
    end
  end
end
...@@ -3,11 +3,29 @@ require 'spec_helper' ...@@ -3,11 +3,29 @@ require 'spec_helper'
describe Gitlab::FileFinder do describe Gitlab::FileFinder do
describe '#find' do describe '#find' do
let(:project) { create(:project, :public, :repository) } let(:project) { create(:project, :public, :repository) }
subject { described_class.new(project, project.default_branch) }
it_behaves_like 'file finder' do it_behaves_like 'file finder' do
subject { described_class.new(project, project.default_branch) }
let(:expected_file_by_name) { 'files/images/wm.svg' } let(:expected_file_by_name) { 'files/images/wm.svg' }
let(:expected_file_by_content) { 'CHANGELOG' } let(:expected_file_by_content) { 'CHANGELOG' }
end end
# Each search below is expected to be narrowed to a single result by its filter.
it 'filters by name' do
  expect(subject.find('files filename:wm.svg').count).to eq(1)
end

it 'filters by path' do
  expect(subject.find('white path:images').count).to eq(1)
end

it 'filters by extension' do
  expect(subject.find('files extension:svg').count).to eq(1)
end
end end
end end
require 'spec_helper'
describe Gitlab::Search::Query do
  let(:query) { 'base filter:wow anotherfilter:noway name:maybe other:mmm leftover' }

  # Query under test, declaring three filters; `name` upcases its value.
  subject do
    described_class.new(query) do
      filter :filter
      filter :name, parser: :upcase.to_proc
      filter :other
    end
  end

  it { expect(described_class).to be < SimpleDelegator }

  it 'leaves undefined filters in the main query' do
    expect(subject.term).to eq('base anotherfilter:noway leftover')
  end

  it 'parses filters' do
    expect(subject.filters.count).to eq(3)
    expect(subject.filters.map { |f| f[:value] }).to match_array(%w[wow MAYBE mmm])
  end

  context 'with an empty filter' do
    # `name:` has nothing after the colon, so it stays in the term.
    let(:query) { 'some bar name: baz' }

    it 'ignores empty filters' do
      expect(subject.term).to eq('some bar name: baz')
    end
  end

  context 'with a pipe' do
    let(:query) { 'base | nofilter' }

    it 'does not escape the pipe' do
      expect(subject.term).to eq(query)
    end
  end
end
...@@ -312,6 +312,30 @@ describe API::Search do ...@@ -312,6 +312,30 @@ describe API::Search do
end end
it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2 it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2
context 'filters' do
  # Issues a project-scoped blob search request with the given search string.
  def search_project_blobs(term)
    get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: term
  end

  it 'by filename' do
    search_project_blobs('mon filename:PROCESS.md')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(2)
    expect(json_response.first['filename']).to eq('PROCESS.md')
  end

  it 'by path' do
    search_project_blobs('mon path:markdown')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(8)
  end

  it 'by extension' do
    search_project_blobs('mon extension:md')

    expect(response).to have_gitlab_http_status(200)
    expect(json_response.size).to eq(11)
  end
end
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment