From 7357209f91ae4c0b504f47e36220bd04a0e2feca Mon Sep 17 00:00:00 2001
From: Mario de la Ossa <mariodelaossa@gmail.com>
Date: Wed, 6 Jun 2018 18:14:10 -0600
Subject: [PATCH] Implement filtering by filename on code search

---
 .../unreleased/ce-5024-filename-search.yml    |  5 ++
 doc/api/search.md                             |  9 +++
 lib/gitlab/file_finder.rb                     | 17 ++++--
 lib/gitlab/search/parsed_query.rb             | 23 ++++++++
 lib/gitlab/search/query.rb                    | 55 +++++++++++++++++++
 spec/lib/gitlab/file_finder_spec.rb           | 20 ++++++-
 spec/lib/gitlab/search/query_spec.rb          | 39 +++++++++++++
 spec/requests/api/search_spec.rb              | 24 ++++++++
 8 files changed, 186 insertions(+), 6 deletions(-)
 create mode 100644 changelogs/unreleased/ce-5024-filename-search.yml
 create mode 100644 lib/gitlab/search/parsed_query.rb
 create mode 100644 lib/gitlab/search/query.rb
 create mode 100644 spec/lib/gitlab/search/query_spec.rb

diff --git a/changelogs/unreleased/ce-5024-filename-search.yml b/changelogs/unreleased/ce-5024-filename-search.yml
new file mode 100644
index 00000000000..a8bf9b1f802
--- /dev/null
+++ b/changelogs/unreleased/ce-5024-filename-search.yml
@@ -0,0 +1,5 @@
+---
+title: Add filename filtering to code search
+merge_request: 19509
+author:
+type: added
diff --git a/doc/api/search.md b/doc/api/search.md
index 107ddaffa6a..9716f682ace 100644
--- a/doc/api/search.md
+++ b/doc/api/search.md
@@ -776,6 +776,15 @@ Example response:
 
 ### Scope: blobs
 
+Filters are available for this scope:
+- filename
+- path
+- extension
+
+To use a filter, include it in your search query, for example: `a query filename:some_name*`.
+
+Filter values may contain wildcards (`*`) for glob-style matching.
+
 ```bash
 curl --request GET --header "PRIVATE-TOKEN: 9koXpg98eAheJpvBs5tK" https://gitlab.example.com/api/v4/projects/6/search?scope=blobs&search=installation
 ```
diff --git a/lib/gitlab/file_finder.rb b/lib/gitlab/file_finder.rb
index f42088f980e..af8270c8db8 100644
--- a/lib/gitlab/file_finder.rb
+++ b/lib/gitlab/file_finder.rb
@@ -14,14 +14,31 @@ module Gitlab
     end
 
+    # Runs a content search and a file name search for the query's term, then
+    # keeps only the blobs accepted by every `filename:`, `path:` or
+    # `extension:` filter present in the query.
+    #
+    # query - the raw search String; filters that are recognised and have a
+    #         value are removed from it and the rest is the search term.
+    #
+    # Returns an Array of [filename, blob] pairs sorted by filename.
     def find(query)
-      by_content = find_by_content(query)
+      # `\z` rather than `$` anchors at the very end of the name, so a name
+      # containing a newline cannot match on one of its earlier lines.
+      query = Gitlab::Search::Query.new(query) do
+        filter :filename, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}\z/i }
+        filter :path, matcher: ->(filter, blob) { blob.filename =~ /#{filter[:regex_value]}/i }
+        filter :extension, matcher: ->(filter, blob) { blob.filename =~ /\.#{filter[:regex_value]}\z/i }
+      end
+
+      by_content = find_by_content(query.term)
 
       already_found = Set.new(by_content.map(&:filename))
-      by_filename = find_by_filename(query, except: already_found)
+      by_filename = find_by_filename(query.term, except: already_found)
+
+      files = (by_content + by_filename)
+              .sort_by(&:filename)
 
-      (by_content + by_filename)
-        .sort_by(&:filename)
-        .map { |blob| [blob.filename, blob] }
+      query.filter_results(files).map { |blob| [blob.filename, blob] }
     end
 
     private
diff --git a/lib/gitlab/search/parsed_query.rb b/lib/gitlab/search/parsed_query.rb
new file mode 100644
index 00000000000..23595f23f01
--- /dev/null
+++ b/lib/gitlab/search/parsed_query.rb
@@ -0,0 +1,32 @@
+module Gitlab
+  module Search
+    # Holds the outcome of parsing a search string: the free-text term that is
+    # left over and the filters that were extracted from it.
+    class ParsedQuery
+      attr_reader :term, :filters
+
+      # term    - the search String remaining once filters were removed.
+      # filters - an Array of filter Hashes; a filter may carry a :matcher
+      #           callable that is invoked as matcher.call(filter, result).
+      def initialize(term, filters)
+        @term = term
+        @filters = filters
+      end
+
+      # Keeps the results accepted by every filter that has a :matcher.
+      # Filters without a matcher are skipped, so when none has one every
+      # result is kept.
+      #
+      # results - a collection of objects to hand to the matchers.
+      #
+      # Returns the selected results.
+      def filter_results(results)
+        with_matcher = @filters.reject { |filter| filter[:matcher].nil? }
+
+        results.select do |result|
+          with_matcher.all? { |filter| filter[:matcher].call(filter, result) }
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/search/query.rb b/lib/gitlab/search/query.rb
new file mode 100644
index 00000000000..8583bce7792
--- /dev/null
+++ b/lib/gitlab/search/query.rb
@@ -0,0 +1,86 @@
+module Gitlab
+  module Search
+    # Parses `name:value` filters out of a raw search string.
+    #
+    # Filters are declared in the block given to the constructor, which is
+    # evaluated in the context of the new instance:
+    #
+    #   query = Gitlab::Search::Query.new('foo filename:bar*') do
+    #     filter :filename, matcher: ->(filter, blob) { ... }
+    #   end
+    #
+    #   query.term    # => "foo"
+    #   query.filters # => the declared filters that were found in the query
+    #
+    # Method calls are delegated to the resulting ParsedQuery.
+    class Query < SimpleDelegator
+      # query       - the raw search String.
+      # filter_opts - a Hash merged over the default options; :default_parser
+      #               (downcasing by default) is the parser given to filters
+      #               that do not set their own.
+      # block       - optional block declaring the filters via `filter`.
+      def initialize(query, filter_opts = {}, &block)
+        @raw_query = query.dup
+        @filters = []
+        @filter_options = { default_parser: :downcase.to_proc }.merge(filter_opts)
+
+        instance_eval(&block) if block
+
+        @query = Gitlab::Search::ParsedQuery.new(*extract_filters)
+        # set the ParsedQuery as our default delegator thanks to SimpleDelegator
+        super(@query)
+      end
+
+      private
+
+      # Declares a filter called `name`. `attributes` may override :parser and
+      # add further keys such as :matcher.
+      def filter(name, **attributes)
+        filter = { parser: @filter_options[:default_parser], name: name }.merge(attributes)
+
+        @filters << filter
+      end
+
+      # Updates the options used by later `filter` declarations.
+      def filter_options(**options)
+        @filter_options.merge!(options)
+      end
+
+      # Looks up the first `name:value` word for each declared filter.
+      #
+      # Returns [term, filters]: the query without the matched words, and the
+      # filters that were given a non-empty value.
+      def extract_filters
+        words = @raw_query.split
+        fragments = []
+
+        filters = @filters.each_with_object([]) do |filter, parsed_filters|
+          match = words.find { |part| part =~ /\A#{filter[:name]}:/ }
+          next unless match
+
+          # The value is everything after the first colon, so values that
+          # themselves contain colons are kept intact.
+          input = match.partition(':').last
+          next if input.empty?
+
+          filter[:value] = parse_filter(filter, input)
+          # Escape the value for use inside a regexp, then turn each `*` into
+          # a non-greedy wildcard.
+          filter[:regex_value] = Regexp.escape(filter[:value]).gsub('\*', '.*?')
+          fragments << match
+
+          parsed_filters << filter
+        end
+
+        query = (words - fragments).join(' ')
+
+        [query, filters]
+      end
+
+      # Runs the filter's :parser over the raw value.
+      def parse_filter(filter, input)
+        filter[:parser].call(input)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/file_finder_spec.rb b/spec/lib/gitlab/file_finder_spec.rb
index d6d9e4001a3..b49c5817131 100644
--- a/spec/lib/gitlab/file_finder_spec.rb
+++ b/spec/lib/gitlab/file_finder_spec.rb
@@ -3,11 +3,25 @@ require 'spec_helper'
 describe Gitlab::FileFinder do
   describe '#find' do
     let(:project) { create(:project, :public, :repository) }
+    subject { described_class.new(project, project.default_branch) }
 
     it_behaves_like 'file finder' do
-      subject { described_class.new(project, project.default_branch) }
       let(:expected_file_by_name) { 'files/images/wm.svg' }
       let(:expected_file_by_content) { 'CHANGELOG' }
     end
+
+    # Each query below is a search term followed by a single `name:value`
+    # filter that narrows the matches.
+    it 'filters by name' do
+      expect(subject.find('files filename:wm.svg').count).to eq(1)
+    end
+
+    it 'filters by path' do
+      expect(subject.find('white path:images').count).to eq(1)
+    end
+
+    it 'filters by extension' do
+      expect(subject.find('files extension:svg').count).to eq(1)
+    end
   end
 end
diff --git a/spec/lib/gitlab/search/query_spec.rb b/spec/lib/gitlab/search/query_spec.rb
new file mode 100644
index 00000000000..2d00428fffa
--- /dev/null
+++ b/spec/lib/gitlab/search/query_spec.rb
@@ -0,0 +1,41 @@
+require 'spec_helper'
+
+describe Gitlab::Search::Query do
+  # Mixes declared filters (filter, name, other) with an undeclared one
+  # (anotherfilter) and plain search words.
+  let(:query) { 'base filter:wow anotherfilter:noway name:maybe other:mmm leftover' }
+  subject do
+    described_class.new(query) do
+      filter :filter
+      filter :name, parser: :upcase.to_proc
+      filter :other
+    end
+  end
+
+  it { expect(described_class).to be < SimpleDelegator }
+
+  it 'leaves undefined filters in the main query' do
+    expect(subject.term).to eq('base anotherfilter:noway leftover')
+  end
+
+  it 'parses filters' do
+    expect(subject.filters.count).to eq(3)
+    expect(subject.filters.map { |f| f[:value] }).to match_array(%w[wow MAYBE mmm])
+  end
+
+  context 'with an empty filter' do
+    let(:query) { 'some bar name: baz' }
+
+    it 'ignores empty filters' do
+      expect(subject.term).to eq('some bar name: baz')
+    end
+  end
+
+  context 'with a pipe' do
+    let(:query) { 'base | nofilter' }
+
+    it 'does not escape the pipe' do
+      expect(subject.term).to eq(query)
+    end
+  end
+end
diff --git a/spec/requests/api/search_spec.rb b/spec/requests/api/search_spec.rb
index aca4aa40027..f8e468be170 100644
--- a/spec/requests/api/search_spec.rb
+++ b/spec/requests/api/search_spec.rb
@@ -312,6 +312,35 @@ describe API::Search do
         end
 
         it_behaves_like 'response is correct', schema: 'public_api/v4/blobs', size: 2
+
+        context 'filters' do
+          # Runs a blob-scoped project search for the given search string.
+          def search_blobs(search)
+            get api("/projects/#{repo_project.id}/search", user), scope: 'blobs', search: search
+          end
+
+          it 'by filename' do
+            search_blobs('mon filename:PROCESS.md')
+
+            expect(response).to have_gitlab_http_status(200)
+            expect(json_response.size).to eq(2)
+            expect(json_response.first['filename']).to eq('PROCESS.md')
+          end
+
+          it 'by path' do
+            search_blobs('mon path:markdown')
+
+            expect(response).to have_gitlab_http_status(200)
+            expect(json_response.size).to eq(8)
+          end
+
+          it 'by extension' do
+            search_blobs('mon extension:md')
+
+            expect(response).to have_gitlab_http_status(200)
+            expect(json_response.size).to eq(11)
+          end
+        end
       end
     end
   end
-- 
2.30.9