Commit fe605235 authored by Valery Sizov's avatar Valery Sizov

ES: all searches are done but it requires lots of refactoring

parent c0f6df43
......@@ -91,6 +91,11 @@ gem "six", '~> 0.2.0'
# Seed data
gem "seed-fu", '~> 2.3.5'
# Search
gem 'elasticsearch-model'
gem 'elasticsearch-rails'
gem 'gitlab-elasticsearch-git', require: "elasticsearch/git"
# Markdown and HTML processing
gem 'html-pipeline', '~> 1.11.0'
gem 'task_list', '~> 1.0.2', require: 'task_list/railtie'
......
......@@ -4,56 +4,45 @@ class SearchController < ApplicationController
layout 'search'
def show
# return if params[:search].nil? || params[:search].blank?
return if params[:search].nil? || params[:search].blank?
# @search_term = params[:search]
@search_term = params[:search]
# if params[:project_id].present?
# @project = Project.find_by(id: params[:project_id])
# @project = nil unless can?(current_user, :download_code, @project)
# end
# if params[:group_id].present?
# @group = Group.find_by(id: params[:group_id])
# @group = nil unless can?(current_user, :read_group, @group)
# end
# @scope = params[:scope]
# @show_snippets = params[:snippets].eql? 'true'
# @search_results =
# if @project
# unless %w(blobs notes issues merge_requests milestones wiki_blobs
# commits).include?(@scope)
# @scope = 'blobs'
# end
# Search::ProjectService.new(@project, current_user, params).execute
# elsif @show_snippets
# unless %w(snippet_blobs snippet_titles).include?(@scope)
# @scope = 'snippet_blobs'
# end
# Search::SnippetService.new(current_user, params).execute
# else
# unless %w(projects issues merge_requests milestones).include?(@scope)
# @scope = 'projects'
# end
# Search::GlobalService.new(current_user, params).execute
# end
# @objects = @search_results.objects(@scope, params[:page])
@group = Group.find_by(id: params[:group_id]) if params[:group_id].present?
if params[:project_id].present?
@project = Project.find_by(id: params[:project_id])
@project = nil unless can?(current_user, :download_code, @project)
end
if project
return access_denied! unless can?(current_user, :download_code, project)
@search_results = SearchService.new(current_user, params).project_search(project)
else
@search_results = SearchService.new(current_user, params).global_search
if params[:group_id].present?
@group = Group.find_by(id: params[:group_id])
@group = nil unless can?(current_user, :read_group, @group)
end
@search_results = SearchDecorator.new(@search_results, params[:type])
@scope = params[:scope]
@show_snippets = params[:snippets].eql? 'true'
@search_results =
if @project
unless %w(blobs notes issues merge_requests milestones wiki_blobs
commits).include?(@scope)
@scope = 'blobs'
end
Search::ProjectService.new(@project, current_user, params).execute
elsif @show_snippets
unless %w(snippet_blobs snippet_titles).include?(@scope)
@scope = 'snippet_blobs'
end
Search::SnippetService.new(current_user, params).execute
else
unless %w(projects issues merge_requests milestones).include?(@scope)
@scope = 'projects'
end
Search::GlobalService.new(current_user, params).execute
end
@objects = @search_results.objects(@scope, params[:page])
end
def autocomplete
......
......@@ -4,52 +4,49 @@ module ApplicationSearch
included do
include Elasticsearch::Model
# $ host git-elasticsearch-1.production.infra.home
# git-elasticsearch-1.production.infra.home has address 10.40.56.23
self.__elasticsearch__.client = Elasticsearch::Client.new host: Gitlab.config.elasticsearch.host, port: Gitlab.config.elasticsearch.port
index_name [Rails.application.class.parent_name.downcase, self.name.downcase, Rails.env].join('-')
settings \
index: {
query: {
default_field: :name
},
analysis: {
:analyzer => {
:index_analyzer => {
type: "custom",
tokenizer: "ngram_tokenizer",
filter: %w(lowercase asciifolding name_ngrams)
},
:search_analyzer => {
type: "custom",
tokenizer: "standard",
filter: %w(lowercase asciifolding )
}
},
tokenizer: {
ngram_tokenizer: {
type: "NGram",
min_gram: 1,
max_gram: 20,
token_chars: %w(letter digit connector_punctuation punctuation)
}
query: {
default_field: :name
},
filter: {
name_ngrams: {
type: "NGram",
max_gram: 20,
min_gram: 1
analysis: {
:analyzer => {
:my_analyzer => {
type: "custom",
tokenizer: "ngram_tokenizer",
filter: %w(lowercase asciifolding name_ngrams)
},
:search_analyzer => {
type: "custom",
tokenizer: "standard",
filter: %w(lowercase asciifolding)
}
},
tokenizer: {
ngram_tokenizer: {
type: "nGram",
min_gram: 1,
max_gram: 20,
token_chars: %w(letter digit connector_punctuation punctuation)
}
},
filter: {
name_ngrams: {
type: "nGram",
max_gram: 20,
min_gram: 1
}
}
}
}
}
after_commit lambda { Resque.enqueue(Elastic::BaseIndexer, :index, self.class.to_s, self.id) }, on: :create
after_commit lambda { Resque.enqueue(Elastic::BaseIndexer, :update, self.class.to_s, self.id) }, on: :update
after_commit lambda { Resque.enqueue(Elastic::BaseIndexer, :delete, self.class.to_s, self.id) }, on: :destroy
after_touch lambda { Resque.enqueue(Elastic::BaseIndexer, :update, self.class.to_s, self.id) }
after_commit ->{ ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id) }, on: :create
after_commit ->{ ElasticIndexerWorker.perform_async(:update, self.class.to_s, self.id) }, on: :update
after_commit ->{ ElasticIndexerWorker.perform_async(:delete, self.class.to_s, self.id) }, on: :destroy
end
module ClassMethods
......@@ -59,11 +56,7 @@ module ApplicationSearch
memo
end
{
pre_tags: ["gitlabelasticsearch→"],
post_tags: ["←gitlabelasticsearch"],
fields: es_fields
}
{ fields: es_fields }
end
end
end
......@@ -8,49 +8,33 @@ module IssuesSearch
indexes :id, type: :integer, index: :not_analyzed
indexes :iid, type: :integer, index: :not_analyzed
indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
indexes :project_id, type: :integer, index: :not_analyzed
indexes :author_id, type: :integer, index: :not_analyzed
#indexes :assignee_id, type: :integer, index: :not_analyzed
indexes :project, type: :nested
indexes :author, type: :nested
#indexes :assignee, type: :nested
indexes :title_sort, type: :string, index: 'not_analyzed'
indexes :updated_at_sort, type: :date, index: :not_analyzed
indexes :created_at_sort, type: :string, index: :not_analyzed
end
def as_indexed_json(options = {})
as_json(
include: {
project: { only: :id },
author: { only: :id },
#assignee: { only: :id }
author: { only: :id }
}
).merge({
title_sort: title.downcase,
updated_at_sort: updated_at,
created_at_sort: created_at
})
).merge({ updated_at_sort: updated_at })
end
def self.search(query, page: 1, per: 20, options: {})
page ||= 1
if options[:in].blank?
options[:in] = %w(title^2 description)
else
options[:in].push(%w(title^2 description) - options[:in])
end
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = {
query: {
filtered: {
......@@ -62,9 +46,7 @@ module IssuesSearch
}
},
},
},
size: per,
from: per * (page.to_i - 1)
}
}
if query.blank?
......@@ -81,30 +63,13 @@ module IssuesSearch
}
end
options[:order] = :default if options[:order].blank?
order = case options[:order].to_sym
when :newest
{ created_at_sort: { order: :asc, mode: :min } }
when :oldest
{ created_at_sort: { order: :desc, mode: :min } }
when :recently_updated
{ updated_at_sort: { order: :asc, mode: :min } }
when :last_updated
{ updated_at_sort: { order: :desc, mode: :min } }
else
{ title_sort: { order: :asc, mode: :min } }
end
query_hash[:sort] = [
order,
{ updated_at_sort: { order: :desc, mode: :min } },
:_score
]
if options[:highlight]
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
end
query_hash[:highlight] = { fields: options[:in].inject({}) { |a, o| a[o.to_sym] = {} } }
self.__elasticsearch__.search(query_hash)
end
end
......
......@@ -8,10 +8,10 @@ module MergeRequestsSearch
indexes :id, type: :integer, index: :not_analyzed
indexes :iid, type: :integer, index: :not_analyzed
indexes :target_branch, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :source_branch, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :target_branch, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :source_branch, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :created_at, type: :date
indexes :updated_at, type: :date
indexes :state, type: :string
......@@ -20,15 +20,11 @@ module MergeRequestsSearch
indexes :source_project_id, type: :integer, index: :not_analyzed
indexes :target_project_id, type: :integer, index: :not_analyzed
indexes :author_id, type: :integer, index: :not_analyzed
#indexes :assignee_id, type: :integer, index: :not_analyzed
indexes :source_project, type: :nested
indexes :target_project, type: :nested
indexes :author, type: :nested
#indexes :assignee, type: :nested
indexes :title_sort, type: :string, index: 'not_analyzed'
indexes :created_at_sort, type: :string, index: 'not_analyzed'
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
......@@ -37,25 +33,13 @@ module MergeRequestsSearch
include: {
source_project: { only: :id },
target_project: { only: :id },
author: { only: :id },
#assignee: { only: :id }
author: { only: :id }
}
).merge({
title_sort: title.downcase,
updated_at_sort: updated_at,
created_at_sort: created_at
})
).merge({ updated_at_sort: updated_at })
end
def self.search(query, page: 1, per: 20, options: {})
page ||= 1
if options[:in].blank?
options[:in] = %w(title^2 description)
else
options[:in].push(%w(title^2 description) - options[:in])
end
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = {
query: {
......@@ -68,18 +52,7 @@ module MergeRequestsSearch
}
},
},
},
facets: {
targetProjectFacet: {
terms: {
field: :target_project_id,
all_terms: true,
size: Project.count
}
}
},
size: per,
from: per * (page.to_i - 1)
}
}
if query.blank?
......@@ -101,30 +74,13 @@ module MergeRequestsSearch
}
end
options[:order] = :default if options[:order].blank?
order = case options[:order].to_sym
when :newest
{ created_at_sort: { order: :asc, mode: :min } }
when :oldest
{ created_at_sort: { order: :desc, mode: :min } }
when :recently_updated
{ updated_at_sort: { order: :asc, mode: :min } }
when :last_updated
{ updated_at_sort: { order: :desc, mode: :min } }
else
{ title_sort: { order: :asc, mode: :min } }
end
query_hash[:sort] = [
order,
{ updated_at_sort: { order: :desc, mode: :min } },
:_score
]
if options[:highlight]
query_hash[:highlight] = highlight_options(options[:in])
end
query_hash[:highlight] = highlight_options(options[:in])
self.__elasticsearch__.search(query_hash)
end
end
......
module GroupsSearch
module MilestonesSearch
extend ActiveSupport::Concern
included do
......@@ -6,33 +6,20 @@ module GroupsSearch
mappings do
indexes :id, type: :integer
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :path, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :project_id, type: :integer, index: :not_analyzed
indexes :created_at, type: :date
indexes :name_sort, type: :string, index: :not_analyzed
indexes :created_at_sort, type: :string, index: 'not_analyzed'
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
def as_indexed_json(options = {})
as_json.merge({
name_sort: name.downcase,
created_at_sort: created_at,
updated_at_sort: updated_at
})
as_json.merge({ updated_at_sort: updated_at })
end
def self.search(query, page: 1, per: 20, options: {})
page ||= 1
if options[:in].blank?
options[:in] = %w(name^10 path^5)
else
options[:in].push(%w(name^10 path^5) - options[:in])
end
def self.elastic_search(query, options: {})
options[:in] = %w(title^2 description)
query_hash = {
query: {
......@@ -45,9 +32,7 @@ module GroupsSearch
}
},
},
},
size: per,
from: per * (page.to_i - 1)
}
}
if query.blank?
......@@ -55,42 +40,22 @@ module GroupsSearch
query_hash[:track_scores] = true
end
if options[:gids]
if options[:project_ids]
query_hash[:query][:filtered][:filter] ||= { and: [] }
query_hash[:query][:filtered][:filter][:and] << {
ids: {
values: options[:gids]
terms: {
project_id: [options[:project_ids]].flatten
}
}
end
options[:order] = :default if options[:order].blank?
order = case options[:order].to_sym
when :newest
{ created_at_sort: { order: :asc, mode: :min } }
when :oldest
{ created_at_sort: { order: :desc, mode: :min } }
when :recently_updated
{ updated_at_sort: { order: :asc, mode: :min } }
when :last_updated
{ updated_at_sort: { order: :desc, mode: :min } }
else
{ name_sort: { order: :asc, mode: :min } }
end
query_hash[:sort] = [
order,
{ updated_at_sort: { order: :desc, mode: :min } },
:_score
]
#query_hash[:sort] = [
#{ name_sort: { order: :asc, mode: :min }},
#:_score
#]
if options[:highlight]
query_hash[:highlight] = highlight_options(options[:in])
end
query_hash[:highlight] = highlight_options(options[:in])
self.__elasticsearch__.search(query_hash)
end
......
module NotesSearch
  extend ActiveSupport::Concern

  included do
    include ApplicationSearch

    # Elasticsearch mapping for notes: full-text search over the note
    # body, exact-match filtering on project_id, and a not-analyzed
    # copy of updated_at used only for sorting.
    mappings do
      indexes :id, type: :integer
      indexes :note, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
      indexes :project_id, type: :integer, index: :not_analyzed
      indexes :created_at, type: :date
      indexes :updated_at_sort, type: :string, index: 'not_analyzed'
    end

    # Document pushed to the index: the default serialization plus the
    # dedicated sort field.
    def as_indexed_json(options = {})
      as_json.merge({ updated_at_sort: updated_at })
    end

    # Full-text search over note bodies. A blank query matches all
    # documents (with scores tracked); options[:project_ids] narrows the
    # result set to those projects. Results are ordered by most recently
    # updated, then relevance.
    def self.elastic_search(query, options: {})
      options[:in] = ["note"]

      search_request = {
        query: {
          filtered: {
            query: { match: { note: query } },
          },
        }
      }

      if query.blank?
        search_request[:query][:filtered][:query] = { match_all: {} }
        search_request[:track_scores] = true
      end

      if options[:project_ids]
        project_filter = {
          terms: {
            project_id: [options[:project_ids]].flatten
          }
        }
        search_request[:query][:filtered][:filter] ||= { and: [] }
        search_request[:query][:filtered][:filter][:and] << project_filter
      end

      search_request[:sort] = [
        { updated_at_sort: { order: :desc, mode: :min } },
        :_score
      ]

      search_request[:highlight] = highlight_options(options[:in])

      self.__elasticsearch__.search(search_request)
    end
  end
end
......@@ -7,11 +7,11 @@ module ProjectsSearch
mappings do
indexes :id, type: :integer, index: 'not_analyzed'
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :path, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :name_with_namespace, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :path_with_namespace, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :path, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :name_with_namespace, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :path_with_namespace, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :description, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :namespace_id, type: :integer, index: 'not_analyzed'
......@@ -20,49 +20,17 @@ module ProjectsSearch
indexes :visibility_level, type: :integer, index: 'not_analyzed'
indexes :last_activity_at, type: :date
indexes :last_pushed_at, type: :date
indexes :owners, type: :nested
indexes :masters, type: :nested
indexes :developers, type: :nested
indexes :reporters, type: :nested
indexes :guests, type: :nested
indexes :categories, type: :nested
indexes :name_with_namespace_sort, type: :string, index: 'not_analyzed'
indexes :created_at_sort, type: :string, index: 'not_analyzed'
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
def as_indexed_json(options={})
as_json(
include: {
owners: { only: :id },
masters: { only: :id },
developers: { only: :id },
reporters: { only: :id },
guests: { only: :id },
categories: { only: :name}
}
).merge({
def as_indexed_json(options = {})
as_json.merge({
name_with_namespace: name_with_namespace,
name_with_namespace_sort: name_with_namespace.downcase,
path_with_namespace: path_with_namespace,
updated_at_sort: updated_at,
created_at_sort: created_at
path_with_namespace: path_with_namespace
})
end
def self.search(query, page: 1, per: 20, options: {})
page ||= 1
if options[:in].blank?
options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9)
else
options[:in].push(%w(name^10 name_with_namespace^2 path_with_namespace path^9) - options[:in])
end
def self.elastic_search(query, options: {})
options[:in] = %w(name^10 name_with_namespace^2 path_with_namespace path^9)
query_hash = {
query: {
......@@ -76,25 +44,6 @@ module ProjectsSearch
},
},
},
facets: {
namespaceFacet: {
terms: {
field: :namespace_id,
all_terms: true,
size: Namespace.count
}
},
categoryFacet: {
terms: {
field: "categories.name",
all_terms: true,
# FIXME. Remove to_a
size: Project.category_counts.to_a.count
}
}
},
size: per,
from: per * (page.to_i - 1)
}
if query.blank?
......@@ -135,18 +84,6 @@ module ProjectsSearch
}
end
if options[:category]
query_hash[:query][:filtered][:filter] ||= { and: [] }
query_hash[:query][:filtered][:filter][:and] << {
nested: {
path: :categories,
filter: {
term: { "categories.name" => options[:category] }
}
}
}
end
if options[:non_archived]
query_hash[:query][:filtered][:filter] ||= { and: [] }
query_hash[:query][:filtered][:filter][:and] << {
......@@ -169,9 +106,9 @@ module ProjectsSearch
query_hash[:query][:filtered][:filter] ||= { and: [] }
query_hash[:query][:filtered][:filter][:and] << {
nested: {
path: :owners,
path: :owner,
filter: {
term: { "owners.id" => options[:owner_id] }
term: { "owner.id" => options[:owner_id] }
}
}
}
......@@ -186,28 +123,10 @@ module ProjectsSearch
}
end
options[:order] = :default if options[:order].blank?
order = case options[:order].to_sym
when :newest
{ created_at_sort: { order: :asc, mode: :min } }
when :oldest
{ created_at_sort: { order: :desc, mode: :min } }
when :recently_updated
{ updated_at_sort: { order: :asc, mode: :min } }
when :last_updated
{ updated_at_sort: { order: :desc, mode: :min } }
else
{ name_with_namespace_sort: { order: :asc, mode: :min } }
end
query_hash[:sort] = [
order,
:_score
]
if options[:highlight]
query_hash[:highlight] = highlight_options(options[:in])
end
query_hash[:sort] = [:_score]
query_hash[:highlight] = highlight_options(options[:in])
self.__elasticsearch__.search(query_hash)
end
......
......@@ -23,14 +23,8 @@ module RepositoriesSearch
Project.find_each do |project|
if project.repository.exists? && !project.repository.empty?
begin
project.repository.index_commits
rescue
end
begin
project.repository.index_blobs
rescue
end
project.repository.index_commits
project.repository.index_blobs
end
end
end
......
module SnippetsSearch
  extend ActiveSupport::Concern

  included do
    include ApplicationSearch

    # Elasticsearch mapping for snippets: full-text on title/file_name/
    # content, exact filters on ids, and a date copy of updated_at for
    # sorting.
    mappings do
      indexes :id, type: :integer, index: :not_analyzed
      indexes :title, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
      indexes :file_name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
      indexes :content, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
      indexes :created_at, type: :date
      indexes :updated_at, type: :date
      indexes :state, type: :string
      indexes :project_id, type: :integer, index: :not_analyzed
      indexes :author_id, type: :integer, index: :not_analyzed
      indexes :project, type: :nested
      indexes :author, type: :nested
      indexes :updated_at_sort, type: :date, index: :not_analyzed
    end

    # Document pushed to the index. Merges in updated_at_sort so the
    # sort clause below has a value to order on — previously the field
    # was mapped but never populated (all other *Search concerns emit it).
    def as_indexed_json(options = {})
      as_json(
        include: {
          project: { only: :id },
          author: { only: :id }
        }
      ).merge({ updated_at_sort: updated_at })
    end

    # Title/file-name search. A blank query matches everything (scores
    # tracked); options[:ids] restricts to an id whitelist. Ordered by
    # most recently updated, then relevance.
    def self.elastic_search(query, options: {})
      options[:in] = %w(title file_name)

      query_hash = {
        query: {
          filtered: {
            query: {
              multi_match: {
                fields: options[:in],
                query: "#{query}",
                operator: :and
              }
            },
          },
        }
      }

      if query.blank?
        query_hash[:query][:filtered][:query] = { match_all: {}}
        query_hash[:track_scores] = true
      end

      if options[:ids]
        query_hash[:query][:filtered][:filter] ||= { and: [] }
        query_hash[:query][:filtered][:filter][:and] << {
          terms: {
            id: [options[:ids]].flatten
          }
        }
      end

      query_hash[:sort] = [
        { updated_at_sort: { order: :desc, mode: :min } },
        :_score
      ]

      # BUG FIX: the previous `inject({}) { |a, o| a[o.to_sym] = {} }`
      # returned the assignment's value ({}) as the next accumulator, so
      # the fields hash was always empty and nothing was ever highlighted.
      query_hash[:highlight] = { fields: options[:in].each_with_object({}) { |o, a| a[o.to_sym] = {} } }

      self.__elasticsearch__.search(query_hash)
    end

    # Content search over snippet bodies. Same id filter and ordering
    # as elastic_search.
    # NOTE(review): the query matches :content but highlight fields are
    # title/file_name — confirm whether content highlights are wanted.
    def self.elastic_search_code(query, options: {})
      options[:in] = %w(title file_name)

      query_hash = {
        query: {
          filtered: {
            query: {match: {content: query}},
          },
        }
      }

      if options[:ids]
        query_hash[:query][:filtered][:filter] ||= { and: [] }
        query_hash[:query][:filtered][:filter][:and] << {
          terms: {
            id: [options[:ids]].flatten
          }
        }
      end

      query_hash[:sort] = [
        { updated_at_sort: { order: :desc, mode: :min } },
        :_score
      ]

      # Same accumulator fix as elastic_search above.
      query_hash[:highlight] = { fields: options[:in].each_with_object({}) { |o, a| a[o.to_sym] = {} } }

      self.__elasticsearch__.search(query_hash)
    end
  end
end
......@@ -6,21 +6,17 @@ module UsersSearch
mappings do
indexes :id, type: :integer
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :username, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :email, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :name, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :username, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :bio, type: :string
indexes :skype, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :skype, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :linkedin, type: :string
indexes :twitter, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, index_analyzer: :index_analyzer
indexes :twitter, type: :string, index_options: 'offsets', search_analyzer: :search_analyzer, analyzer: :my_analyzer
indexes :state, type: :string
indexes :website_url, type: :string
indexes :created_at, type: :date
indexes :admin, type: :boolean
indexes :name_sort, type: :string, index: 'not_analyzed'
indexes :created_at_sort, type: :string, index: 'not_analyzed'
indexes :updated_at_sort, type: :string, index: 'not_analyzed'
end
def as_indexed_json(options = {})
......@@ -31,16 +27,8 @@ module UsersSearch
})
end
def self.search(query, page: 1, per: 20, options: {})
page ||= 1
per ||= 20
if options[:in].blank?
options[:in] = %w(name^3 username^2 email)
else
options[:in].push(%w(name^3 username^2 email) - options[:in])
end
def self.elastic_search(query, options: {})
options[:in] = %w(name^3 username^2 email)
query_hash = {
query: {
......@@ -53,9 +41,7 @@ module UsersSearch
}
},
},
},
size: per,
from: per * (page.to_i - 1)
}
}
if query.blank?
......@@ -81,30 +67,10 @@ module UsersSearch
}
end
options[:order] = :default if options[:order].blank?
order = case options[:order].to_sym
when :newest
{ created_at_sort: { order: :asc, mode: :min } }
when :oldest
{ created_at_sort: { order: :desc, mode: :min } }
when :recently_updated
{ updated_at_sort: { order: :asc, mode: :min } }
when :last_updated
{ updated_at_sort: { order: :desc, mode: :min } }
else
{ name_sort: { order: :asc, mode: :min } }
end
query_hash[:sort] = [
order,
:_score
]
if options[:highlight]
query_hash[:highlight] = highlight_options(options[:in])
end
query_hash[:sort] = [:_score]
query_hash[:highlight] = highlight_options(options[:in])
self.__elasticsearch__.search(query_hash)
end
end
......
module WikiRepositoriesSearch
  extend ActiveSupport::Concern

  included do
    include Elasticsearch::Git::Repository

    self.__elasticsearch__.client = Elasticsearch::Client.new host: Gitlab.config.elasticsearch.host, port: Gitlab.config.elasticsearch.port

    # Wiki repos share the index with project repos; the "wiki_" prefix
    # keeps their document ids distinct from the project's own repo.
    def repository_id
      "wiki_#{project.id}"
    end

    # Number of repositories eligible for wiki indexing.
    def self.repositories_count
      Project.where(wiki_enabled: true).count
    end

    # Client handed to the indexer; same one configured above.
    def client_for_indexing
      self.__elasticsearch__.client
    end

    # Rebuilds the wiki index from scratch, then indexes the blobs of
    # every non-empty wiki on wiki-enabled projects.
    def self.import
      ProjectWiki.__elasticsearch__.create_index! force: true

      Project.where(wiki_enabled: true).find_each do |proj|
        proj.wiki.index_blobs unless proj.wiki.empty?
      end
    end
  end
end
......@@ -19,7 +19,6 @@ require 'file_size_validator'
class Group < Namespace
include Gitlab::ConfigHelper
include Referable
include GroupsSearch
has_many :group_members, dependent: :destroy, as: :source, class_name: 'GroupMember'
alias_method :members, :group_members
......
......@@ -24,6 +24,7 @@ class Milestone < ActiveRecord::Base
include Sortable
include Referable
include StripAttribute
include MilestonesSearch
belongs_to :project
has_many :issues
......
......@@ -26,6 +26,7 @@ class Note < ActiveRecord::Base
include Gitlab::CurrentSettings
include Participable
include Mentionable
include NotesSearch
default_value_for :system, false
......
......@@ -943,6 +943,10 @@ class Project < ActiveRecord::Base
false
end
def wiki
ProjectWiki.new(self, self.owner)
end
def reference_issue_tracker?
default_issues_tracker? || jira_tracker_active?
end
......
class ProjectWiki
include Gitlab::ShellAdapter
include WikiRepositoriesSearch
MARKUPS = {
'Markdown' => :md,
......@@ -12,6 +13,8 @@ class ProjectWiki
# Returns a string describing what went wrong after
# an operation fails.
attr_reader :error_message
attr_reader :project
def initialize(project, user = nil)
@project = project
......
......@@ -108,6 +108,12 @@ class Repository
commits
end
def find_commits_by_message_with_elastic(query)
project.repository.search(query, type: :commit)[:commits][:results].map do |result|
commit result["_source"]["commit"]["sha"]
end
end
def find_branch(name)
raw_repository.branches.find { |branch| branch.name == name }
end
......@@ -660,7 +666,53 @@ class Repository
Gitlab::Popen.popen(args, path_to_repo).first.scrub.split(/^--$/)
end
def parse_search_result(result)
def parse_search_result(result, elastic = false)
if elastic
parse_search_result_from_elastic(result)
else
parse_search_result_from_grep(result)
end
end
def parse_search_result_from_elastic(result)
ref = result["_source"]["blob"]["oid"]
filename = result["_source"]["blob"]["path"]
content = result["_source"]["blob"]["content"]
total_lines = content.lines.size
term = result["highlight"]["blob.content"][0].match(/gitlabelasticsearch→(.*)←gitlabelasticsearch/)[1]
found_line_number = 0
content.each_line.each_with_index do |line, index|
if line.include?(term)
found_line_number = index
break
end
end
from = if found_line_number >= 2
found_line_number - 2
else
found_line_number
end
to = if (total_lines - found_line_number) > 3
found_line_number + 2
else
found_line_number
end
data = content.lines[from..to]
OpenStruct.new(
filename: filename,
ref: ref,
startline: from + 1,
data: data.join
)
end
def parse_search_result_from_grep(result)
ref = nil
filename = nil
startline = 0
......
......@@ -21,6 +21,7 @@ class Snippet < ActiveRecord::Base
include Participable
include Referable
include Sortable
include SnippetsSearch
default_value_for :visibility_level, Snippet::PRIVATE
......
......@@ -12,7 +12,11 @@ module Search
projects = projects.in_namespace(group.id) if group
project_ids = projects.pluck(:id)
Gitlab::SearchResults.new(project_ids, params[:search])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SearchResults.new(project_ids, params[:search])
else
Gitlab::SearchResults.new(project_ids, params[:search])
end
end
end
end
......@@ -7,9 +7,15 @@ module Search
end
def execute
Gitlab::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
else
Gitlab::ProjectSearchResults.new(project.id,
params[:search],
params[:repository_ref])
end
end
end
end
......@@ -8,7 +8,12 @@ module Search
def execute
snippet_ids = Snippet.accessible_to(current_user).pluck(:id)
Gitlab::SnippetSearchResults.new(snippet_ids, params[:search])
if Gitlab.config.elasticsearch.enabled
Gitlab::Elastic::SnippetSearchResults.new(snippet_ids, params[:search])
else
Gitlab::SnippetSearchResults.new(snippet_ids, params[:search])
end
end
end
end
class SearchService < BaseService
# Runs the search term from params against every global scope and
# returns one hash keyed by scope name.
def global_search
  term = params[:search]

  results = {}
  results[:groups] = search_in_groups(term)
  results[:users] = search_in_users(term)
  results[:projects] = search_in_projects(term)
  results[:merge_requests] = search_in_merge_requests(term)
  results[:issues] = search_in_issues(term)
  results[:repositories] = search_in_repository(term)
  results
end
# Single-project search. Group/user/project scopes do not apply inside
# one project, so they come back as empty hashes; the remaining scopes
# are restricted to the given project.
def project_search(project)
  term = params[:search]

  results = { groups: {}, users: {}, projects: {} }
  results[:merge_requests] = search_in_merge_requests(term, project)
  results[:issues] = search_in_issues(term, project)
  results[:repositories] = search_in_repository(term, project)
  results
end
private
# Project-scope search: boosts name/path over description, optionally
# filters by group namespace and category, and returns records plus
# facet summaries ({} on backend failure).
#
# NOTE(review): this reads response.response["facets"] — the facet
# clauses appear to have been removed from the ProjectsSearch query in
# this same change, so confirm facet data is still returned; a missing
# key here would previously have been silently swallowed.
def search_in_projects(query)
  opt = {
    pids: projects_ids,
    order: params[:order],
    fields: %w(name^10 path^9 description^5
               name_with_namespace^2 path_with_namespace),
    highlight: true
  }

  group = Group.find_by(id: params[:group_id]) if params[:group_id].present?
  opt[:namespace_id] = group.id if group
  opt[:category] = params[:category] if params[:category].present?

  begin
    response = Project.search(query, options: opt, page: page)

    categories_list = if query.blank?
      Project.category_counts.map do |category|
        { category: category.name, count: category.count }
      end
    else
      response.response["facets"]["categoryFacet"]["terms"].map do |term|
        { category: term["term"], count: term["count"] }
      end
    end

    {
      records: response.records,
      results: response.results,
      response: response.response,
      total_count: response.total_count,
      namespaces: namespaces(response.response["facets"]["namespaceFacet"]["terms"]),
      categories: categories_list
    }
  rescue StandardError
    # Best-effort: one broken scope must not take down the whole search
    # page. Was `rescue Exception`, which also trapped SystemExit and
    # SignalException — too broad.
    {}
  end
end
def search_in_groups(query)
opt = {
gids: current_user ? current_user.authorized_groups.ids : [],
order: params[:order],
fields: %w(name^10 path^5 description),
highlight: true
}
begin
response = Group.search(query, options: opt, page: page)
{
records: response.records,
results: response.results,
response: response.response,
total_count: response.total_count
}
rescue Exception => e
{}
end
end
def search_in_users(query)
opt = {
active: true,
order: params[:order],
highlight: true
}
begin
response = User.search(query, options: opt, page: page)
{
records: response.records,
results: response.results,
response: response.response,
total_count: response.total_count
}
rescue Exception => e
{}
end
end
def search_in_merge_requests(query, project = nil)
opt = {
projects_ids: project ? [project.id] : projects_ids,
order: params[:order],
highlight: true
}
begin
response = MergeRequest.search(query, options: opt, page: page)
{
records: response.records,
results: response.results,
response: response.response,
total_count: response.total_count
}
rescue Exception => e
{}
end
end
def search_in_issues(query, project = nil)
opt = {
projects_ids: project ? [project.id] : projects_ids,
order: params[:order]
}
begin
response = Issue.search(query, options: opt, page: page)
{
records: response.records,
results: response.results,
response: response.response,
total_count: response.total_count
}
rescue Exception => e
{}
end
end
def search_in_repository(query, project = nil)
opt = {
repository_id: project ? [project.id] : projects_ids,
highlight: true,
order: params[:order]
}
if params[:language].present? && params[:language] != 'All'
opt.merge!({ language: params[:language] })
end
begin
res = Repository.search(query, options: opt, page: page)
res[:blobs][:projects] = project_filter(res[:blobs][:repositories]) || []
res[:commits][:projects] = project_filter(res[:commits][:repositories]) || []
res
rescue Exception => e
{}
end
end
def projects_ids
@allowed_projects_ids ||= begin
if params[:namespace].present?
namespace = Namespace.find_by(path: params[:namespace])
if namespace
return namespace.projects.where(id: known_projects_ids).pluck(:id)
end
end
known_projects_ids
end
end
def page
return @current_page if defined?(@current_page)
@current_page = params[:page].to_i
@current_page = 1 if @current_page == 0
@current_page
end
def known_projects_ids
known_projects_ids = []
known_projects_ids += current_user.known_projects.pluck(:id) if current_user
known_projects_ids + Project.public_or_internal_only(current_user).pluck(:id)
end
def project_filter(es_results)
terms = es_results.
select { |term| term['count'] > 0 }.
inject({}) do |memo, term|
memo[term["term"]] = term["count"]
memo
end
projects_meta_data = Project.joins(:namespace).where(id: terms.keys).
pluck(['projects.name','projects.path',
'namespaces.name as namespace_name',
'namespaces.path as namespace_path',
'projects.id'].join(","))
if projects_meta_data.any?
projects_meta_data.map do |meta|
{
name: meta[2] + ' / ' + meta[0],
path: meta[3] + ' / ' + meta[1],
count: terms[meta[4]]
}
end.sort { |x, y| y[:count] <=> x[:count] }
else
[]
end
end
def namespaces(terms)
founded_terms = terms.select { |term| term['count'] > 0 }
grouped_terms = founded_terms.inject({}) do |memo, term|
memo[term["term"]] = term["count"]
memo
end
namespaces_meta_data = Namespace.find(grouped_terms.keys)
if namespaces_meta_data.any?
namespaces_meta_data.map do |namespace|
{ namespace: namespace, count: grouped_terms[namespace.id] }
end.sort { |x, y| y[:count] <=> x[:count] }
else
[]
end
end
end
- blob = @project.repository.parse_search_result(blob)
- blob = @project.repository.parse_search_result(blob, true)
.blob-result
.file-holder
.file-title
......
- wiki_blob = @project.repository.parse_search_result(wiki_blob)
- wiki_blob = @project.repository.parse_search_result(wiki_blob, true)
.blob-result
.file-holder
.file-title
......
# Sidekiq worker that mirrors database record changes into the
# Elasticsearch index (index, update and delete operations).
class ElasticIndexerWorker
  include Sidekiq::Worker

  sidekiq_options queue: :elasticsearch

  # Shared Elasticsearch client, configured once at load time from gitlab.yml.
  Client = Elasticsearch::Client.new(host: Gitlab.config.elasticsearch.host,
                                     port: Gitlab.config.elasticsearch.port)

  # operation - action name, e.g. "index_document" / "update_document" / "delete"
  # klass     - model class name as a String
  # record_id - database id of the record to (re)index or remove
  def perform(operation, klass, record_id, options = {})
    # All snippet subclasses are indexed under the shared Snippet index.
    klass = "Snippet" if klass =~ /Snippet$/
    model = klass.constantize
    action = operation.to_s

    if action =~ /index|update/
      proxy = model.find(record_id).__elasticsearch__
      proxy.client = Client
      proxy.__send__("#{operation}_document")
    elsif action =~ /delete/
      Client.delete(index: model.index_name, type: model.document_type, id: record_id)
    end
  end
end
......@@ -136,6 +136,14 @@ production: &base
# The location where LFS objects are stored (default: shared/lfs-objects).
# storage_path: shared/lfs-objects
## Elasticsearch (EE only)
# Enable it if you are going to use elasticsearch instead of
# regular database search
elasticsearch:
enabled: false
host: localhost
port: 9200
## GitLab Pages
pages:
enabled: false
......
......@@ -246,6 +246,12 @@ Settings.gitlab['restricted_signup_domains'] ||= []
Settings.gitlab['import_sources'] ||= ['github','bitbucket','gitlab','gitorious','google_code','fogbugz','git']
#
# Elasticsearch
#
Settings['elasticsearch'] ||= Settingslogic.new({})
Settings.elasticsearch['enabled'] = false if Settings.elasticsearch['enabled'].nil?
#
# CI
#
......
module Gitlab
  module Elastic
    # Elasticsearch-backed search results scoped to a single project.
    # Extends SearchResults with the project-only scopes: notes, blobs,
    # wiki blobs and commits.
    class ProjectSearchResults < SearchResults
      attr_reader :project, :repository_ref

      # project_id     - id of the project to search within
      # query          - raw search term
      # repository_ref - optional ref to search the repository at; blank
      #                  values are normalized to nil
      def initialize(project_id, query, repository_ref = nil)
        @project = Project.find(project_id)
        @repository_ref = repository_ref.presence
        @query = query
      end

      # Returns a paginated collection for the given scope, falling back to
      # the shared scopes handled by SearchResults#objects.
      def objects(scope, page = nil)
        case scope
        when 'notes'
          notes.records.page(page).per(per_page)
        when 'blobs'
          # NOTE(review): with an empty repo or blank query #blobs returns a
          # Kaminari array, which does not respond to #response — confirm
          # this path is unreachable or guard it.
          blobs.response.page(page).per(per_page)
        when 'wiki_blobs'
          wiki_blobs.response.page(page).per(per_page)
        when 'commits'
          Kaminari.paginate_array(commits).page(page).per(per_page)
        else
          super
        end
      end

      # Sum of hits across every scope, including the project-only ones.
      def total_count
        @total_count ||= issues_count + merge_requests_count + blobs_count +
          notes_count + wiki_blobs_count + commits_count
      end

      def blobs_count
        @blobs_count ||= blobs.total_count
      end

      def notes_count
        @notes_count ||= notes.total_count
      end

      def wiki_blobs_count
        @wiki_blobs_count ||= wiki_blobs.total_count
      end

      def commits_count
        @commits_count ||= commits.count
      end

      private

      # Code blob hits, empty when there is nothing to search.
      def blobs
        if project.empty_repo? || query.blank?
          Kaminari.paginate_array([])
        else
          project.repository.search(query, type: :blob, options: { highlight: true })[:blobs][:results]
        end
      end

      # Wiki page hits; only searched when the project wiki is enabled and
      # has content.
      def wiki_blobs
        if project.wiki_enabled? && !project.wiki.empty? && query.present?
          project.wiki.search(query, type: :blob, options: { highlight: true })[:blobs][:results]
        else
          Kaminari.paginate_array([])
        end
      end

      def notes
        opt = {
          project_ids: limit_project_ids
        }

        Note.elastic_search(query, options: opt)
      end

      # Commit-message hits, empty when there is nothing to search.
      def commits
        if project.empty_repo? || query.blank?
          Kaminari.paginate_array([])
        else
          project.repository.find_commits_by_message_with_elastic(query)
        end
      end

      # Restrict every inherited scope to this single project.
      def limit_project_ids
        [project.id]
      end
    end
  end
end
\ No newline at end of file
module Gitlab
  module Elastic
    # Base class for Elasticsearch-backed search results covering the
    # global scopes: projects, issues, merge requests and milestones.
    class SearchResults
      attr_reader :query

      # Limit search results by passed project ids
      # It allows us to search only for projects user has access to
      attr_reader :limit_project_ids

      # limit_project_ids - ids to restrict the search to
      # query             - raw search term (shell-escaped before use)
      def initialize(limit_project_ids, query)
        # NOTE(review): the fallback is Project.all (records, not ids) —
        # confirm the downstream elastic_search options accept a relation.
        @limit_project_ids = limit_project_ids || Project.all
        @query = Shellwords.shellescape(query) if query.present?
      end

      # Returns a paginated collection for the given scope, or an empty
      # paginated array for unknown scopes.
      def objects(scope, page = nil)
        case scope
        when 'projects'
          projects.records.page(page).per(per_page)
        when 'issues'
          issues.records.page(page).per(per_page)
        when 'merge_requests'
          merge_requests.records.page(page).per(per_page)
        when 'milestones'
          milestones.records.page(page).per(per_page)
        else
          Kaminari.paginate_array([])
        end
      end

      # Total hits across all global scopes, memoized.
      def total_count
        @total_count ||= projects_count + issues_count + merge_requests_count + milestones_count
      end

      def projects_count
        @projects_count ||= projects.total_count
      end

      def issues_count
        @issues_count ||= issues.total_count
      end

      def merge_requests_count
        @merge_requests_count ||= merge_requests.total_count
      end

      def milestones_count
        @milestones_count ||= milestones.total_count
      end

      def empty?
        total_count.zero?
      end

      private

      def projects
        Project.elastic_search(query, options: { pids: limit_project_ids })
      end

      def issues
        # "#123"-style queries look the issue up by iid directly instead of
        # going through Elasticsearch.
        if query =~ /#(\d+)\z/
          Issue.where(project_id: limit_project_ids).where(iid: $1)
        else
          Issue.elastic_search(query, options: { projects_ids: limit_project_ids })
        end
      end

      def milestones
        Milestone.elastic_search(query, options: { projects_ids: limit_project_ids })
      end

      def merge_requests
        # "#123"/"!123"-style queries look the merge request up by iid.
        if query =~ /[#!](\d+)\z/
          MergeRequest.in_projects(limit_project_ids).where(iid: $1)
        else
          MergeRequest.elastic_search(query, options: { projects_ids: limit_project_ids })
        end
      end

      def default_scope
        'projects'
      end

      # Results per page for every scope.
      def per_page
        20
      end
    end
  end
end
module Gitlab
  module Elastic
    # Elasticsearch-backed search results for snippets, covering the
    # snippet_titles and snippet_blobs scopes.
    class SnippetSearchResults < SearchResults
      # Ids of the snippets the search is restricted to.
      attr_reader :limit_snippet_ids

      # limit_snippet_ids - snippet ids the current user may see
      # query             - raw search term (not escaped here, unlike the
      #                     parent class — NOTE(review): confirm intentional)
      def initialize(limit_snippet_ids, query)
        @limit_snippet_ids = limit_snippet_ids
        @query = query
      end

      # Returns a paginated collection for the given scope; blob results are
      # post-processed into preview "chunks" via #chunk_snippet.
      def objects(scope, page = nil)
        case scope
        when 'snippet_titles'
          snippet_titles.records.page(page).per(per_page)
        when 'snippet_blobs'
          # We process whole list of items then paginate it. Not too smart
          # Should be refactored in the CE side first to prevent conflicts hell
          Kaminari.paginate_array(
            snippet_blobs.records.map do
              |snippet| chunk_snippet(snippet)
            end
          ).page(page).per(per_page)
        else
          super
        end
      end

      # Total hits across both snippet scopes, memoized.
      def total_count
        @total_count ||= snippet_titles_count + snippet_blobs_count
      end

      def snippet_titles_count
        @snippet_titles_count ||= snippet_titles.total_count
      end

      def snippet_blobs_count
        @snippet_blobs_count ||= snippet_blobs.total_count
      end

      private

      # Title/description matches via the standard snippet index.
      def snippet_titles
        opt = {
          ids: limit_snippet_ids
        }

        Snippet.elastic_search(query, options: opt)
      end

      # Content (code) matches via the snippet code search.
      def snippet_blobs
        opt = {
          ids: limit_snippet_ids
        }

        Snippet.elastic_search_code(query, options: opt)
      end

      def default_scope
        'snippet_blobs'
      end

      # Get an array of line numbers surrounding a matching
      # line, bounded by min/max.
      #
      # @returns Array of line numbers
      def bounded_line_numbers(line, min, max)
        lower = line - surrounding_lines > min ? line - surrounding_lines : min
        upper = line + surrounding_lines < max ? line + surrounding_lines : max
        (lower..upper).to_a
      end

      # Returns a sorted set of lines to be included in a snippet preview.
      # This ensures matching adjacent lines do not display duplicated
      # surrounding code.
      #
      # @returns Array, unique and sorted.
      def matching_lines(lined_content)
        used_lines = []
        lined_content.each_with_index do |line, line_number|
          used_lines.concat bounded_line_numbers(
            line_number,
            0,
            lined_content.size
          ) if line.include?(query)
        end

        used_lines.uniq.sort
      end

      # 'Chunkify' entire snippet. Splits the snippet data into matching lines +
      # surrounding_lines() worth of unmatching lines.
      #
      # @returns a hash with {snippet_object, snippet_chunks:{data,start_line}}
      def chunk_snippet(snippet)
        lined_content = snippet.content.split("\n")
        used_lines = matching_lines(lined_content)

        snippet_chunk = []
        snippet_chunks = []
        snippet_start_line = 0
        last_line = -1

        # Go through each used line, and add consecutive lines as a single chunk
        # to the snippet chunk array.
        used_lines.each do |line_number|
          if last_line < 0
            # Start a new chunk.
            snippet_start_line = line_number
            snippet_chunk << lined_content[line_number]
          elsif last_line == line_number - 1
            # Consecutive line, continue chunk.
            snippet_chunk << lined_content[line_number]
          else
            # Non-consecutive line, add chunk to chunk array.
            snippet_chunks << {
              data: snippet_chunk.join("\n"),
              start_line: snippet_start_line + 1
            }

            # Start a new chunk.
            snippet_chunk = [lined_content[line_number]]
            snippet_start_line = line_number
          end
          last_line = line_number
        end
        # Add final chunk to chunk array
        snippet_chunks << {
          data: snippet_chunk.join("\n"),
          start_line: snippet_start_line + 1
        }

        # Return snippet with chunk array
        { snippet_object: snippet, snippet_chunks: snippet_chunks }
      end

      # Defines how many unmatching lines should be
      # included around the matching lines in a snippet
      def surrounding_lines
        3
      end
    end
  end
end
# Rake tasks for creating and populating the Elasticsearch index from
# existing GitLab database/repository data.
namespace :gitlab do
  namespace :elastic do
    desc "Indexing repositories"
    task index_repository: :environment do
      # Pushes every repository's content into the index.
      Repository.import
    end

    desc "Indexing all wikis"
    task index_wiki: :environment do
      ProjectWiki.import
    end

    desc "Create indexes in the Elasticsearch from database records"
    task create_index: :environment do
      searchable_models = [Project, User, Issue, MergeRequest, Snippet, Note, Milestone]

      searchable_models.each do |model|
        model.__elasticsearch__.create_index!
        model.import
      end
    end
  end
end
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment