Commit d7b8bd27 authored by Thong Kuah's avatar Thong Kuah

Merge branch '12548-decouple' into 'master'

[Elasticsearch] Decouple code for versioned schema for repository and wiki on the Ruby side

Closes #12548

See merge request gitlab-org/gitlab!17991
parents f99e7f8c abb256e8
...@@ -7,6 +7,17 @@ require 'gitlab/current_settings' ...@@ -7,6 +7,17 @@ require 'gitlab/current_settings'
Gitlab.ee do Gitlab.ee do
require 'elasticsearch/model' require 'elasticsearch/model'
### Monkey patches
Elasticsearch::Model::Response::Records.prepend GemExtensions::Elasticsearch::Model::Response::Records
Elasticsearch::Model::Adapter::Multiple::Records.prepend GemExtensions::Elasticsearch::Model::Adapter::Multiple::Records
Elasticsearch::Model::Indexing::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Indexing::InstanceMethods
Elasticsearch::Model::Adapter::ActiveRecord::Importing.prepend GemExtensions::Elasticsearch::Model::Adapter::ActiveRecord::Importing
Elasticsearch::Model::Client::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model::Client::ClassMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model::ClassMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model.singleton_class.prepend GemExtensions::Elasticsearch::Model::Client
### Modified from elasticsearch-model/lib/elasticsearch/model.rb ### Modified from elasticsearch-model/lib/elasticsearch/model.rb
[ [
...@@ -32,15 +43,4 @@ Gitlab.ee do ...@@ -32,15 +43,4 @@ Gitlab.ee do
target.respond_to?(:as_indexed_json) ? target.__send__(:as_indexed_json, options) : super target.respond_to?(:as_indexed_json) ? target.__send__(:as_indexed_json, options) : super
end end
CODE CODE
### Monkey patches
Elasticsearch::Model::Response::Records.prepend GemExtensions::Elasticsearch::Model::Response::Records
Elasticsearch::Model::Adapter::Multiple::Records.prepend GemExtensions::Elasticsearch::Model::Adapter::Multiple::Records
Elasticsearch::Model::Indexing::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Indexing::InstanceMethods
Elasticsearch::Model::Adapter::ActiveRecord::Importing.prepend GemExtensions::Elasticsearch::Model::Adapter::ActiveRecord::Importing
Elasticsearch::Model::Client::InstanceMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model::Client::ClassMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model::ClassMethods.prepend GemExtensions::Elasticsearch::Model::Client
Elasticsearch::Model.singleton_class.prepend GemExtensions::Elasticsearch::Model::Client
end end
...@@ -88,8 +88,8 @@ def instrument_classes(instrumentation) ...@@ -88,8 +88,8 @@ def instrument_classes(instrumentation)
instrumentation.instrument_instance_methods(Gitlab::Highlight) instrumentation.instrument_instance_methods(Gitlab::Highlight)
Gitlab.ee do Gitlab.ee do
instrumentation.instrument_methods(Elasticsearch::Git::Repository) instrumentation.instrument_instance_methods(Elastic::Latest::GitInstanceProxy)
instrumentation.instrument_instance_methods(Elasticsearch::Git::Repository) instrumentation.instrument_instance_methods(Elastic::Latest::GitClassProxy)
instrumentation.instrument_instance_methods(Search::GlobalService) instrumentation.instrument_instance_methods(Search::GlobalService)
instrumentation.instrument_instance_methods(Search::ProjectService) instrumentation.instrument_instance_methods(Search::ProjectService)
......
...@@ -33,6 +33,7 @@ module Elastic ...@@ -33,6 +33,7 @@ module Elastic
# Add to the registry if it's a class (and not in intermediate module) # Add to the registry if it's a class (and not in intermediate module)
Elasticsearch::Model::Registry.add(self) if self.is_a?(Class) Elasticsearch::Model::Registry.add(self) if self.is_a?(Class)
if self < ActiveRecord::Base
after_commit on: :create do after_commit on: :create do
if Gitlab::CurrentSettings.elasticsearch_indexing? && self.searchable? if Gitlab::CurrentSettings.elasticsearch_indexing? && self.searchable?
ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id, self.es_id) ElasticIndexerWorker.perform_async(:index, self.class.to_s, self.id, self.es_id)
...@@ -63,6 +64,7 @@ module Elastic ...@@ -63,6 +64,7 @@ module Elastic
end end
end end
end end
end
class_methods do class_methods do
def __elasticsearch__ def __elasticsearch__
......
...@@ -4,92 +4,18 @@ module Elastic ...@@ -4,92 +4,18 @@ module Elastic
module RepositoriesSearch module RepositoriesSearch
extend ActiveSupport::Concern extend ActiveSupport::Concern
included do include ApplicationVersionedSearch
include Elasticsearch::Git::Repository
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-')
def repository_id
project.id
end
def es_type
'blob'
end
delegate :id, to: :project, prefix: true included do
delegate(:find_commits_by_message_with_elastic, :delete_index_for_commits_and_blobs, :elastic_search, to: :__elasticsearch__)
def client_for_indexing
self.__elasticsearch__.client
end
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20)
response = project.repository.search(query, type: :commit, page: page, per: per_page)[:commits][:results]
commits = response.map do |result|
commit result["_source"]["commit"]["sha"]
end.compact
# Before "map" we had a paginated array so we need to recover it
offset = per_page * ((page || 1) - 1)
Kaminari.paginate_array(commits, total_count: response.total_count, limit: per_page, offset: offset)
end
end
class_methods do
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {})
response = Repository.search(
query,
type: :commit,
page: page,
per: per_page,
options: options
)[:commits][:results]
response_count = response.total_count
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq
projects = Project.includes(:route).where(id: project_ids).index_by(&:id)
commits = response.map do |result|
project_id = result["_source"]["commit"]["rid"].to_i
project = projects[project_id]
if project.nil? || project.pending_delete?
response_count -= 1
next
end
raw_commit = Gitlab::Git::Commit.new( class << self
project.repository.raw, delegate(:find_commits_by_message_with_elastic, to: :__elasticsearch__)
prepare_commit(result['_source']['commit']),
lazy_load_parents: true
)
Commit.new(raw_commit, project)
end end
# Remove results for deleted projects
commits.compact!
# Before "map" we had a paginated array so we need to recover it
offset = per_page * ((page || 1) - 1)
Kaminari.paginate_array(commits, total_count: response_count, limit: per_page, offset: offset)
end end
def prepare_commit(raw_commit_hash) def index_commits_and_blobs(from_rev: nil, to_rev: nil)
{ ::ElasticCommitIndexerWorker.perform_async(project.id, from_rev, to_rev)
id: raw_commit_hash['sha'],
message: raw_commit_hash['message'],
parent_ids: nil,
author_name: raw_commit_hash['author']['name'],
author_email: raw_commit_hash['author']['email'],
authored_date: Time.parse(raw_commit_hash['author']['time']).utc,
committer_name: raw_commit_hash['committer']['name'],
committer_email: raw_commit_hash['committer']['email'],
committed_date: Time.parse(raw_commit_hash['committer']['time']).utc
}
end
end end
end end
end end
...@@ -4,36 +4,12 @@ module Elastic ...@@ -4,36 +4,12 @@ module Elastic
module WikiRepositoriesSearch module WikiRepositoriesSearch
extend ActiveSupport::Concern extend ActiveSupport::Concern
included do include ApplicationVersionedSearch
include Elasticsearch::Git::Repository
index_name [Rails.application.class.parent_name.downcase, Rails.env].join('-') delegate(:delete_index_for_commits_and_blobs, :elastic_search, to: :__elasticsearch__)
def repository_id
"wiki_#{project.id}"
end
def es_type
'wiki_blob'
end
delegate :id, to: :project, prefix: true
def client_for_indexing
self.__elasticsearch__.client
end
def index_wiki_blobs(to_sha = nil) def index_wiki_blobs(to_sha = nil)
ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true) ElasticCommitIndexerWorker.perform_async(project.id, nil, to_sha, true)
end end
def self.import
Project.with_wiki_enabled.find_each do |project|
if project.use_elasticsearch? && !project.wiki.empty?
project.wiki.index_wiki_blobs
end
end
end
end
end end
end end
...@@ -26,12 +26,50 @@ module Elastic ...@@ -26,12 +26,50 @@ module Elastic
my_ngram_analyzer: { my_ngram_analyzer: {
tokenizer: 'my_ngram_tokenizer', tokenizer: 'my_ngram_tokenizer',
filter: ['lowercase'] filter: ['lowercase']
},
path_analyzer: {
type: 'custom',
tokenizer: 'path_tokenizer',
filter: %w(lowercase asciifolding)
},
sha_analyzer: {
type: 'custom',
tokenizer: 'sha_tokenizer',
filter: %w(lowercase asciifolding)
},
code_analyzer: {
type: 'custom',
tokenizer: 'whitespace',
filter: %w(code edgeNGram_filter lowercase asciifolding)
},
code_search_analyzer: {
type: 'custom',
tokenizer: 'whitespace',
filter: %w(lowercase asciifolding)
} }
}, },
filter: { filter: {
my_stemmer: { my_stemmer: {
type: 'stemmer', type: 'stemmer',
name: 'light_english' name: 'light_english'
},
code: {
type: "pattern_capture",
preserve_original: true,
patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)",
"(?=([\\p{Lu}]+[\\p{L}]+))",
'"((?:\\"|[^"]|\\")*)"', # capture terms inside quotes, removing the quotes
"'((?:\\'|[^']|\\')*)'", # same as above, for single quotes
'\.([^.]+)(?=\.|\s|\Z)', # separate terms on periods
'\/?([^\/]+)(?=\/|\b)' # separate path terms (like/this/one)
]
},
edgeNGram_filter: {
type: 'edgeNGram',
min_gram: 2,
max_gram: 40
} }
}, },
tokenizer: { tokenizer: {
...@@ -40,6 +78,16 @@ module Elastic ...@@ -40,6 +78,16 @@ module Elastic
min_gram: 2, min_gram: 2,
max_gram: 3, max_gram: 3,
token_chars: %w(letter digit) token_chars: %w(letter digit)
},
sha_tokenizer: {
type: "edgeNGram",
min_gram: 5,
max_gram: 40,
token_chars: %w(letter digit)
},
path_tokenizer: {
type: 'path_hierarchy',
reverse: true
} }
} }
} }
......
# frozen_string_literal: true
module Elastic
  module Latest
    # Class-level Elasticsearch queries for git-backed documents (commits,
    # blobs and wiki blobs).
    #
    # The including class must respond to +search(query_hash)+ and return an
    # object exposing +results+ and +size+; that method is not defined here.
    module GitClassProxy
      # Runs one or more searches depending on +type+.
      #
      # @param query [String] the user-supplied search string
      # @param type [Symbol, String] :all, :commit, :blob or :wiki_blob;
      #   any other value performs no search
      # @param page [Integer, nil] 1-based page number (nil is treated as 1)
      # @param per [Integer] page size
      # @param options [Hash] optional :repository_id, :additional_filter,
      #   :highlight and (blobs only) :language; the hash is not modified
      # @return [Hash] always has :blobs and :commits (empty arrays when not
      #   searched); searched scopes map to { results:, total_count: }
      def elastic_search(query, type: :all, page: 1, per: 20, options: {})
        results = { blobs: [], commits: [] }

        case type.to_sym
        when :all
          results[:blobs] = search_blob(query, page: page, per: per, options: options)
          results[:commits] = search_commit(query, page: page, per: per, options: options)
          results[:wiki_blobs] = search_blob(query, type: :wiki_blob, page: page, per: per, options: options)
        when :commit
          results[:commits] = search_commit(query, page: page, per: per, options: options)
        when :blob, :wiki_blob
          results[type.to_s.pluralize.to_sym] = search_blob(query, type: type, page: page, per: per, options: options)
        end

        results
      end

      private

      # Builds and runs the commit query. A blank +query+ matches every
      # commit document instead of running a simple_query_string.
      def search_commit(query, page: 1, per: 20, options: {})
        page ||= 1

        fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map { |i| "commit.#{i}" }

        query_hash = {
          query: {
            bool: {
              must: {
                simple_query_string: {
                  fields: fields,
                  query: query,
                  default_operator: :and
                }
              },
              filter: [{ term: { 'type' => 'commit' } }]
            }
          },
          size: per,
          from: per * (page - 1)
        }

        if query.blank?
          query_hash[:query][:bool][:must] = { match_all: {} }
          query_hash[:track_scores] = true
        end

        append_scope_filters(query_hash[:query][:bool][:filter], options, 'commit.rid')

        if options[:highlight]
          # Highlight field names are the search fields without their "^boost" suffix.
          es_fields = fields.each_with_object({}) do |field, memo|
            memo[field.split('^').first.to_sym] = {}
          end

          query_hash[:highlight] = highlight_tag_options.merge(fields: es_fields)
        end

        query_hash[:sort] = [:_score]

        run_git_search(query_hash)
      end

      # Builds and runs the blob (or wiki blob) query. The raw +query+ is
      # parsed by Gitlab::Search::Query, whose filename/path/extension
      # filters are added to the Elasticsearch filter list.
      def search_blob(query, type: :blob, page: 1, per: 20, options: {})
        page ||= 1

        query = ::Gitlab::Search::Query.new(query) do
          filter :filename, field: :file_name
          filter :path, parser: ->(input) { "*#{input.downcase}*" }
          filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
        end

        query_hash = {
          query: {
            bool: {
              must: {
                simple_query_string: {
                  query: query.term,
                  default_operator: :and,
                  fields: %w[blob.content blob.file_name]
                }
              },
              filter: [
                { term: { type: type } }
              ]
            }
          },
          size: per,
          from: per * (page - 1)
        }

        bool = query_hash[:query][:bool]
        bool[:filter] += query.elasticsearch_filters(:blob)

        append_scope_filters(bool[:filter], options, 'blob.rid')

        if options[:language]
          bool[:filter] << {
            terms: {
              'blob.language' => [options[:language]].flatten
            }
          }
        end

        query_hash[:sort] = [:_score]

        if options[:highlight]
          query_hash[:highlight] = highlight_tag_options.merge(
            order: "score",
            fields: {
              "blob.content" => {},
              "blob.file_name" => {}
            }
          )
        end

        run_git_search(query_hash)
      end

      # Appends the optional repository-id restriction (on +rid_field+) and
      # the caller-supplied :additional_filter to +filters+, in that order.
      def append_scope_filters(filters, options, rid_field)
        if options[:repository_id]
          filters << {
            terms: {
              rid_field => [options[:repository_id]].flatten
            }
          }
        end

        filters << options[:additional_filter] if options[:additional_filter]
      end

      # Marker tags wrapped around highlighted fragments; a fresh hash is
      # returned on every call so callers may merge into it freely.
      def highlight_tag_options
        {
          pre_tags: ["gitlabelasticsearch→"],
          post_tags: ["←gitlabelasticsearch"]
        }
      end

      # Executes +query_hash+ through the including class's +search+ and
      # wraps the response in the shape every git search returns.
      def run_git_search(query_hash)
        res = search(query_hash)

        {
          results: res.results,
          total_count: res.size
        }
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Instance-level Elasticsearch behaviour shared by git-backed proxies.
    #
    # Including classes must provide +project_id+, +client+, +index_name+ and
    # a private +repository_id+; none of those are defined in this module.
    module GitInstanceProxy
      extend ActiveSupport::Concern

      class_methods do
        # Extends the inherited list with the index-deletion method.
        def methods_for_all_write_targets
          super + [:delete_index_for_commits_and_blobs]
        end
      end

      # Identifier of the parent project document; also used as the routing
      # value when deleting by query.
      def es_parent
        "project_#{project_id}"
      end

      # Searches through the class-level +elastic_search+, scoped to this
      # repository unless the caller already supplied :repository_id.
      #
      # The caller's +options+ hash is left untouched: writing the id into it
      # would make a hash reused across repositories keep the first
      # repository's id for every later search.
      #
      # @return whatever the class-level +elastic_search+ returns
      def elastic_search(query, type: :all, page: 1, per: 20, options: {})
        scoped_options =
          if options[:repository_id].nil?
            options.merge(repository_id: repository_id)
          else
            options
          end

        self.class.elastic_search(query, type: type, page: page, per: per, options: scoped_options)
      end

      # Deletes this project's indexed git documents with a delete-by-query.
      #
      # @param wiki [Boolean] true removes 'wiki_blob' documents; false
      #   removes 'commit' and 'blob' documents
      def delete_index_for_commits_and_blobs(wiki: false)
        types = wiki ? %w[wiki_blob] : %w[commit blob]

        client.delete_by_query(
          index: index_name,
          routing: es_parent,
          body: {
            query: {
              bool: {
                filter: [
                  {
                    terms: {
                      type: types
                    }
                  },
                  {
                    has_parent: {
                      parent_type: 'project',
                      query: {
                        term: {
                          id: project_id
                        }
                      }
                    }
                  }
                ]
              }
            }
          }
        )
      end

      private

      # Value used to scope searches to one repository; must be overridden by
      # the including class.
      def repository_id
        raise NotImplementedError
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Class-level Elasticsearch proxy for project wikis; the search queries
    # themselves come from GitClassProxy.
    class ProjectWikiClassProxy < ApplicationClassProxy
      include GitClassProxy

      # Document type under which wiki content is indexed.
      def es_type
        'wiki_blob'
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Instance-level Elasticsearch proxy for a single project wiki.
    class ProjectWikiInstanceProxy < ApplicationInstanceProxy
      include GitInstanceProxy

      # +project+ is read from the proxied target, and +project_id+ from that project.
      delegate :project, to: :target
      delegate :id, to: :project, prefix: true

      private

      # Wiki documents are scoped by the project id prefixed with "wiki_".
      def repository_id
        "wiki_#{project.id}"
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Class-level Elasticsearch proxy for repositories; adds a cross-project
    # commit search on top of GitClassProxy.
    class RepositoryClassProxy < ApplicationClassProxy
      include GitClassProxy

      # Document type reported for repository content.
      def es_type
        'blob'
      end

      # Searches commits by message and converts each hit into a Commit.
      #
      # Hits whose project cannot be loaded, or is pending deletion, are
      # dropped and subtracted from the reported total.
      #
      # @param query [String] search string
      # @param page [Integer, nil] 1-based page number (nil is treated as 1
      #   when computing the offset)
      # @param per_page [Integer] page size
      # @param options [Hash] passed through to +elastic_search+
      # @return a Kaminari-paginated array of Commit objects
      def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {})
        hits = elastic_search(query, type: :commit, page: page, per: per_page, options: options)[:commits][:results]
        remaining_total = hits.total_count

        # One query for every referenced project instead of one SELECT per hit.
        repository_ids = hits.map { |hit| hit["_source"]["commit"]["rid"] }.uniq
        projects_by_id = Project.with_route.id_in(repository_ids).index_by(&:id)

        commits = hits.map do |hit|
          project = projects_by_id[hit["_source"]["commit"]["rid"].to_i]

          if project.nil? || project.pending_delete?
            remaining_total -= 1
            next
          end

          git_commit = Gitlab::Git::Commit.new(
            project.repository.raw,
            prepare_commit(hit['_source']['commit']),
            lazy_load_parents: true
          )

          Commit.new(git_commit, project)
        end

        # Skipped hits left nil placeholders behind.
        commits.compact!

        # Mapping lost the pagination metadata, so rebuild it here.
        Kaminari.paginate_array(
          commits,
          total_count: remaining_total,
          limit: per_page,
          offset: per_page * ((page || 1) - 1)
        )
      end

      private

      # Translates the indexed commit document into the attribute hash handed
      # to Gitlab::Git::Commit.new; parents are left nil.
      def prepare_commit(raw_commit_hash)
        author = raw_commit_hash['author']
        committer = raw_commit_hash['committer']

        {
          id: raw_commit_hash['sha'],
          message: raw_commit_hash['message'],
          parent_ids: nil,
          author_name: author['name'],
          author_email: author['email'],
          authored_date: Time.parse(author['time']).utc,
          committer_name: committer['name'],
          committer_email: committer['email'],
          committed_date: Time.parse(committer['time']).utc
        }
      end
    end
  end
end
# frozen_string_literal: true
module Elastic
  module Latest
    # Instance-level Elasticsearch proxy for a single repository.
    class RepositoryInstanceProxy < ApplicationInstanceProxy
      include GitInstanceProxy

      # +project+ is read from the proxied target, and +project_id+ from that project.
      delegate :project, to: :target
      delegate :id, to: :project, prefix: true

      # Searches this repository's commits by message and resolves each hit
      # through +commit+ using the indexed SHA; hits that resolve to nil are
      # dropped.
      #
      # @param query [String] search string
      # @param page [Integer, nil] 1-based page number (nil is treated as 1
      #   when computing the offset)
      # @param per_page [Integer] page size
      # @return a Kaminari-paginated array of the resolved commits
      def find_commits_by_message_with_elastic(query, page: 1, per_page: 20)
        hits = elastic_search(query, type: :commit, page: page, per: per_page)[:commits][:results]

        resolved = hits.map { |hit| commit(hit["_source"]["commit"]["sha"]) }.compact

        # Mapping lost the pagination metadata, so rebuild it here.
        Kaminari.paginate_array(
          resolved,
          total_count: hits.total_count,
          limit: per_page,
          offset: per_page * ((page || 1) - 1)
        )
      end

      private

      # Repository documents are scoped by the plain project id.
      def repository_id
        project.id
      end
    end
  end
end
...@@ -19,6 +19,8 @@ module Elastic ...@@ -19,6 +19,8 @@ module Elastic
end end
end end
private
def proxy_class_name def proxy_class_name
"#{@data_class.name}ClassProxy" "#{@data_class.name}ClassProxy"
end end
......
...@@ -12,6 +12,8 @@ module Elastic ...@@ -12,6 +12,8 @@ module Elastic
generate_forwarding generate_forwarding
end end
private
def proxy_class_name def proxy_class_name
"#{@data_class.name}InstanceProxy" "#{@data_class.name}InstanceProxy"
end end
......
...@@ -16,8 +16,6 @@ module Elastic ...@@ -16,8 +16,6 @@ module Elastic
version.const_get(proxy_class_name, false).new(data_target) version.const_get(proxy_class_name, false).new(data_target)
end end
private
# TODO: load from db table https://gitlab.com/gitlab-org/gitlab/issues/12555 # TODO: load from db table https://gitlab.com/gitlab-org/gitlab/issues/12555
def elastic_reading_target def elastic_reading_target
strong_memoize(:elastic_reading_target) do strong_memoize(:elastic_reading_target) do
...@@ -32,6 +30,8 @@ module Elastic ...@@ -32,6 +30,8 @@ module Elastic
end end
end end
private
def get_data_class(klass) def get_data_class(klass)
klass < ActiveRecord::Base ? klass.base_class : klass klass < ActiveRecord::Base ? klass.base_class : klass
end end
......
# frozen_string_literal: true
module Elastic
module V12p1
# Schema version V12p1 reuses the Latest wiki class proxy unchanged.
ProjectWikiClassProxy = Elastic::Latest::ProjectWikiClassProxy
end
end
# frozen_string_literal: true
module Elastic
module V12p1
# Schema version V12p1 reuses the Latest wiki instance proxy unchanged.
ProjectWikiInstanceProxy = Elastic::Latest::ProjectWikiInstanceProxy
end
end
# frozen_string_literal: true # frozen_string_literal: true
module Elasticsearch module Elastic
module Git module V12p1
RepositoryClassProxy = Elastic::Latest::RepositoryClassProxy
end end
end end
# frozen_string_literal: true
module Elastic
module V12p1
# Schema version V12p1 reuses the Latest repository instance proxy unchanged.
RepositoryInstanceProxy = Elastic::Latest::RepositoryInstanceProxy
end
end
# frozen_string_literal: true
module Elasticsearch
  module Git
    # Mixin that adds +encode!+, a best-effort in-place conversion of a
    # string to valid UTF-8.
    module EncoderHelper
      extend ActiveSupport::Concern

      included do
        # Coerces +message+ to UTF-8 in place.
        #
        # @param message [Object] anything; objects without +force_encoding+
        #   are ignored
        # @return [String, nil] nil when +message+ cannot be re-encoded; the
        #   message itself when it is valid UTF-8, detected as binary, or
        #   successfully cleaned; a "--broken encoding: ..." placeholder
        #   string when any step raises
        def encode!(message)
          return unless message.respond_to?(:force_encoding)

          # Already valid when reinterpreted as UTF-8: nothing more to do.
          message.force_encoding("UTF-8")
          return message if message.valid_encoding?

          # Binary content is tagged as such and returned untouched.
          detection = CharlockHolmes::EncodingDetector.detect(message)
          return message.force_encoding("BINARY") if detection && detection[:type] == :binary

          # Reinterpret the bytes using the detected encoding, when there is one.
          detected_encoding = detection && detection[:encoding]
          message.force_encoding(detected_encoding) if detected_encoding

          # Transcode and strip characters that cannot be represented.
          message.replace(clean(message))
        rescue StandardError
          "--broken encoding: #{detection ? detection[:encoding] : 'unknown'}"
        end

        private

        # Round-trips through UTF-16BE, dropping invalid or undefined
        # characters, then removes NUL characters from the UTF-8 result.
        def clean(message)
          utf16 = message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
          utf8 = utf16.encode("UTF-8")

          utf8.gsub("\0".encode("UTF-8"), "")
        end
      end
    end
  end
end
# frozen_string_literal: true
module Elasticsearch
  module Git
    # Mixin that turns the including class into an Elasticsearch model and
    # declares the index name, document type and analysis settings used for
    # git content.
    module Model
      extend ActiveSupport::Concern

      included do
        extend ActiveModel::Naming
        include ActiveModel::Model
        include Elasticsearch::Model

        # Index name is "<class name>-index[-<rails env>]"; the environment
        # suffix is omitted when Rails is not loaded.
        env = defined?(::Rails) ? ::Rails.env.to_s : nil

        index_name [self.name.downcase, 'index', env].compact.join('-')

        document_type 'doc'

        analyzers = {
          path_analyzer: {
            type: 'custom',
            tokenizer: 'path_tokenizer',
            filter: %w(lowercase asciifolding)
          },
          sha_analyzer: {
            type: 'custom',
            tokenizer: 'sha_tokenizer',
            filter: %w(lowercase asciifolding)
          },
          code_analyzer: {
            type: 'custom',
            tokenizer: 'whitespace',
            filter: %w(code edgeNGram_filter lowercase asciifolding)
          },
          code_search_analyzer: {
            type: 'custom',
            tokenizer: 'whitespace',
            filter: %w(lowercase asciifolding)
          }
        }

        tokenizers = {
          sha_tokenizer: {
            type: "edgeNGram",
            min_gram: 5,
            max_gram: 40,
            token_chars: %w(letter digit)
          },
          path_tokenizer: {
            type: 'path_hierarchy',
            reverse: true
          }
        }

        token_filters = {
          code: {
            type: "pattern_capture",
            preserve_original: true,
            patterns: [
              "(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
              "(\\d+)",
              "(?=([\\p{Lu}]+[\\p{L}]+))",
              '"((?:\\"|[^"]|\\")*)"', # capture terms inside quotes, removing the quotes
              "'((?:\\'|[^']|\\')*)'", # same as above, for single quotes
              '\.([^.]+)(?=\.|\s|\Z)', # separate terms on periods
              '\/?([^\/]+)(?=\/|\b)' # separate path terms (like/this/one)
            ]
          },
          edgeNGram_filter: {
            type: 'edgeNGram',
            min_gram: 2,
            max_gram: 40
          }
        }

        settings(
          index: {
            analysis: {
              analyzer: analyzers,
              tokenizer: tokenizers,
              filter: token_filters
            }
          }
        )
      end
    end
  end
end
# frozen_string_literal: true
# Legacy mixin that adds Elasticsearch indexing and search of git content
# (commits, blobs, wiki blobs) directly onto the including class.
# The instance methods rely on the host providing +project_id+.
module Elasticsearch
module Git
module Repository
extend ActiveSupport::Concern
included do
include Elasticsearch::Git::Model
include Elasticsearch::Git::EncoderHelper
# Identifier of the parent project document; also used as the routing value
# in delete_index_for_commits_and_blobs.
def es_parent
"project_#{project_id}"
end
# Default document type; the name suggests hosts may override it.
def es_type
'blob'
end
# Enqueues an ElasticCommitIndexerWorker job for this project and the given revision range.
def index_commits_and_blobs(from_rev: nil, to_rev: nil)
::ElasticCommitIndexerWorker.perform_async(project_id, from_rev, to_rev)
end
# Deletes this project's indexed git documents with a delete-by-query:
# 'wiki_blob' documents when +wiki+ is true, 'commit' and 'blob' otherwise.
def delete_index_for_commits_and_blobs(wiki: false)
types =
if wiki
%w[wiki_blob]
else
%w[commit blob]
end
client_for_indexing.delete_by_query(
index: self.class.index_name,
routing: es_parent,
body: {
query: {
bool: {
filter: [
{
terms: {
type: types
}
},
{
has_parent: {
parent_type: 'project',
query: {
term: {
id: project_id
}
}
}
}
]
}
}
}
)
end
# Instance-level search: delegates to the class-level +search+, scoped to
# this repository unless :repository_id was supplied.
# NOTE(review): the id is written into the caller's +options+ hash.
def search(query, type: :all, page: 1, per: 20, options: {})
options[:repository_id] = repository_id if options[:repository_id].nil?
self.class.search(query, type: type, page: page, per: per, options: options)
end
# For Overwrite
def repository_id
@repository_id
end
# Only defined when the host class does not already have +path_to_repo+
# at the time this block is evaluated.
unless method_defined?(:path_to_repo)
def path_to_repo
@path_to_repo.presence || raise(NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method')
end
end
# Memoized Elasticsearch client used by delete_index_for_commits_and_blobs.
def client_for_indexing
@client_for_indexing ||= Elasticsearch::Client.new retry_on_failure: 5
end
end
class_methods do
# Runs one or more searches depending on +type+ (:all, :commit, :blob or
# :wiki_blob; anything else performs no search). The result always has
# :blobs and :commits keys; searched scopes map to { results:, total_count: }.
def search(query, type: :all, page: 1, per: 20, options: {})
results = { blobs: [], commits: [] }
case type.to_sym
when :all
results[:blobs] = search_blob(query, page: page, per: per, options: options)
results[:commits] = search_commit(query, page: page, per: per, options: options)
results[:wiki_blobs] = search_blob(query, type: :wiki_blob, page: page, per: per, options: options)
when :commit
results[:commits] = search_commit(query, page: page, per: per, options: options)
when :blob, :wiki_blob
results[type.to_s.pluralize.to_sym] = search_blob(query, type: type, page: page, per: per, options: options)
end
results
end
# Builds and runs the commit query. A nil +page+ is treated as page 1 and
# a blank +query+ matches every commit document.
def search_commit(query, page: 1, per: 20, options: {})
page ||= 1
fields = %w(message^10 sha^5 author.name^2 author.email^2 committer.name committer.email).map {|i| "commit.#{i}"}
query_hash = {
query: {
bool: {
must: {
simple_query_string: {
fields: fields,
query: query,
default_operator: :and
}
},
filter: [{ term: { 'type' => 'commit' } }]
}
},
size: per,
from: per * (page - 1)
}
if query.blank?
query_hash[:query][:bool][:must] = { match_all: {} }
query_hash[:track_scores] = true
end
if options[:repository_id]
query_hash[:query][:bool][:filter] << {
terms: {
'commit.rid' => [options[:repository_id]].flatten
}
}
end
if options[:additional_filter]
query_hash[:query][:bool][:filter] << options[:additional_filter]
end
if options[:highlight]
# Highlight field names are the search fields without their "^boost" suffix.
es_fields = fields.map { |field| field.split('^').first }.each_with_object({}) do |field, memo|
memo[field.to_sym] = {}
end
query_hash[:highlight] = {
pre_tags: ["gitlabelasticsearch→"],
post_tags: ["←gitlabelasticsearch"],
fields: es_fields
}
end
# NOTE(review): options[:order] is set here but not read anywhere in this module.
options[:order] = :default if options[:order].blank?
query_hash[:sort] = [:_score]
res = self.__elasticsearch__.search(query_hash)
{
results: res.results,
total_count: res.size
}
end
# Builds and runs the blob (or wiki blob) query. The raw +query+ is parsed
# by Gitlab::Search::Query; its filename/path/extension filters are added
# to the Elasticsearch filter list.
def search_blob(query, type: :blob, page: 1, per: 20, options: {})
page ||= 1
query = ::Gitlab::Search::Query.new(query) do
filter :filename, field: :file_name
filter :path, parser: ->(input) { "*#{input.downcase}*" }
filter :extension, field: :path, parser: ->(input) { '*.' + input.downcase }
end
query_hash = {
query: {
bool: {
must: {
simple_query_string: {
query: query.term,
default_operator: :and,
fields: %w[blob.content blob.file_name]
}
},
filter: [
{ term: { type: type } }
]
}
},
size: per,
from: per * (page - 1)
}
query_hash[:query][:bool][:filter] += query.elasticsearch_filters(:blob)
if options[:repository_id]
query_hash[:query][:bool][:filter] << {
terms: {
'blob.rid' => [options[:repository_id]].flatten
}
}
end
if options[:additional_filter]
query_hash[:query][:bool][:filter] << options[:additional_filter]
end
if options[:language]
query_hash[:query][:bool][:filter] << {
terms: {
'blob.language' => [options[:language]].flatten
}
}
end
# NOTE(review): options[:order] is set here but not read anywhere in this module.
options[:order] = :default if options[:order].blank?
query_hash[:sort] = [:_score]
if options[:highlight]
query_hash[:highlight] = {
pre_tags: ["gitlabelasticsearch→"],
post_tags: ["←gitlabelasticsearch"],
order: "score",
fields: {
"blob.content" => {},
"blob.file_name" => {}
}
}
end
res = self.__elasticsearch__.search(query_hash)
{
results: res.results,
total_count: res.size
}
end
end
end
end
end
...@@ -22,17 +22,6 @@ module Gitlab ...@@ -22,17 +22,6 @@ module Gitlab
@project = project @project = project
@wiki = wiki @wiki = wiki
# We accept any form of settings, including string and array
# This is why JSON is needed
@vars = {
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env
}
@vars['GITALY_CONNECTION_INFO'] = {
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json
# Use the eager-loaded association if available. # Use the eager-loaded association if available.
@index_status = project.index_status @index_status = project.index_status
end end
...@@ -47,7 +36,9 @@ module Gitlab ...@@ -47,7 +36,9 @@ module Gitlab
return return
end end
run_indexer!(to_sha) repository.__elasticsearch__.elastic_writing_targets.each do |target|
run_indexer!(to_sha, target)
end
update_index_status(to_sha) update_index_status(to_sha)
true true
...@@ -63,9 +54,19 @@ module Gitlab ...@@ -63,9 +54,19 @@ module Gitlab
wiki? ? project.wiki.repository : project.repository wiki? ? project.wiki.repository : project.repository
end end
def run_indexer!(to_sha) def run_indexer!(to_sha, target)
# We accept any form of settings, including string and array
# This is why JSON is needed
vars = {
'RAILS_ENV' => Rails.env,
'ELASTIC_CONNECTION_INFO' => elasticsearch_config(target),
'GITALY_CONNECTION_INFO' => gitaly_connection_info,
'FROM_SHA' => from_sha,
'TO_SHA' => to_sha
}
if index_status && !repository_contains_last_indexed_commit? if index_status && !repository_contains_last_indexed_commit?
repository.delete_index_for_commits_and_blobs(wiki: wiki?) target.delete_index_for_commits_and_blobs(wiki: wiki?)
end end
path_to_indexer = Gitlab.config.elasticsearch.indexer_path path_to_indexer = Gitlab.config.elasticsearch.indexer_path
...@@ -77,8 +78,6 @@ module Gitlab ...@@ -77,8 +78,6 @@ module Gitlab
[path_to_indexer, project.id.to_s, repository_path] [path_to_indexer, project.id.to_s, repository_path]
end end
vars = @vars.merge('FROM_SHA' => from_sha, 'TO_SHA' => to_sha)
output, status = Gitlab::Popen.popen(command, nil, vars) output, status = Gitlab::Popen.popen(command, nil, vars)
raise Error, output unless status&.zero? raise Error, output unless status&.zero?
...@@ -106,6 +105,18 @@ module Gitlab ...@@ -106,6 +105,18 @@ module Gitlab
"#{repository.disk_path}.git" "#{repository.disk_path}.git"
end end
def elasticsearch_config(target)
Gitlab::CurrentSettings.elasticsearch_config.merge(
index_name: target.index_name
).to_json
end
def gitaly_connection_info
{
storage: project.repository_storage
}.merge(Gitlab::GitalyClient.connection_data(project.repository_storage)).to_json
end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def update_index_status(to_sha) def update_index_status(to_sha)
head_commit = repository.try(:commit) head_commit = repository.try(:commit)
......
...@@ -50,7 +50,7 @@ module Gitlab ...@@ -50,7 +50,7 @@ module Gitlab
else else
# We use elastic for default branch only # We use elastic for default branch only
if root_ref? if root_ref?
project.repository.search( project.repository.elastic_search(
query, query,
type: :blob, type: :blob,
options: { highlight: true } options: { highlight: true }
...@@ -67,7 +67,7 @@ module Gitlab ...@@ -67,7 +67,7 @@ module Gitlab
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project) return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :read_wiki, project)
if project.wiki_enabled? && !project.wiki.empty? && query.present? if project.wiki_enabled? && !project.wiki.empty? && query.present?
project.wiki.search( project.wiki.elastic_search(
query, query,
type: :wiki_blob, type: :wiki_blob,
options: { highlight: true } options: { highlight: true }
......
...@@ -230,7 +230,7 @@ module Gitlab ...@@ -230,7 +230,7 @@ module Gitlab
additional_filter: repository_filter additional_filter: repository_filter
} }
Repository.search( Repository.elastic_search(
query, query,
type: :blob, type: :blob,
options: opt.merge({ highlight: true }) options: opt.merge({ highlight: true })
...@@ -246,7 +246,7 @@ module Gitlab ...@@ -246,7 +246,7 @@ module Gitlab
additional_filter: wiki_filter additional_filter: wiki_filter
} }
ProjectWiki.search( ProjectWiki.elastic_search(
query, query,
type: :wiki_blob, type: :wiki_blob,
options: opt.merge({ highlight: true }) options: opt.merge({ highlight: true })
......
...@@ -23,7 +23,7 @@ describe 'Repository index', :elastic do ...@@ -23,7 +23,7 @@ describe 'Repository index', :elastic do
end end
def indexed_file_paths_for(term) def indexed_file_paths_for(term)
blobs = Repository.search(term, type: :blob)[:blobs][:results].response blobs = Repository.elastic_search(term, type: :blob)[:blobs][:results].response
blobs.map do |blob| blobs.map do |blob|
blob['_source']['blob']['path'] blob['_source']['blob']['path']
end end
......
...@@ -65,7 +65,7 @@ describe SearchHelper do ...@@ -65,7 +65,7 @@ describe SearchHelper do
project.repository.index_commits_and_blobs project.repository.index_commits_and_blobs
Gitlab::Elastic::Helper.refresh_index Gitlab::Elastic::Helper.refresh_index
result = project.repository.search( result = project.repository.elastic_search(
'def popen', 'def popen',
type: :blob, type: :blob,
options: { highlight: true } options: { highlight: true }
...@@ -89,7 +89,7 @@ describe SearchHelper do ...@@ -89,7 +89,7 @@ describe SearchHelper do
end end
def es_blob_search def es_blob_search
Repository.search( Repository.elastic_search(
'def popen', 'def popen',
type: :blob, type: :blob,
options: { highlight: true } options: { highlight: true }
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for Elastic::Latest::GitInstanceProxy, exercised through
# Elastic::Latest::RepositoryInstanceProxy wrapping a project's repository.
describe Elastic::Latest::GitInstanceProxy do
  let(:project) { create(:project, :repository) }
  let(:included_class) { Elastic::Latest::RepositoryInstanceProxy }

  subject { included_class.new(project.repository) }

  describe '.methods_for_all_write_targets' do
    it 'contains extra method' do
      application_proxy_methods = Elastic::Latest::ApplicationInstanceProxy.methods_for_all_write_targets

      # Everything the application-level proxy lists, plus the one
      # repository-specific method.
      expect(included_class.methods_for_all_write_targets).to contain_exactly(
        *application_proxy_methods,
        :delete_index_for_commits_and_blobs
      )
    end
  end

  describe '#es_parent' do
    it 'contains project id' do
      expect(subject.es_parent).to eq("project_#{project.id}")
    end
  end

  describe '#elastic_search' do
    let(:params) { { type: :fake_type, page: 2, per: 30, options: { foo: :bar } } }

    it 'provides repository_id if not provided' do
      # Work on a deep copy so the params handed to the proxy stay untouched.
      expected_params = params.deep_dup.tap do |copy|
        copy[:options][:repository_id] = project.id
      end

      expect(subject.class).to receive(:elastic_search).with('foo', expected_params)

      subject.elastic_search('foo', params)
    end

    it 'uses provided repository_id' do
      params[:options][:repository_id] = 42

      expect(subject.class).to receive(:elastic_search).with('foo', params)

      subject.elastic_search('foo', params)
    end
  end

  describe '#delete_index_for_commits_and_blobs' do
    let(:write_targets) { [double(:write_target_1), double(:write_target_2)] }
    let(:read_target) { double(:read_target) }

    before do
      # Swap the repository's real targets for doubles so forwarding can be observed.
      es_proxy = project.repository.__elasticsearch__

      allow(es_proxy).to receive(:elastic_writing_targets).and_return(write_targets)
      allow(es_proxy).to receive(:elastic_reading_target).and_return(read_target)
    end

    it 'is forwarded to all write targets' do
      expect(read_target).not_to receive(:delete_index_for_commits_and_blobs)

      write_targets.each do |write_target|
        expect(write_target).to receive(:delete_index_for_commits_and_blobs).and_return({ '_shards' => {} })
      end

      project.repository.delete_index_for_commits_and_blobs
    end
  end
end
...@@ -12,6 +12,20 @@ describe Elastic::MultiVersionClassProxy do ...@@ -12,6 +12,20 @@ describe Elastic::MultiVersionClassProxy do
expect(result).to be_a(Elastic::V12p1::SnippetClassProxy) expect(result).to be_a(Elastic::V12p1::SnippetClassProxy)
expect(result.target).to eq(ProjectSnippet) expect(result.target).to eq(ProjectSnippet)
end end
context 'repository' do
  # Repository and ProjectWiki must each resolve to their own V12p1 class proxy
  # that still targets the original class.
  it 'returns class proxy in specified version' do
    repository_result = described_class.new(Repository).version('V12p1')
    wiki_result = described_class.new(ProjectWiki).version('V12p1')

    expect(repository_result).to be_a(Elastic::V12p1::RepositoryClassProxy)
    expect(repository_result.target).to eq(Repository)

    expect(wiki_result).to be_a(Elastic::V12p1::ProjectWikiClassProxy)
    expect(wiki_result.target).to eq(ProjectWiki)
  end
end
end end
describe 'method forwarding' do describe 'method forwarding' do
......
...@@ -14,6 +14,24 @@ describe Elastic::MultiVersionInstanceProxy do ...@@ -14,6 +14,24 @@ describe Elastic::MultiVersionInstanceProxy do
expect(result).to be_a(Elastic::V12p1::SnippetInstanceProxy) expect(result).to be_a(Elastic::V12p1::SnippetInstanceProxy)
expect(result.target).to eq(snippet) expect(result.target).to eq(snippet)
end end
context 'repository' do
  let(:project) { create(:project, :repository) }
  let(:repository) { project.repository }
  let(:wiki) { project.wiki }

  # A repository and a wiki instance must each resolve to their own V12p1
  # instance proxy that still targets the wrapped object.
  it 'returns instance proxy in specified version' do
    repository_result = described_class.new(repository).version('V12p1')
    wiki_result = described_class.new(wiki).version('V12p1')

    expect(repository_result).to be_a(Elastic::V12p1::RepositoryInstanceProxy)
    expect(repository_result.target).to eq(repository)

    expect(wiki_result).to be_a(Elastic::V12p1::ProjectWikiInstanceProxy)
    expect(wiki_result.target).to eq(wiki)
  end
end
end end
describe 'method forwarding' do describe 'method forwarding' do
......
# frozen_string_literal: true
require 'spec_helper'
# Checks the request that #index_document sends to the Elasticsearch client
# when called on a project instance proxy.
describe GemExtensions::Elasticsearch::Model::Indexing::InstanceMethods do
  describe '#index_document' do
    let(:project) { Project.new(id: 1) }
    let(:proxy) { Elastic::Latest::ProjectInstanceProxy.new(project) }

    it 'overrides _id with type being prepended' do
      # The document id is expected to be "project_1" rather than the bare record id.
      expect(proxy.client).to receive(:index).with(
        index: 'gitlab-test',
        type: 'doc',
        id: 'project_1',
        body: proxy.as_indexed_json
      )

      proxy.index_document
    end
  end
end
...@@ -51,7 +51,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -51,7 +51,7 @@ describe Gitlab::Elastic::Indexer do
], ],
nil, nil,
hash_including( hash_including(
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => expected_from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => nil 'TO_SHA' => nil
...@@ -80,7 +80,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -80,7 +80,7 @@ describe Gitlab::Elastic::Indexer do
end end
def indexed_wiki_paths_for(term) def indexed_wiki_paths_for(term)
blobs = ProjectWiki.search( blobs = ProjectWiki.elastic_search(
term, term,
type: :wiki_blob type: :wiki_blob
)[:wiki_blobs][:results].response )[:wiki_blobs][:results].response
...@@ -141,7 +141,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -141,7 +141,7 @@ describe Gitlab::Elastic::Indexer do
nil, nil,
hash_including( hash_including(
'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json, 'GITALY_CONNECTION_INFO' => gitaly_connection_data.to_json,
'ELASTIC_CONNECTION_INFO' => Gitlab::CurrentSettings.elasticsearch_config.to_json, 'ELASTIC_CONNECTION_INFO' => elasticsearch_config.to_json,
'RAILS_ENV' => Rails.env, 'RAILS_ENV' => Rails.env,
'FROM_SHA' => expected_from_sha, 'FROM_SHA' => expected_from_sha,
'TO_SHA' => to_sha 'TO_SHA' => to_sha
...@@ -213,7 +213,7 @@ describe Gitlab::Elastic::Indexer do ...@@ -213,7 +213,7 @@ describe Gitlab::Elastic::Indexer do
end end
def indexed_file_paths_for(term) def indexed_file_paths_for(term)
blobs = Repository.search( blobs = Repository.elastic_search(
term, term,
type: :blob type: :blob
)[:blobs][:results].response )[:blobs][:results].response
...@@ -280,4 +280,10 @@ describe Gitlab::Elastic::Indexer do ...@@ -280,4 +280,10 @@ describe Gitlab::Elastic::Indexer do
expect(status.indexed_at).not_to be_nil expect(status.indexed_at).not_to be_nil
expect(status.last_commit).to eq(sha) expect(status.last_commit).to eq(sha)
end end
# Connection settings the indexer expectations compare against: the current
# Elasticsearch settings with the index name overridden to 'gitlab-test'.
def elasticsearch_config
  overrides = { index_name: 'gitlab-test' }

  Gitlab::CurrentSettings.elasticsearch_config.merge(overrides)
end
end end
...@@ -18,8 +18,8 @@ describe ProjectWiki, :elastic do ...@@ -18,8 +18,8 @@ describe ProjectWiki, :elastic do
end end
it "searches wiki page" do it "searches wiki page" do
expect(project.wiki.search('term1', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1) expect(project.wiki.elastic_search('term1', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
expect(project.wiki.search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2) expect(project.wiki.elastic_search('term1 | term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(2)
end end
it 'indexes' do it 'indexes' do
...@@ -29,7 +29,7 @@ describe ProjectWiki, :elastic do ...@@ -29,7 +29,7 @@ describe ProjectWiki, :elastic do
end end
it 'can delete wiki pages' do it 'can delete wiki pages' do
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1) expect(project.wiki.elastic_search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(1)
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
project.wiki.find_page('omega_page').delete project.wiki.find_page('omega_page').delete
...@@ -44,6 +44,6 @@ describe ProjectWiki, :elastic do ...@@ -44,6 +44,6 @@ describe ProjectWiki, :elastic do
Gitlab::Elastic::Helper.refresh_index Gitlab::Elastic::Helper.refresh_index
end end
expect(project.wiki.search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(0) expect(project.wiki.elastic_search('term2', type: :wiki_blob)[:wiki_blobs][:total_count]).to eq(0)
end end
end end
...@@ -19,9 +19,9 @@ describe Repository, :elastic do ...@@ -19,9 +19,9 @@ describe Repository, :elastic do
project = create :project, :repository project = create :project, :repository
index!(project) index!(project)
expect(project.repository.search('def popen')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('def popen')[:blobs][:total_count]).to eq(1)
expect(project.repository.search('def | popen')[:blobs][:total_count] > 1).to be_truthy expect(project.repository.elastic_search('def | popen')[:blobs][:total_count] > 1).to be_truthy
expect(project.repository.search('initial')[:commits][:total_count]).to eq(1) expect(project.repository.elastic_search('initial')[:commits][:total_count]).to eq(1)
end end
it 'can filter blobs' do it 'can filter blobs' do
...@@ -29,20 +29,20 @@ describe Repository, :elastic do ...@@ -29,20 +29,20 @@ describe Repository, :elastic do
index!(project) index!(project)
# Finds custom-highlighting/test.gitlab-custom # Finds custom-highlighting/test.gitlab-custom
expect(project.repository.search('def | popen filename:test')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('def | popen filename:test')[:blobs][:total_count]).to eq(1)
# Should not find anything, since filename doesn't match on path # Should not find anything, since filename doesn't match on path
expect(project.repository.search('def | popen filename:files')[:blobs][:total_count]).to eq(0) expect(project.repository.elastic_search('def | popen filename:files')[:blobs][:total_count]).to eq(0)
# Finds files/ruby/popen.rb, files/markdown/ruby-style-guide.md, files/ruby/regex.rb, files/ruby/version_info.rb # Finds files/ruby/popen.rb, files/markdown/ruby-style-guide.md, files/ruby/regex.rb, files/ruby/version_info.rb
expect(project.repository.search('def | popen path:ruby')[:blobs][:total_count]).to eq(4) expect(project.repository.elastic_search('def | popen path:ruby')[:blobs][:total_count]).to eq(4)
# Finds files/markdown/ruby-style-guide.md # Finds files/markdown/ruby-style-guide.md
expect(project.repository.search('def | popen extension:md')[:blobs][:total_count]).to eq(1) expect(project.repository.elastic_search('def | popen extension:md')[:blobs][:total_count]).to eq(1)
end end
def search_and_check!(on, query, type:, per: 1000) def search_and_check!(on, query, type:, per: 1000)
results = on.search(query, type: type, per: per)["#{type}s".to_sym][:results] results = on.elastic_search(query, type: type, per: per)["#{type}s".to_sym][:results]
blobs, commits = results.partition { |result| result['_source']['blob'].present? } blobs, commits = results.partition { |result| result['_source']['blob'].present? }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment