Optimize query to return outdated projects

parent 8f5c6585
...@@ -2134,6 +2134,7 @@ ActiveRecord::Schema.define(version: 20180612175636) do ...@@ -2134,6 +2134,7 @@ ActiveRecord::Schema.define(version: 20180612175636) do
add_index "projects", ["created_at"], name: "index_projects_on_created_at", using: :btree add_index "projects", ["created_at"], name: "index_projects_on_created_at", using: :btree
add_index "projects", ["creator_id"], name: "index_projects_on_creator_id", using: :btree add_index "projects", ["creator_id"], name: "index_projects_on_creator_id", using: :btree
add_index "projects", ["description"], name: "index_projects_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"} add_index "projects", ["description"], name: "index_projects_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"}
add_index "projects", ["id", "repository_storage", "last_repository_updated_at"], name: "idx_projects_on_repository_storage_last_repository_updated_at", using: :btree
add_index "projects", ["id"], name: "index_projects_on_id_partial_for_visibility", unique: true, where: "(visibility_level = ANY (ARRAY[10, 20]))", using: :btree add_index "projects", ["id"], name: "index_projects_on_id_partial_for_visibility", unique: true, where: "(visibility_level = ANY (ARRAY[10, 20]))", using: :btree
add_index "projects", ["id"], name: "index_projects_on_mirror_and_mirror_trigger_builds_both_true", where: "((mirror IS TRUE) AND (mirror_trigger_builds IS TRUE))", using: :btree add_index "projects", ["id"], name: "index_projects_on_mirror_and_mirror_trigger_builds_both_true", where: "((mirror IS TRUE) AND (mirror_trigger_builds IS TRUE))", using: :btree
add_index "projects", ["last_activity_at"], name: "index_projects_on_last_activity_at", using: :btree add_index "projects", ["last_activity_at"], name: "index_projects_on_last_activity_at", using: :btree
......
module Geo module Geo
class RepositoryVerificationFinder class RepositoryVerificationFinder
# Name of the repository storage shard this finder is limited to, or nil
# when no shard was given.
attr_reader :shard_name
# Builds a finder, optionally scoped to a single storage shard.
#
# shard_name - optional shard name (defaults to nil). The finder queries in
#              this class check `shard_name.present?` and, when it is set,
#              add a repository_storage condition through shard_restriction.
def initialize(shard_name: nil)
@shard_name = shard_name
end
def find_outdated_projects(batch_size:) def find_outdated_projects(batch_size:)
Project.select(:id) cte_definition =
.with_route projects_table
.joins(:repository_state) .join(repository_state_table).on(project_id_matcher)
.where(repository_outdated.or(wiki_outdated)) .project(projects_table[:id], projects_table[:last_repository_updated_at])
.order(last_repository_updated_at_asc) .where(repository_outdated.or(wiki_outdated))
.limit(batch_size) .take(batch_size)
if shard_name.present?
cte_definition = shard_restriction(cte_definition)
end
cte_table = Arel::Table.new(:outdated_projects)
composed_cte = Arel::Nodes::As.new(cte_table, cte_definition)
alias_to = Arel::Nodes::As.new(cte_table, projects_table)
Project.with(composed_cte)
.from(alias_to)
.order(last_repository_updated_at_asc)
end end
def find_unverified_projects(batch_size:) def find_unverified_projects(batch_size:)
Project.select(:id) relation =
.with_route Project.select(:id)
.joins(left_join_repository_state) .with_route
.where(repository_never_verified) .joins(left_join_repository_state)
.limit(batch_size) .where(repository_never_verified)
.limit(batch_size)
if shard_name.present?
relation = shard_restriction(relation)
end
relation
end end
def count_verified_repositories def count_verified_repositories
...@@ -43,10 +68,14 @@ module Geo ...@@ -43,10 +68,14 @@ module Geo
ProjectRepositoryState.arel_table ProjectRepositoryState.arel_table
end end
# Join predicate pairing a project with its repository state row:
# projects.id = <repository state table>.project_id.
#
# Returns the node produced by calling #eq on the projects id attribute.
def project_id_matcher
  project_pk = projects_table[:id]
  state_project_fk = repository_state_table[:project_id]

  project_pk.eq(state_project_fk)
end
def left_join_repository_state def left_join_repository_state
projects_table projects_table
.join(repository_state_table, Arel::Nodes::OuterJoin) .join(repository_state_table, Arel::Nodes::OuterJoin)
.on(projects_table[:id].eq(repository_state_table[:project_id])) .on(project_id_matcher)
.join_sources .join_sources
end end
...@@ -67,5 +96,9 @@ module Geo ...@@ -67,5 +96,9 @@ module Geo
def last_repository_updated_at_asc def last_repository_updated_at_asc
Gitlab::Database.nulls_last_order('projects.last_repository_updated_at', 'ASC') Gitlab::Database.nulls_last_order('projects.last_repository_updated_at', 'ASC')
end end
# Narrows +relation+ to projects whose repository_storage equals this
# finder's shard_name.
#
# relation - any object responding to #where. Within this class it is called
#            with both the Arel query built for outdated projects and the
#            Project relation built for unverified projects.
#
# Returns the result of +relation.where+ with the shard condition applied.
def shard_restriction(relation)
  stored_on_shard = projects_table[:repository_storage].eq(shard_name)

  relation.where(stored_on_shard)
end
end end
end end
...@@ -36,7 +36,7 @@ module Geo ...@@ -36,7 +36,7 @@ module Geo
end end
def finder def finder
@finder ||= Geo::RepositoryVerificationFinder.new @finder ||= Geo::RepositoryVerificationFinder.new(shard_name: shard_name)
end end
def load_pending_resources def load_pending_resources
...@@ -51,17 +51,11 @@ module Geo ...@@ -51,17 +51,11 @@ module Geo
end end
def find_unverified_project_ids(batch_size:) def find_unverified_project_ids(batch_size:)
shard_restriction(finder.find_unverified_projects(batch_size: batch_size)) finder.find_unverified_projects(batch_size: batch_size).pluck(:id)
.pluck(:id)
end end
def find_outdated_project_ids(batch_size:) def find_outdated_project_ids(batch_size:)
shard_restriction(finder.find_outdated_projects(batch_size: batch_size)) finder.find_outdated_projects(batch_size: batch_size).pluck(:id)
.pluck(:id)
end
def shard_restriction(relation)
relation.where(repository_storage: shard_name)
end end
end end
end end
......
# Adds an index on the projects table covering, in order, the columns
# id, repository_storage and last_repository_updated_at.
class AddIndexToProjectsOnRepositoryStorageLastRepositoryUpdatedAt < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Flagged as a migration that does not require downtime.
  DOWNTIME = false
  INDEX_NAME = 'idx_projects_on_repository_storage_last_repository_updated_at'.freeze

  # The concurrent index helpers below are run outside a DDL transaction.
  disable_ddl_transaction!

  # Creates the index under INDEX_NAME.
  def up
    add_concurrent_index :projects, %i[id repository_storage last_repository_updated_at], name: INDEX_NAME
  end

  # Drops the index by name, reversing #up.
  def down
    remove_concurrent_index_by_name :projects, INDEX_NAME
  end
end
...@@ -53,12 +53,41 @@ describe Geo::RepositoryVerificationFinder, :postgresql do ...@@ -53,12 +53,41 @@ describe Geo::RepositoryVerificationFinder, :postgresql do
expect(subject.find_outdated_projects(batch_size: 10)).to eq [less_active_project, project] expect(subject.find_outdated_projects(batch_size: 10)).to eq [less_active_project, project]
end end
# Checks that a finder built with a shard name only reports outdated
# projects stored on that shard.
context 'with shard restriction' do
# Scope the finder to the storage shard that `project` itself uses.
subject { described_class.new(shard_name: project.repository_storage) }
it 'does not return projects on other shards' do
project_other_shard = create(:project)
# update_column writes the value directly, skipping validations and
# callbacks. NOTE(review): presumably needed because 'other' is not a
# configured storage in the test environment — confirm.
project_other_shard.update_column(:repository_storage, 'other')
# Both projects get a repository state built with the :repository_outdated
# trait, so the shard is the only thing that tells them apart.
create(:repository_state, :repository_outdated, project: project)
create(:repository_state, :repository_outdated, project: project_other_shard)
expect(subject.find_outdated_projects(batch_size: 10))
.to match_array(project)
end
end
end end
describe '#find_unverified_projects' do describe '#find_unverified_projects' do
it 'returns projects that never have been verified' do it 'returns projects that never have been verified' do
create(:repository_state, :repository_outdated)
create(:repository_state, :wiki_outdated)
expect(subject.find_unverified_projects(batch_size: 10)) expect(subject.find_unverified_projects(batch_size: 10))
.to match_array(project) .to match_array(project)
end end
# Checks that a finder built with a shard name only reports unverified
# projects stored on that shard.
context 'with shard restriction' do
# Scope the finder to the storage shard that `project` itself uses.
subject { described_class.new(shard_name: project.repository_storage) }
it 'does not return projects on other shards' do
# No repository state is created for this project inside the example; it is
# only moved to a different storage name.
project_other_shard = create(:project)
# update_column writes the value directly, skipping validations and
# callbacks.
project_other_shard.update_column(:repository_storage, 'other')
# Only `project` is expected back; the project on 'other' must be excluded.
expect(subject.find_unverified_projects(batch_size: 10))
.to match_array(project)
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment