Commit 0ac6ffdd authored by Andreas Brandl's avatar Andreas Brandl

Merge branch 'ahegyi-make-hierarchcy-cte-distinct' into 'master'

Add DISTINCT to the CTE queries for hierarchies [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!56509
parents 57aa7792 a7038df0
......@@ -15,8 +15,7 @@ module Namespaces
# Returns all ancestors, self, and descendants of the current namespace.
def self_and_hierarchy
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.all_objects
end
......@@ -24,38 +23,38 @@ module Namespaces
def ancestors
return self.class.none unless parent_id
Gitlab::ObjectHierarchy
.new(self.class.where(id: parent_id))
object_hierarchy(self.class.where(id: parent_id))
.base_and_ancestors
end
# returns all ancestors upto but excluding the given namespace
# when no namespace is given, all ancestors upto the top are returned
def ancestors_upto(top = nil, hierarchy_order: nil)
Gitlab::ObjectHierarchy.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.ancestors(upto: top, hierarchy_order: hierarchy_order)
end
def self_and_ancestors(hierarchy_order: nil)
return self.class.where(id: id) unless parent_id
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.base_and_ancestors(hierarchy_order: hierarchy_order)
end
# Returns all the descendants of the current namespace.
def descendants
Gitlab::ObjectHierarchy
.new(self.class.where(parent_id: id))
object_hierarchy(self.class.where(parent_id: id))
.base_and_descendants
end
def self_and_descendants
Gitlab::ObjectHierarchy
.new(self.class.where(id: id))
object_hierarchy(self.class.where(id: id))
.base_and_descendants
end
def object_hierarchy(ancestors_base)
Gitlab::ObjectHierarchy.new(ancestors_base, options: { use_distinct: Feature.enabled?(:use_distinct_in_object_hierarchy, self) })
end
end
end
end
---
name: use_distinct_in_object_hierarchy
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56509
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/324644
milestone: '13.10'
type: development
group: group::optimize
default_enabled: false
......@@ -60,13 +60,28 @@ module Gitlab
# ancestor to most nested object respectively. This uses a `depth` column
# where `1` is defined as the depth for the base and increment as we go up
# each parent.
#
# Note: By default the order is breadth-first
# rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors(upto: nil, hierarchy_order: nil)
if use_distinct?
expose_depth = hierarchy_order.present?
hierarchy_order ||= :asc
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all).distinct
# if hierarchy_order is given, the calculated `depth` should be present in SELECT
if expose_depth
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: hierarchy_order))
else
read_only(remove_depth_and_maintain_order(recursive_query, hierarchy_order: hierarchy_order))
end
else
recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all)
recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
read_only(recursive_query)
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the descendants_base set of objects
......@@ -74,9 +89,22 @@ module Gitlab
#
# When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
# and incremented as we go down the descendant tree
# rubocop: disable CodeReuse/ActiveRecord
def base_and_descendants(with_depth: false)
if use_distinct?
# Always calculate `depth`, remove it later if with_depth is false
base_cte = base_and_descendants_cte(with_depth: true).apply_to(model.all).distinct
if with_depth
read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: :asc))
else
read_only(remove_depth_and_maintain_order(base_cte, hierarchy_order: :asc))
end
else
read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all))
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the base objects, their ancestors,
# and the descendants of the base objects.
......@@ -108,13 +136,21 @@ module Gitlab
ancestors_table = ancestors.alias_to(objects_table)
descendants_table = descendants.alias_to(objects_table)
ancestors_scope = model.unscoped.from(ancestors_table)
descendants_scope = model.unscoped.from(descendants_table)
if use_distinct?
ancestors_scope = ancestors_scope.distinct
descendants_scope = descendants_scope.distinct
end
relation = model
.unscoped
.with
.recursive(ancestors.to_arel, descendants.to_arel)
.from_union([
model.unscoped.from(ancestors_table),
model.unscoped.from(descendants_table)
ancestors_scope,
descendants_scope
])
read_only(relation)
......@@ -123,12 +159,28 @@ module Gitlab
private
# Use distinct on the Namespace queries to avoid bad planner behavior in PG11.
def use_distinct?
(model <= Namespace) && options[:use_distinct]
end
# Remove the extra `depth` field using an INNER JOIN to avoid breaking UNION queries
# and ordering the rows based on the `depth` column to maintain the row order.
#
# rubocop: disable CodeReuse/ActiveRecord
def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
joined_relation = model.joins("INNER JOIN (#{relation.select(:id, :depth).to_sql}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id").order("namespaces_join_table.depth" => hierarchy_order)
model.from(Arel::Nodes::As.new(joined_relation.arel, objects_table))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
cte = SQL::RecursiveCTE.new(:base_and_ancestors)
base_query = ancestors_base.except(:order)
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
cte << base_query
......@@ -161,7 +213,7 @@ module Gitlab
cte = SQL::RecursiveCTE.new(:base_and_descendants)
base_query = descendants_base.except(:order)
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
cte << base_query
......
......@@ -7,6 +7,7 @@ RSpec.describe Gitlab::ObjectHierarchy do
let!(:child1) { create(:group, parent: parent) }
let!(:child2) { create(:group, parent: child1) }
shared_context 'Gitlab::ObjectHierarchy test cases' do
describe '#base_and_ancestors' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors
......@@ -181,4 +182,31 @@ RSpec.describe Gitlab::ObjectHierarchy do
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
end
end
context 'when the use_distinct_in_object_hierarchy feature flag is enabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: true)
end
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
it 'calls DISTINCT' do
expect(parent.self_and_descendants.to_sql).to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).to include("DISTINCT")
end
end
context 'when the use_distinct_in_object_hierarchy feature flag is disabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: false)
end
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
it 'does not call DISTINCT' do
expect(parent.self_and_descendants.to_sql).not_to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).not_to include("DISTINCT")
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment