Commit 0ac6ffdd authored by Andreas Brandl

Merge branch 'ahegyi-make-hierarchcy-cte-distinct' into 'master'

Add DISTINCT to the CTE queries for hierarchies [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!56509
parents 57aa7792 a7038df0
......@@ -15,8 +15,7 @@ module Namespaces
# Returns all ancestors, self, and descendants of the current namespace.
#
# The hierarchy is built exclusively through #object_hierarchy so that the
# `use_distinct` option is applied; no separate ObjectHierarchy is created.
def self_and_hierarchy
  object_hierarchy(self.class.where(id: id)).all_objects
end
......@@ -24,38 +23,38 @@ module Namespaces
# Returns the ancestors of the current namespace (self excluded).
#
# Returns an empty relation (`self.class.none`) when there is no parent.
# Otherwise the walk starts from the parent row, so `base_and_ancestors`
# yields the parent plus everything above it.
def ancestors
  return self.class.none unless parent_id

  object_hierarchy(self.class.where(id: parent_id)).base_and_ancestors
end
# Returns all ancestors up to, but excluding, the given namespace.
# When no namespace is given, all ancestors up to the top are returned.
#
# top             - namespace at which to stop (excluded), or nil.
# hierarchy_order - forwarded unchanged to the hierarchy's #ancestors.
def ancestors_upto(top = nil, hierarchy_order: nil)
  object_hierarchy(self.class.where(id: id))
    .ancestors(upto: top, hierarchy_order: hierarchy_order)
end
# Returns the current namespace together with all of its ancestors.
#
# hierarchy_order - forwarded unchanged to #base_and_ancestors.
#
# Without a parent there is nothing to walk, so a plain relation for the
# current row is returned and no hierarchy query is built.
def self_and_ancestors(hierarchy_order: nil)
  return self.class.where(id: id) unless parent_id

  object_hierarchy(self.class.where(id: id))
    .base_and_ancestors(hierarchy_order: hierarchy_order)
end
# Returns all the descendants of the current namespace.
#
# The walk starts from the direct children (`parent_id: id`), which is why
# the current namespace itself is not part of the result.
def descendants
  object_hierarchy(self.class.where(parent_id: id)).base_and_descendants
end
# Returns the current namespace together with all of its descendants.
#
# The walk starts from the current row (`id: id`), so the namespace itself is
# included as the base of the result.
def self_and_descendants
  object_hierarchy(self.class.where(id: id)).base_and_descendants
end
# Wraps `ancestors_base` in a Gitlab::ObjectHierarchy, turning on the
# `use_distinct` option when the :use_distinct_in_object_hierarchy feature
# flag is enabled for this record.
def object_hierarchy(ancestors_base)
  distinct_enabled = Feature.enabled?(:use_distinct_in_object_hierarchy, self)
  hierarchy_options = { use_distinct: distinct_enabled }

  Gitlab::ObjectHierarchy.new(ancestors_base, options: hierarchy_options)
end
end
end
end
---
# Development feature flag, disabled by default. The Namespaces concern checks
# it via Feature.enabled? and passes the result to Gitlab::ObjectHierarchy as
# the `use_distinct` option, which adds DISTINCT to the hierarchy CTE queries.
name: use_distinct_in_object_hierarchy
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/56509
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/324644
milestone: '13.10'
type: development
group: group::optimize
default_enabled: false
......@@ -60,12 +60,27 @@ module Gitlab
# ancestor to most nested object respectively. This uses a `depth` column
# where `1` is defined as the depth for the base and increment as we go up
# each parent.
#
# Note: By default the order is breadth-first
# rubocop: disable CodeReuse/ActiveRecord
# upto            - forwarded to base_and_ancestors_cte as its stop argument
#                   (presumably the ancestor at which the walk ends; confirm
#                   against the CTE builder).
# hierarchy_order - :asc or :desc; when given, rows are ordered by the
#                   calculated `depth` column.
#
# Returns whatever read_only returns for the built relation.
def base_and_ancestors(upto: nil, hierarchy_order: nil)
  if use_distinct?
    # Record whether the caller asked for an order before defaulting it: the
    # DISTINCT variant always computes `depth` to keep a stable row order.
    expose_depth = hierarchy_order.present?
    hierarchy_order ||= :asc
    recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all).distinct

    # if hierarchy_order is given, the calculated `depth` should be present in SELECT
    if expose_depth
      read_only(model.from(Arel::Nodes::As.new(recursive_query.arel, objects_table)).order(depth: hierarchy_order))
    else
      read_only(remove_depth_and_maintain_order(recursive_query, hierarchy_order: hierarchy_order))
    end
  else
    recursive_query = base_and_ancestors_cte(upto, hierarchy_order).apply_to(model.all)
    recursive_query = recursive_query.order(depth: hierarchy_order) if hierarchy_order
    read_only(recursive_query)
  end
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -74,9 +89,22 @@ module Gitlab
#
# When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
# and incremented as we go down the descendant tree
# rubocop: disable CodeReuse/ActiveRecord
# with_depth - when true, the returned rows expose the calculated `depth`
#              column and (in the DISTINCT variant) are ordered by it.
#
# Returns whatever read_only returns for the built relation.
def base_and_descendants(with_depth: false)
  if use_distinct?
    # Always calculate `depth`, remove it later if with_depth is false
    base_cte = base_and_descendants_cte(with_depth: true).apply_to(model.all).distinct

    if with_depth
      # Wrap the DISTINCT query as a sub-select aliased to the objects table.
      read_only(model.from(Arel::Nodes::As.new(base_cte.arel, objects_table)).order(depth: :asc))
    else
      read_only(remove_depth_and_maintain_order(base_cte, hierarchy_order: :asc))
    end
  else
    read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(model.all))
  end
end
# rubocop: enable CodeReuse/ActiveRecord
# Returns a relation that includes the base objects, their ancestors,
# and the descendants of the base objects.
......@@ -108,13 +136,21 @@ module Gitlab
ancestors_table = ancestors.alias_to(objects_table)
descendants_table = descendants.alias_to(objects_table)
ancestors_scope = model.unscoped.from(ancestors_table)
descendants_scope = model.unscoped.from(descendants_table)
if use_distinct?
ancestors_scope = ancestors_scope.distinct
descendants_scope = descendants_scope.distinct
end
relation = model
.unscoped
.with
.recursive(ancestors.to_arel, descendants.to_arel)
.from_union([
model.unscoped.from(ancestors_table),
model.unscoped.from(descendants_table)
ancestors_scope,
descendants_scope
])
read_only(relation)
......@@ -123,12 +159,28 @@ module Gitlab
private
# Whether DISTINCT should be applied to the hierarchy queries. It is limited
# to Namespace-based models (to avoid bad planner behavior in PG11) and only
# applies when the `use_distinct` option was passed in.
def use_distinct?
  namespace_model = model <= Namespace

  namespace_model && options[:use_distinct]
end
# Drops the extra `depth` column by INNER JOINing the model table against the
# (id, depth) projection of `relation` - a join is used so UNION queries are
# not broken - and sorts on the joined `depth` to maintain the row order.
#
# relation        - relation exposing `id` and `depth` columns.
# hierarchy_order - direction used when sorting on `depth` (default :asc).
#
# rubocop: disable CodeReuse/ActiveRecord
def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
  depth_subquery = relation.select(:id, :depth).to_sql
  join_clause = "INNER JOIN (#{depth_subquery}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id"
  ordered_join = model.joins(join_clause).order("namespaces_join_table.depth" => hierarchy_order)

  model.from(Arel::Nodes::As.new(ordered_join.arel, objects_table))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
cte = SQL::RecursiveCTE.new(:base_and_ancestors)
base_query = ancestors_base.except(:order)
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if hierarchy_order
cte << base_query
......@@ -161,7 +213,7 @@ module Gitlab
cte = SQL::RecursiveCTE.new(:base_and_descendants)
base_query = descendants_base.except(:order)
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", objects_table[Arel.star]) if with_depth
cte << base_query
......
......@@ -7,178 +7,206 @@ RSpec.describe Gitlab::ObjectHierarchy do
let!(:child1) { create(:group, parent: parent) }
let!(:child2) { create(:group, parent: child1) }
describe '#base_and_ancestors' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors
end
it 'includes the base rows' do
expect(relation).to include(child2)
end
shared_context 'Gitlab::ObjectHierarchy test cases' do
describe '#base_and_ancestors' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors
end
it 'includes all of the ancestors' do
expect(relation).to include(parent, child1)
end
it 'includes the base rows' do
expect(relation).to include(child2)
end
it 'can find ancestors upto a certain level' do
relation = described_class.new(Group.where(id: child2)).base_and_ancestors(upto: child1)
it 'includes all of the ancestors' do
expect(relation).to include(parent, child1)
end
expect(relation).to contain_exactly(child2)
end
it 'can find ancestors upto a certain level' do
relation = described_class.new(Group.where(id: child2)).base_and_ancestors(upto: child1)
it 'uses ancestors_base #initialize argument' do
relation = described_class.new(Group.where(id: child2.id), Group.none).base_and_ancestors
expect(relation).to contain_exactly(child2)
end
expect(relation).to include(parent, child1, child2)
end
it 'uses ancestors_base #initialize argument' do
relation = described_class.new(Group.where(id: child2.id), Group.none).base_and_ancestors
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
expect(relation).to include(parent, child1, child2)
end
describe 'hierarchy_order option' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors(hierarchy_order: hierarchy_order)
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
context ':asc' do
let(:hierarchy_order) { :asc }
describe 'hierarchy_order option' do
let(:relation) do
described_class.new(Group.where(id: child2.id)).base_and_ancestors(hierarchy_order: hierarchy_order)
end
context ':asc' do
let(:hierarchy_order) { :asc }
it 'orders by child to parent' do
expect(relation).to eq([child2, child1, parent])
it 'orders by child to parent' do
expect(relation).to eq([child2, child1, parent])
end
end
end
context ':desc' do
let(:hierarchy_order) { :desc }
context ':desc' do
let(:hierarchy_order) { :desc }
it 'orders by parent to child' do
expect(relation).to eq([parent, child1, child2])
it 'orders by parent to child' do
expect(relation).to eq([parent, child1, child2])
end
end
end
end
end
describe '#base_and_descendants' do
let(:relation) do
described_class.new(Group.where(id: parent.id)).base_and_descendants
end
it 'includes the base rows' do
expect(relation).to include(parent)
end
describe '#base_and_descendants' do
let(:relation) do
described_class.new(Group.where(id: parent.id)).base_and_descendants
end
it 'includes all the descendants' do
expect(relation).to include(child1, child2)
end
it 'includes the base rows' do
expect(relation).to include(parent)
end
it 'uses descendants_base #initialize argument' do
relation = described_class.new(Group.none, Group.where(id: parent.id)).base_and_descendants
it 'includes all the descendants' do
expect(relation).to include(child1, child2)
end
expect(relation).to include(parent, child1, child2)
end
it 'uses descendants_base #initialize argument' do
relation = described_class.new(Group.none, Group.where(id: parent.id)).base_and_descendants
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
expect(relation).to include(parent, child1, child2)
end
context 'when with_depth is true' do
let(:relation) do
described_class.new(Group.where(id: parent.id)).base_and_descendants(with_depth: true)
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
it 'includes depth in the results' do
object_depths = {
parent.id => 1,
child1.id => 2,
child2.id => 3
}
context 'when with_depth is true' do
let(:relation) do
described_class.new(Group.where(id: parent.id)).base_and_descendants(with_depth: true)
end
it 'includes depth in the results' do
object_depths = {
parent.id => 1,
child1.id => 2,
child2.id => 3
}
relation.each do |object|
expect(object.depth).to eq(object_depths[object.id])
relation.each do |object|
expect(object.depth).to eq(object_depths[object.id])
end
end
end
end
end
describe '#descendants' do
it 'includes only the descendants' do
relation = described_class.new(Group.where(id: parent)).descendants
describe '#descendants' do
it 'includes only the descendants' do
relation = described_class.new(Group.where(id: parent)).descendants
expect(relation).to contain_exactly(child1, child2)
expect(relation).to contain_exactly(child1, child2)
end
end
end
describe '#max_descendants_depth' do
subject { described_class.new(base_relation).max_descendants_depth }
describe '#max_descendants_depth' do
subject { described_class.new(base_relation).max_descendants_depth }
context 'when base relation is empty' do
let(:base_relation) { Group.where(id: nil) }
context 'when base relation is empty' do
let(:base_relation) { Group.where(id: nil) }
it { expect(subject).to be_nil }
end
it { expect(subject).to be_nil }
end
context 'when base has no children' do
let(:base_relation) { Group.where(id: child2) }
context 'when base has no children' do
let(:base_relation) { Group.where(id: child2) }
it { expect(subject).to eq(1) }
end
it { expect(subject).to eq(1) }
end
context 'when base has grandchildren' do
let(:base_relation) { Group.where(id: parent) }
context 'when base has grandchildren' do
let(:base_relation) { Group.where(id: parent) }
it { expect(subject).to eq(3) }
it { expect(subject).to eq(3) }
end
end
end
describe '#ancestors' do
it 'includes only the ancestors' do
relation = described_class.new(Group.where(id: child2)).ancestors
describe '#ancestors' do
it 'includes only the ancestors' do
relation = described_class.new(Group.where(id: child2)).ancestors
expect(relation).to contain_exactly(child1, parent)
end
expect(relation).to contain_exactly(child1, parent)
end
it 'can find ancestors upto a certain level' do
relation = described_class.new(Group.where(id: child2)).ancestors(upto: child1)
it 'can find ancestors upto a certain level' do
relation = described_class.new(Group.where(id: child2)).ancestors(upto: child1)
expect(relation).to be_empty
expect(relation).to be_empty
end
end
end
describe '#all_objects' do
let(:relation) do
described_class.new(Group.where(id: child1.id)).all_objects
end
describe '#all_objects' do
let(:relation) do
described_class.new(Group.where(id: child1.id)).all_objects
end
it 'includes the base rows' do
expect(relation).to include(child1)
end
it 'includes the base rows' do
expect(relation).to include(child1)
end
it 'includes the ancestors' do
expect(relation).to include(parent)
end
it 'includes the descendants' do
expect(relation).to include(child2)
end
it 'uses ancestors_base #initialize argument for ancestors' do
relation = described_class.new(Group.where(id: child1.id), Group.where(id: non_existing_record_id)).all_objects
expect(relation).to include(parent)
end
it 'includes the ancestors' do
expect(relation).to include(parent)
it 'uses descendants_base #initialize argument for descendants' do
relation = described_class.new(Group.where(id: non_existing_record_id), Group.where(id: child1.id)).all_objects
expect(relation).to include(child2)
end
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
end
end
end
it 'includes the descendants' do
expect(relation).to include(child2)
context 'when the use_distinct_in_object_hierarchy feature flag is enabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: true)
end
it 'uses ancestors_base #initialize argument for ancestors' do
relation = described_class.new(Group.where(id: child1.id), Group.where(id: non_existing_record_id)).all_objects
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
expect(relation).to include(parent)
it 'calls DISTINCT' do
expect(parent.self_and_descendants.to_sql).to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).to include("DISTINCT")
end
end
it 'uses descendants_base #initialize argument for descendants' do
relation = described_class.new(Group.where(id: non_existing_record_id), Group.where(id: child1.id)).all_objects
expect(relation).to include(child2)
context 'when the use_distinct_in_object_hierarchy feature flag is disabled' do
before do
stub_feature_flags(use_distinct_in_object_hierarchy: false)
end
it 'does not allow the use of #update_all' do
expect { relation.update_all(share_with_group_lock: false) }
.to raise_error(ActiveRecord::ReadOnlyRecord)
it_behaves_like 'Gitlab::ObjectHierarchy test cases'
it 'does not call DISTINCT' do
expect(parent.self_and_descendants.to_sql).not_to include("DISTINCT")
expect(child2.self_and_ancestors.to_sql).not_to include("DISTINCT")
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment