Commit 8a72f5c4 authored by Yorick Peterse's avatar Yorick Peterse

Added FromUnion to easily select from a UNION

This commit adds the module `FromUnion`, which provides the class method
`from_union`. This simplifies the process of selecting data from the
result of a UNION, and reduces the likelihood of making mistakes. As a
result, instead of this:

    union = Gitlab::SQL::Union.new([foo, bar])

    Foo.from("(#{union.to_sql}) #{Foo.table_name}")

We can now write this instead:

    Foo.from_union([foo, bar])

This commit also includes some changes to make this new setup work
properly. For example, a bug in Rails 4
(https://github.com/rails/rails/issues/24193) would break the use of
`from("sub-query-here").includes(:relation)` in certain cases. There was
also a CI query which appeared to repeat a lot of conditions from an
outer query on an inner query, which isn't necessary.

Finally, we include a RuboCop cop to ensure developers use this new
module, instead of using Gitlab::SQL::Union directly.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/51307
parent 78b3eea7
......@@ -18,7 +18,7 @@ class MembersFinder
group_members = GroupMembersFinder.new(group).execute(include_descendants: include_descendants) # rubocop: disable CodeReuse/Finder
group_members = group_members.non_invite
union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false)
union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false) # rubocop: disable Gitlab/Union
sql = distinct_on(union)
......
......@@ -89,9 +89,7 @@ class SnippetsFinder < UnionFinder
# We use a UNION here instead of OR clauses since this results in better
# performance.
union = Gitlab::SQL::Union.new([authorized_projects.select('projects.id'), visible_projects.select('projects.id')])
Project.from("(#{union.to_sql}) AS #{Project.table_name}")
Project.from_union([authorized_projects, visible_projects])
end
# rubocop: enable CodeReuse/ActiveRecord
......
......@@ -164,16 +164,13 @@ class TodosFinder
# rubocop: disable CodeReuse/ActiveRecord
def by_group(items)
if group?
return items unless group?
groups = group.self_and_descendants
project_todos = items.where(project_id: Project.where(group: groups).select(:id))
group_todos = items.where(group_id: groups.select(:id))
union = Gitlab::SQL::Union.new([project_todos, group_todos])
items = Todo.from("(#{union.to_sql}) #{Todo.table_name}")
end
items
Todo.from_union([project_todos, group_todos])
end
# rubocop: enable CodeReuse/ActiveRecord
......
# frozen_string_literal: true
class UnionFinder
# rubocop: disable CodeReuse/ActiveRecord
def find_union(segments, klass)
if segments.length > 1
union = Gitlab::SQL::Union.new(segments.map { |s| s.select(:id) })
unless klass < FromUnion
raise TypeError, "#{klass.inspect} must include the FromUnion module"
end
klass.where("#{klass.table_name}.id IN (#{union.to_sql})")
if segments.length > 1
klass.from_union(segments)
else
segments.first
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
# frozen_string_literal: true
class Badge < ActiveRecord::Base
include FromUnion
# This structure sets the placeholders that the urls
# can have. This hash also sets which action to ask when
# the placeholder is found.
......
......@@ -7,6 +7,7 @@ module Ci
include IgnorableColumn
include RedisCacheable
include ChronicDurationAttribute
include FromUnion
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
......@@ -57,18 +58,26 @@ module Ci
}
scope :owned_or_instance_wide, -> (project_id) do
union = Gitlab::SQL::Union.new(
[belonging_to_project(project_id), belonging_to_parent_group_of_project(project_id), instance_type],
from_union(
[
belonging_to_project(project_id),
belonging_to_parent_group_of_project(project_id),
instance_type
],
remove_duplicates: false
)
from("(#{union.to_sql}) ci_runners")
end
scope :assignable_for, ->(project) do
# FIXME: That `to_sql` is needed to workaround a weird Rails bug.
# Without that, placeholders would miss one and couldn't match.
#
# We use "unscoped" here so that any current Ci::Runner filters don't
# apply to the inner query, which is not necessary.
exclude_runners = unscoped { project.runners.select(:id) }.to_sql
where(locked: false)
.where.not("ci_runners.id IN (#{project.runners.select(:id).to_sql})")
.where.not("ci_runners.id IN (#{exclude_runners})")
.project_type
end
......
# frozen_string_literal: true
module FromUnion
extend ActiveSupport::Concern
class_methods do
# Produces a query that uses a FROM to select data using a UNION.
#
# Using a FROM for a UNION has in the past lead to better query plans. As
# such, we generally recommend this pattern instead of using a WHERE IN.
#
# Example:
# users = User.from_union([User.where(id: 1), User.where(id: 2)])
#
# This would produce the following SQL query:
#
# SELECT *
# FROM (
# SELECT *
# FROM users
# WHERE id = 1
#
# UNION
#
# SELECT *
# FROM users
# WHERE id = 2
# ) users;
#
# members - An Array of ActiveRecord::Relation objects to use in the UNION.
#
# remove_duplicates - A boolean indicating if duplicate entries should be
# removed. Defaults to true.
#
# alias_as - The alias to use for the sub query. Defaults to the name of the
# table of the current model.
# rubocop: disable Gitlab/Union
def from_union(members, remove_duplicates: true, alias_as: table_name)
union = Gitlab::SQL::Union
.new(members, remove_duplicates: remove_duplicates)
.to_sql
# This pattern is necessary as a bug in Rails 4 can cause the use of
# `from("string here").includes(:foo)` to break ActiveRecord. This is
# fixed in https://github.com/rails/rails/pull/25374, which is released as
# part of Rails 5.
from([Arel.sql("(#{union}) #{alias_as}")])
end
# rubocop: enable Gitlab/Union
end
end
......@@ -3,6 +3,7 @@
class Event < ActiveRecord::Base
include Sortable
include IgnorableColumn
include FromUnion
default_scope { reorder(nil) }
CREATED = 1
......
......@@ -304,14 +304,12 @@ class Group < Namespace
# 3. They belong to a sub-group or project in such sub-group
# 4. They belong to an ancestor group
def direct_and_indirect_users
union = Gitlab::SQL::Union.new([
User.from_union([
User
.where(id: direct_and_indirect_members.select(:user_id))
.reorder(nil),
project_users_with_descendants
])
User.from("(#{union.to_sql}) #{User.table_name}")
end
# Returns all users that are members of projects
......
......@@ -7,6 +7,7 @@ class Label < ActiveRecord::Base
include Gitlab::SQL::Pattern
include OptionallySearch
include Sortable
include FromUnion
# Represents a "No Label" state used for filtering Issues and Merge
# Requests that have no label assigned.
......
......@@ -14,6 +14,7 @@ class MergeRequest < ActiveRecord::Base
include Gitlab::Utils::StrongMemoize
include LabelEventable
include ReactiveCaching
include FromUnion
self.reactive_cache_key = ->(model) { [model.project.id, model.iid] }
self.reactive_cache_refresh_interval = 10.minutes
......@@ -237,11 +238,10 @@ class MergeRequest < ActiveRecord::Base
def self.in_projects(relation)
# unscoping unnecessary conditions that'll be applied
# when executing `where("merge_requests.id IN (#{union.to_sql})")`
source = unscoped.where(source_project_id: relation).select(:id)
target = unscoped.where(target_project_id: relation).select(:id)
union = Gitlab::SQL::Union.new([source, target])
source = unscoped.where(source_project_id: relation)
target = unscoped.where(target_project_id: relation)
where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
from_union([source, target])
end
# This is used after project import, to reset the IDs to the correct
......@@ -740,11 +740,8 @@ class MergeRequest < ActiveRecord::Base
# compared to using OR statements. We're using UNION ALL since the queries
# used won't produce any duplicates (e.g. a note for a commit can't also be
# a note for an MR).
union = Gitlab::SQL::Union
.new([notes, commit_notes], remove_duplicates: false)
.to_sql
Note.from("(#{union}) #{Note.table_name}")
Note
.from_union([notes, commit_notes], remove_duplicates: false)
.includes(:noteable)
end
......
......@@ -11,6 +11,7 @@ class Namespace < ActiveRecord::Base
include Gitlab::SQL::Pattern
include IgnorableColumn
include FeatureGate
include FromUnion
ignore_column :deleted_at
......
......@@ -17,6 +17,7 @@ class Note < ActiveRecord::Base
include Editable
include Gitlab::SQL::Pattern
include ThrottledTouch
include FromUnion
module SpecialRole
FIRST_TIME_CONTRIBUTOR = :first_time_contributor
......
......@@ -29,6 +29,7 @@ class Project < ActiveRecord::Base
include BatchDestroyDependentAssociations
include FeatureGate
include OptionallySearch
include FromUnion
extend Gitlab::Cache::RequestCache
extend Gitlab::ConfigHelper
......@@ -1493,8 +1494,7 @@ class Project < ActiveRecord::Base
end
def all_runners
union = Gitlab::SQL::Union.new([runners, group_runners, shared_runners])
Ci::Runner.from("(#{union.to_sql}) ci_runners")
Ci::Runner.from_union([runners, group_runners, shared_runners])
end
def active_runners
......@@ -2022,12 +2022,10 @@ class Project < ActiveRecord::Base
def badges
return project_badges unless group
group_badges_rel = GroupBadge.where(group: group.self_and_ancestors)
union = Gitlab::SQL::Union.new([project_badges.select(:id),
group_badges_rel.select(:id)])
Badge.where("id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Badge.from_union([
project_badges,
GroupBadge.where(group: group.self_and_ancestors)
])
end
def merge_requests_allowing_push_to_user(user)
......
# frozen_string_literal: true
class ProjectAuthorization < ActiveRecord::Base
include FromUnion
belongs_to :user
belongs_to :project
......@@ -8,9 +10,9 @@ class ProjectAuthorization < ActiveRecord::Base
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
def self.select_from_union(union)
select(['project_id', 'MAX(access_level) AS access_level'])
.from("(#{union.to_sql}) #{ProjectAuthorization.table_name}")
def self.select_from_union(relations)
from_union(relations)
.select(['project_id', 'MAX(access_level) AS access_level'])
.group(:project_id)
end
......
......@@ -12,6 +12,7 @@ class Snippet < ActiveRecord::Base
include Spammable
include Editable
include Gitlab::SQL::Pattern
include FromUnion
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
......
......@@ -2,6 +2,7 @@
class Todo < ActiveRecord::Base
include Sortable
include FromUnion
ASSIGNED = 1
MENTIONED = 2
......
......@@ -20,6 +20,7 @@ class User < ActiveRecord::Base
include BlocksJsonSerialization
include WithUploads
include OptionallySearch
include FromUnion
DEFAULT_NOTIFICATION_LEVEL = :participating
......@@ -286,11 +287,9 @@ class User < ActiveRecord::Base
# user_id - The ID of the user to include.
def self.union_with_user(user_id = nil)
if user_id.present?
union = Gitlab::SQL::Union.new([all, User.unscoped.where(id: user_id)])
# We use "unscoped" here so that any inner conditions are not repeated for
# the outer query, which would be redundant.
User.unscoped.from("(#{union.to_sql}) #{User.table_name}")
User.unscoped.from_union([all, User.unscoped.where(id: user_id)])
else
all
end
......@@ -354,9 +353,8 @@ class User < ActiveRecord::Base
emails = joins(:emails).where(emails: { email: email })
emails = emails.confirmed if confirmed
union = Gitlab::SQL::Union.new([users, emails])
from("(#{union.to_sql}) #{table_name}")
from_union([users, emails])
end
def filter(filter_name)
......@@ -676,10 +674,10 @@ class User < ActiveRecord::Base
# Returns the groups a user has access to, either through a membership or a project authorization
def authorized_groups
union = Gitlab::SQL::Union
.new([groups.select(:id), authorized_projects.select(:namespace_id)])
Group.where("namespaces.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Group.from_union([
groups,
authorized_projects.joins(:namespace).select('namespaces.*')
])
end
# Returns the groups a user is a member of, either directly or through a parent group
......@@ -744,7 +742,15 @@ class User < ActiveRecord::Base
end
def owned_projects
@owned_projects ||= Project.from("(#{owned_projects_union.to_sql}) AS projects")
@owned_projects ||= Project.from_union(
[
Project.where(namespace: namespace),
Project.joins(:project_authorizations)
.where("projects.namespace_id <> ?", namespace.id)
.where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER })
],
remove_duplicates: false
)
end
# Returns projects which user can admin issues on (for example to move an issue to that project).
......@@ -1138,17 +1144,17 @@ class User < ActiveRecord::Base
def ci_owned_runners
@ci_owned_runners ||= begin
project_runner_ids = Ci::RunnerProject
project_runners = Ci::RunnerProject
.where(project: authorized_projects(Gitlab::Access::MAINTAINER))
.select(:runner_id)
.joins(:runner)
.select('ci_runners.*')
group_runner_ids = Ci::RunnerNamespace
group_runners = Ci::RunnerNamespace
.where(namespace_id: owned_or_maintainers_groups.select(:id))
.select(:runner_id)
.joins(:runner)
.select('ci_runners.*')
union = Gitlab::SQL::Union.new([project_runner_ids, group_runner_ids])
Ci::Runner.where("ci_runners.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Ci::Runner.from_union([project_runners, group_runners])
end
end
......@@ -1380,15 +1386,6 @@ class User < ActiveRecord::Base
Gitlab::CurrentSettings.usage_stats_set_by_user_id == self.id
end
def owned_projects_union
Gitlab::SQL::Union.new([
Project.where(namespace: namespace),
Project.joins(:project_authorizations)
.where("projects.namespace_id <> ?", namespace.id)
.where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER })
], remove_duplicates: false)
end
# Added according to https://github.com/plataformatec/devise/blob/7df57d5081f9884849ca15e4fde179ef164a575f/README.md#activejob-integration
def send_devise_notification(notification, *args)
return true unless can?(:receive_notifications)
......
......@@ -34,13 +34,13 @@ module Labels
# rubocop: disable CodeReuse/ActiveRecord
def labels_to_transfer
label_ids = []
label_ids << group_labels_applied_to_issues.select(:id)
label_ids << group_labels_applied_to_merge_requests.select(:id)
union = Gitlab::SQL::Union.new(label_ids)
Label.where("labels.id IN (#{union.to_sql})").reorder(nil).uniq # rubocop:disable GitlabSecurity/SqlInjection
Label
.from_union([
group_labels_applied_to_issues,
group_labels_applied_to_merge_requests
])
.reorder(nil)
.uniq
end
# rubocop: enable CodeReuse/ActiveRecord
......
......@@ -30,8 +30,9 @@ module Gitlab
note_events = event_counts(date_from, :merge_requests)
.having(action: [Event::COMMENTED])
union = Gitlab::SQL::Union.new([repo_events, issue_events, mr_events, note_events])
events = Event.find_by_sql(union.to_sql).map(&:attributes)
events = Event
.from_union([repo_events, issue_events, mr_events, note_events])
.map(&:attributes)
@activity_dates = events.each_with_object(Hash.new {|h, k| h[k] = 0 }) do |event, activities|
activities[event["date"]] += event["total_amount"]
......
......@@ -2,6 +2,8 @@ module Gitlab
module Database
# Model that can be used for querying permissions of a SQL user.
class Grant < ActiveRecord::Base
include FromUnion
self.table_name =
if Database.postgresql?
'information_schema.role_table_grants'
......@@ -42,9 +44,7 @@ module Gitlab
.where("GRANTEE = CONCAT('\\'', REPLACE(CURRENT_USER(), '@', '\\'@\\''), '\\'')")
]
union = SQL::Union.new(queries).to_sql
Grant.from("(#{union}) privs").any?
Grant.from_union(queries, alias_as: 'privs').any?
end
end
end
......
......@@ -89,14 +89,14 @@ module Gitlab
ancestors_table = ancestors.alias_to(groups_table)
descendants_table = descendants.alias_to(groups_table)
union = SQL::Union.new([model.unscoped.from(ancestors_table),
model.unscoped.from(descendants_table)])
relation = model
.unscoped
.with
.recursive(ancestors.to_arel, descendants.to_arel)
.from("(#{union.to_sql}) #{model.table_name}")
.from_union([
model.unscoped.from(ancestors_table),
model.unscoped.from(descendants_table)
])
read_only(relation)
end
......
......@@ -49,13 +49,11 @@ module Gitlab
.where('p_ns.share_with_group_lock IS FALSE')
]
union = Gitlab::SQL::Union.new(relations)
ProjectAuthorization
.unscoped
.with
.recursive(cte.to_arel)
.select_from_union(union)
.select_from_union(relations)
end
private
......
......@@ -24,11 +24,9 @@ module Gitlab
user.groups.joins(:shared_projects).select_for_project_authorization
]
union = Gitlab::SQL::Union.new(relations)
ProjectAuthorization
.unscoped
.select_from_union(union)
.select_from_union(relations)
end
end
end
......
......@@ -18,7 +18,7 @@ module Gitlab
def users
return User.none unless @text.present?
@users ||= User.from("(#{union.to_sql}) users")
@users ||= User.from_union(union_relations)
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -43,13 +43,13 @@ module Gitlab
private
def union
def union_relations
relations = []
relations << User.by_any_email(emails) if emails.any?
relations << User.by_username(usernames) if usernames.any?
Gitlab::SQL::Union.new(relations)
relations
end
end
end
# frozen_string_literal: true
require_relative '../../spec_helpers'
module RuboCop
module Cop
module Gitlab
# Cop that disallows the use of `Gitlab::SQL::Union`, in favour of using
# the `FromUnion` module.
class Union < RuboCop::Cop::Cop
include SpecHelpers
MSG = 'Use the `FromUnion` concern, instead of using `Gitlab::SQL::Union` directly'
def_node_matcher :raw_union?, <<~PATTERN
(send (const (const (const nil? :Gitlab) :SQL) :Union) :new ...)
PATTERN
def on_send(node)
return unless raw_union?(node)
return if in_spec?(node)
add_offense(node, location: :expression)
end
end
end
end
end
......@@ -3,6 +3,7 @@ require_relative 'cop/gitlab/module_with_instance_variables'
require_relative 'cop/gitlab/predicate_memoization'
require_relative 'cop/gitlab/httparty'
require_relative 'cop/gitlab/finder_with_find_by'
require_relative 'cop/gitlab/union'
require_relative 'cop/include_sidekiq_worker'
require_relative 'cop/avoid_return_from_blocks'
require_relative 'cop/avoid_break_from_strong_memoize'
......
# frozen_string_literal: true
require 'spec_helper'
describe FromUnion do
describe '.from_union' do
let(:model) do
Class.new(ActiveRecord::Base) do
self.table_name = 'users'
include FromUnion
end
end
it 'selects from the results of the UNION' do
query = model.from_union([model.where(id: 1), model.where(id: 2)])
expect(query.to_sql).to match(/FROM \(SELECT.+UNION.+SELECT.+\) users/m)
end
it 'supports the use of a custom alias for the sub query' do
query = model.from_union(
[model.where(id: 1), model.where(id: 2)],
alias_as: 'kittens'
)
expect(query.to_sql).to match(/FROM \(SELECT.+UNION.+SELECT.+\) kittens/m)
end
it 'supports keeping duplicate rows' do
query = model.from_union(
[model.where(id: 1), model.where(id: 2)],
remove_duplicates: false
)
expect(query.to_sql)
.to match(/FROM \(SELECT.+UNION ALL.+SELECT.+\) users/m)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
require 'rubocop'
require 'rubocop/rspec/support'
require_relative '../../../../rubocop/cop/gitlab/union'
describe RuboCop::Cop::Gitlab::Union do
include CopHelper
subject(:cop) { described_class.new }
it 'flags the use of Gitlab::SQL::Union.new' do
expect_offense(<<~SOURCE)
Gitlab::SQL::Union.new([foo])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use the `FromUnion` concern, instead of using `Gitlab::SQL::Union` directly
SOURCE
end
it 'does not flag the use of Gitlab::SQL::Union in a spec' do
allow(cop).to receive(:in_spec?).and_return(true)
expect_no_offenses('Gitlab::SQL::Union.new([foo])')
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment