Commit 70785203 authored by Stan Hu's avatar Stan Hu

Merge branch 'ensure-even-union-selects' into 'master'

Ensure even select values in UNION query [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!66337
parents 6d4aa2d8 2a7cd9cd
......@@ -99,4 +99,8 @@ class ApplicationRecord < ActiveRecord::Base
::Feature.enabled?(:active_record_subtransactions_counter, type: :ops, default_enabled: :yaml) &&
connection.transaction_open?
end
def self.cached_column_list
self.column_names.map { |column_name| self.arel_table[column_name] }
end
end
......@@ -5,7 +5,7 @@ module SelectForProjectAuthorization
class_methods do
def select_for_project_authorization
select("projects.id AS project_id, members.access_level")
select("projects.id AS project_id", "members.access_level")
end
def select_as_maintainer_for_project_authorization
......
......@@ -158,7 +158,7 @@ class Group < Namespace
if current_scope.joins_values.include?(:shared_projects)
joins('INNER JOIN namespaces project_namespace ON project_namespace.id = projects.namespace_id')
.where(project_namespace: { share_with_group_lock: false })
.select("projects.id AS project_id, LEAST(project_group_links.group_access, members.access_level) AS access_level")
.select("projects.id AS project_id", "LEAST(project_group_links.group_access, members.access_level) AS access_level")
else
super
end
......@@ -463,7 +463,7 @@ class Group < Namespace
id
end
group_hierarchy_members = GroupMember.where(source_id: source_ids)
group_hierarchy_members = GroupMember.where(source_id: source_ids).select(*GroupMember.cached_column_list)
GroupMember.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares]).authorizable
......@@ -481,6 +481,7 @@ class Group < Namespace
group_hierarchy_members = GroupMember.active_without_invites_and_requests
.non_minimal_access
.where(source_id: source_ids)
.select(*GroupMember.cached_column_list)
GroupMember.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares])
......
......@@ -2008,8 +2008,8 @@ class User < ApplicationRecord
def authorized_groups_without_shared_membership
Group.from_union([
groups,
authorized_projects.joins(:namespace).select('namespaces.*')
groups.select(Namespace.arel_table[Arel.star]),
authorized_projects.joins(:namespace).select(Namespace.arel_table[Arel.star])
])
end
......
......@@ -311,6 +311,75 @@ union = Gitlab::SQL::Union.new([projects, more_projects, ...])
Project.from("(#{union.to_sql}) projects")
```
### Uneven columns in the UNION sub-queries
When the UNION query has uneven columns in the SELECT clauses, the database returns an error.
Consider the following UNION query:
```sql
SELECT id FROM users WHERE id = 1
UNION
SELECT id, name FROM users WHERE id = 2
end
```
The query results in the following error message:
```plaintext
each UNION query must have the same number of columns
```
This problem is apparent and it can be easily fixed during development. One edge-case is when
UNION queries are combined with explicit column listing where the list comes from the
`ActiveRecord` schema cache.
Example (bad, avoid it):
```ruby
scope1 = User.select(User.column_names).where(id: [1, 2, 3]) # selects the columns explicitly
scope2 = User.where(id: [10, 11, 12]) # uses SELECT users.*
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
When this code is deployed, it doesn't cause problems immediately. When another
developer adds a new database column to the `users` table, this query breaks in
production and can cause downtime. The second query (`SELECT users.*`) includes the
newly added column; however, the first query does not. The `column_names` method returns stale
values (the new column is missing), because the values are cached within the `ActiveRecord` schema
cache. These values are usually populated when the application boots up.
At this point, the only fix would be a full application restart so that the schema cache gets
updated.
The problem can be avoided if we always use `SELECT users.*` or we always explicitly define the
columns.
Using `SELECT users.*`:
```ruby
# Bad, avoid it
scope1 = User.select(User.column_names).where(id: [1, 2, 3])
scope2 = User.where(id: [10, 11, 12])
# Good, both queries generate SELECT users.*
scope1 = User.where(id: [1, 2, 3])
scope2 = User.where(id: [10, 11, 12])
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
Explicit column list definition:
```ruby
# Good, the SELECT columns are consistent
columns = User.cached_column_names # The helper returns fully qualified (table.column) column names (Arel)
scope1 = User.select(*columns).where(id: [1, 2, 3]) # selects the columns explicitly
scope2 = User.select(*columns).where(id: [10, 11, 12]) # uses SELECT users.*
User.connection.execute(Gitlab::SQL::Union.new([scope1, scope2]).to_sql)
```
## Ordering by Creation Date
When ordering records based on the time they were created, you can order
......
......@@ -20,6 +20,11 @@ module EE
self.inheritance_column = :_type_disabled
self.enumerate_columns_in_select_statements = true
# backported from ApplicationRecord
def self.cached_column_list
self.column_names.map { |column_name| self.arel_table[column_name] }
end
end
class GroupGroupLink < ActiveRecord::Base
......@@ -47,7 +52,9 @@ module EE
end
def members_with_parents
group_hierarchy_members = Member.where(source_type: 'Namespace', source_id: source_ids)
group_hierarchy_members = Member
.where(source_type: 'Namespace', source_id: source_ids)
.select(*Member.cached_column_list)
Member.from_union([group_hierarchy_members,
members_from_self_and_ancestor_group_shares])
......
......@@ -19,6 +19,7 @@ module Gitlab
# Project.where("id IN (#{sql})")
class SetOperator
def initialize(relations, remove_duplicates: true, remove_order: true)
verify_select_values!(relations) if Rails.env.test? || Rails.env.development?
@relations = relations
@remove_duplicates = remove_duplicates
@remove_order = remove_order
......@@ -54,6 +55,34 @@ module Gitlab
private
attr_reader :relations, :remove_duplicates, :remove_order
def verify_select_values!(relations)
all_select_values = relations.map do |relation|
if relation.respond_to?(:select_values)
relation.select_values
else
relation.projections # Handle Arel based subqueries
end
end
unless all_select_values.map(&:size).uniq.one?
relation_select_sizes = all_select_values.map.with_index do |select_values, index|
if select_values.empty?
"Relation ##{index}: *, all columns"
else
"Relation ##{index}: #{select_values.size} select values"
end
end
exception_text = <<~EOF
Relations with uneven select values were passed. The UNION query could break when the underlying table changes (add or remove columns).
#{relation_select_sizes.join("\n")}
EOF
raise(exception_text)
end
end
end
end
end
......@@ -22,7 +22,7 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword|
end
it 'skips Model.none segments' do
empty_relation = User.none
empty_relation = User.none.select(:id)
set_operator = described_class.new([empty_relation, relation_1, relation_2])
expect {User.where("users.id IN (#{set_operator.to_sql})").to_a}.not_to raise_error
......@@ -44,6 +44,17 @@ RSpec.shared_examples 'SQL set operator' do |operator_keyword|
end
end
context 'when uneven select values are used' do
let(:relation_1) { User.where(email: 'alice@example.com').select(*User.column_names) }
let(:relation_2) { User.where(email: 'bob@example.com') }
it 'raises error' do
expect do
described_class.new([relation_1, relation_2])
end.to raise_error /Relations with uneven select values were passed/
end
end
describe 'remove_order parameter' do
let(:scopes) do
[
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment