Commit 7bbc4025 authored by Adam Hegyi's avatar Adam Hegyi

Load ORDER BY values only

This change allows the IN operator optimization query to return the
loaded ORDER BY values instead of reading the result rows from the
disk.
parent e9735782
......@@ -226,7 +226,12 @@ Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
- `finder_query` loads the actual record row from the database. It must also be a lambda, where
the order by column expressions is available for locating the record. In this example, the
yielded values are `created_at` and `id` SQL expressions. Finding a record is very fast via the
primary key, so we don't use the `created_at` value.
primary key, so we don't use the `created_at` value. Providing the `finder_query` lambda is optional.
If it's not given, the IN operator optimization will only make the ORDER BY columns available to
the end-user and not the full database row.
If it's not given, the IN operator optimization will only make the ORDER BY columns available to
the end-user and not the full database row.
The following database index on the `issues` table must be present
to make the query execute efficiently:
......@@ -611,6 +616,32 @@ Gitlab::Pagination::Keyset::Iterator.new(scope: scope, **opts).each_batch(of: 10
end
```
NOTE:
The query loads complete database rows from the disk. This may cause increased I/O and slower
database queries. Depending on the use case, the primary key is often only
needed for the batch query to invoke additional statements. For example, `UPDATE` or `DELETE`. The
`id` column is included in the `ORDER BY` columns (`created_at` and `id`) and is already
loaded. In this case, you can omit the `finder_query` parameter.
Example for loading the `ORDER BY` columns only:
```ruby
scope = Issue.order(:created_at, :id)
array_scope = Group.find(9970).all_projects.select(:id)
array_mapping_scope = -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) }
opts = {
in_operator_optimization_options: {
array_scope: array_scope,
array_mapping_scope: array_mapping_scope
}
}
Gitlab::Pagination::Keyset::Iterator.new(scope: scope, **opts).each_batch(of: 100) do |records|
puts records.select(:id).map { |r| [r.id] } # only id and created_at are available
end
```
#### Keyset pagination
The optimization works out of the box with GraphQL and the `keyset_paginate` helper method.
......
......@@ -9,7 +9,6 @@ module Gitlab
UnsupportedScopeOrder = Class.new(StandardError)
RECURSIVE_CTE_NAME = 'recursive_keyset_cte'
RECORDS_COLUMN = 'records'
# This class optimizes slow database queries (PostgreSQL specific) where the
# IN SQL operator is used with sorting.
......@@ -42,7 +41,7 @@ module Gitlab
# > array_mapping_scope: array_mapping_scope,
# > finder_query: finder_query
# > ).execute.limit(20)
def initialize(scope:, array_scope:, array_mapping_scope:, finder_query:, values: {})
def initialize(scope:, array_scope:, array_mapping_scope:, finder_query: nil, values: {})
@scope, success = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(scope)
unless success
......@@ -57,11 +56,11 @@ module Gitlab
@order = Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(scope)
@array_scope = array_scope
@array_mapping_scope = array_mapping_scope
@finder_query = finder_query
@values = values
@model = @scope.model
@table_name = @model.table_name
@arel_table = @model.arel_table
@finder_strategy = finder_query.present? ? Strategies::RecordLoaderStrategy.new(finder_query, model, order_by_columns) : Strategies::OrderValuesLoaderStrategy.new(model, order_by_columns)
end
def execute
......@@ -74,7 +73,7 @@ module Gitlab
q = cte
.apply_to(model.where({})
.with(selector_cte.to_arel))
.select(result_collector_final_projections)
.select(finder_strategy.final_projections)
.where("count <> 0") # filter out the initializer row
model.from(q.arel.as(table_name))
......@@ -82,13 +81,13 @@ module Gitlab
private
attr_reader :array_scope, :scope, :order, :array_mapping_scope, :finder_query, :values, :model, :table_name, :arel_table
attr_reader :array_scope, :scope, :order, :array_mapping_scope, :finder_strategy, :values, :model, :table_name, :arel_table
def initializer_query
array_column_names = array_scope_columns.array_aggregated_column_names + order_by_columns.array_aggregated_column_names
projections = [
*result_collector_initializer_columns,
*finder_strategy.initializer_columns,
*array_column_names,
'0::bigint AS count'
]
......@@ -156,7 +155,7 @@ module Gitlab
order_column_value_arrays = order_by_columns.replace_value_in_array_by_position_expressions
select = [
*result_collector_columns,
*finder_strategy.columns,
*array_column_list,
*order_column_value_arrays,
"#{RECURSIVE_CTE_NAME}.count + 1"
......@@ -254,23 +253,6 @@ module Gitlab
end.join(", ")
end
def result_collector_initializer_columns
["NULL::#{table_name} AS #{RECORDS_COLUMN}"]
end
def result_collector_columns
query = finder_query
.call(*order_by_columns.array_lookup_expressions_by_position(RECURSIVE_CTE_NAME))
.select("#{table_name}")
.limit(1)
["(#{query.to_sql})"]
end
def result_collector_final_projections
["(#{RECORDS_COLUMN}).*"]
end
def array_scope_columns
@array_scope_columns ||= ArrayScopeColumns.new(array_scope.select_values)
end
......
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
module InOperatorOptimization
module Strategies
class OrderValuesLoaderStrategy
def initialize(model, order_by_columns)
@model = model
@order_by_columns = order_by_columns
end
def initializer_columns
order_by_columns.map do |column|
column_name = column.original_column_name.to_s
type = model.columns_hash[column_name].sql_type
"NULL::#{type} AS #{column_name}"
end
end
def columns
order_by_columns.array_lookup_expressions_by_position(QueryBuilder::RECURSIVE_CTE_NAME)
end
def final_projections
order_by_columns.map(&:original_column_name)
end
private
attr_reader :model, :order_by_columns
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
module InOperatorOptimization
module Strategies
class RecordLoaderStrategy
RECORDS_COLUMN = 'records'
def initialize(finder_query, model, order_by_columns)
@finder_query = finder_query
@order_by_columns = order_by_columns
@table_name = model.table_name
end
def initializer_columns
["NULL::#{table_name} AS #{RECORDS_COLUMN}"]
end
def columns
query = finder_query
.call(*order_by_columns.array_lookup_expressions_by_position(QueryBuilder::RECURSIVE_CTE_NAME))
.select("#{table_name}")
.limit(1)
["(#{query.to_sql})"]
end
def final_projections
["(#{RECORDS_COLUMN}).*"]
end
private
attr_reader :finder_query, :order_by_columns, :table_name
end
end
end
end
end
end
......@@ -41,14 +41,40 @@ RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder
)
end
it 'returns records in correct order' do
let(:all_records) do
all_records = []
iterator.each_batch(of: batch_size) do |records|
all_records.concat(records)
end
all_records
end
it 'returns records in correct order' do
expect(all_records).to eq(expected_order)
end
context 'when not passing the finder query' do
before do
in_operator_optimization_options.delete(:finder_query)
end
it 'returns records in correct order' do
expect(all_records).to eq(expected_order)
end
it 'loads only the order by column' do
order_by_attribute_names = iterator
.send(:order)
.column_definitions
.map(&:attribute_name)
.map(&:to_s)
record = all_records.first
loaded_attributes = record.attributes.keys - ['time_estimate'] # time_estimate is always present (has default value)
expect(loaded_attributes).to eq(order_by_attribute_names)
end
end
end
context 'when ordering by issues.id DESC' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::Strategies::OrderValuesLoaderStrategy do
let(:model) { Project }
let(:keyset_scope) do
scope, _ = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(
Project.order(:created_at, :id)
)
scope
end
let(:keyset_order) do
Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(keyset_scope)
end
let(:order_by_columns) do
Gitlab::Pagination::Keyset::InOperatorOptimization::OrderByColumns.new(keyset_order.column_definitions, model.arel_table)
end
subject(:strategy) { described_class.new(model, order_by_columns) }
describe '#initializer_columns' do
it 'returns NULLs for each ORDER BY columns' do
expect(strategy.initializer_columns).to eq([
'NULL::timestamp without time zone AS created_at',
'NULL::integer AS id'
])
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Pagination::Keyset::InOperatorOptimization::Strategies::RecordLoaderStrategy do
let(:finder_query) { -> (created_at_value, id_value) { Project.where(Project.arel_table[:id].eq(id_value)) } }
let(:model) { Project }
let(:keyset_scope) do
scope, _ = Gitlab::Pagination::Keyset::SimpleOrderBuilder.build(
Project.order(:created_at, :id)
)
scope
end
let(:keyset_order) do
Gitlab::Pagination::Keyset::Order.extract_keyset_order_object(keyset_scope)
end
let(:order_by_columns) do
Gitlab::Pagination::Keyset::InOperatorOptimization::OrderByColumns.new(keyset_order.column_definitions, model.arel_table)
end
subject(:strategy) { described_class.new(finder_query, model, order_by_columns) }
describe '#initializer_columns' do
# Explanation:
# > SELECT NULL::projects AS records
#
# The query returns one row and one column. The column may contain a full project row.
# In this particular case the row is NULL.
it 'returns a NULL table row as the result column' do
expect(strategy.initializer_columns).to eq(["NULL::projects AS records"])
end
end
describe '#columns' do
# Explanation:
# > SELECT (SELECT projects FROM projects limit 1)
#
# Selects one row from the database and collapses it into one column.
#
# Side note: Due to the type casts, columns and initializer_columns can be also UNION-ed:
# SELECT * FROM (
# (
# SELECT NULL::projects AS records
# UNION
# SELECT (SELECT projects FROM projects limit 1)
# )
# ) as records
it 'uses the finder query to load the row in the result column' do
expected_loader_query = <<~SQL
(SELECT projects FROM "projects" WHERE "projects"."id" = recursive_keyset_cte.projects_id_array[position] LIMIT 1)
SQL
expect(strategy.columns).to eq([expected_loader_query.chomp])
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment