Commit c5dd0858 authored by Aleksei Lipniagov's avatar Aleksei Lipniagov

Improve LoadBalancer#all_caught_up? logic

With the old logic, having a single stale/lagging replica would not
allow us to use caught up replicas, which would increase the load to
primary where we could avoid it. Also adding the comments on the
possible improvements of the similar method.

Changelog: performance
parent 809d8f6c
---
name: load_balancing_improved_caught_up_hosts_check
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65248
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/334989
milestone: '14.1'
type: development
group: group::memory
default_enabled: false
......@@ -147,15 +147,24 @@ module Gitlab
raise 'Failed to determine the write location of the primary database'
end
# Returns true if all hosts have caught up to the given transaction
# write location.
# FF disabled: Returns true if all hosts have caught up to the given transaction write location.
# FF enabled: Returns true if there was at least one host that has caught up with the given transaction and sets it.
def all_caught_up?(location)
@host_list.hosts.all? { |host| host.caught_up?(location) }
if ::Feature.enabled?(:load_balancing_improved_caught_up_hosts_check)
select_up_to_date_host(location)
else
@host_list.hosts.all? { |host| host.caught_up?(location) }
end
end
# Returns true if there was at least one host that has caught up with the given transaction.
#
# In case of a retry, this method also stores the set of hosts that have caught up.
#
# UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`),
# while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604
# Also, shuffling the list afterwards doesn't seem to be necessary.
# This may be improved by merging this method with `select_up_to_date_host`.
def select_caught_up_hosts(location)
all_hosts = @host_list.hosts
valid_hosts = all_hosts.select { |host| host.caught_up?(location) }
......@@ -179,6 +188,8 @@ module Gitlab
# Returns true if there was at least one host that has caught up with the given transaction.
# Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use.
# Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any.
#
# It is going to be merged with `select_caught_up_hosts`, because they intend to do the same.
def select_up_to_date_host(location)
all_hosts = @host_list.hosts.shuffle
host = all_hosts.find { |host| host.caught_up?(location) }
......
......@@ -307,22 +307,34 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do
end
describe '#all_caught_up?' do
it 'returns true if all hosts caught up to the write location' do
expect(lb.host_list.hosts).to all(receive(:caught_up?).with('foo').and_return(true))
it 'delegates execution to #select_up_to_date_host' do
expect(lb).to receive(:select_up_to_date_host).with('foo').and_return(true)
expect(lb.all_caught_up?('foo')).to eq(true)
end
it 'returns false if a host has not yet caught up' do
expect(lb.host_list.hosts[0]).to receive(:caught_up?)
.with('foo')
.and_return(true)
context 'when :load_balancing_improved_caught_up_hosts_check FF is disabled' do
before do
stub_feature_flags(load_balancing_improved_caught_up_hosts_check: false)
end
it 'returns true if all hosts caught up to the write location' do
expect(lb.host_list.hosts).to all(receive(:caught_up?).with('foo').and_return(true))
expect(lb.all_caught_up?('foo')).to eq(true)
end
it 'returns false if a host has not yet caught up' do
expect(lb.host_list.hosts[0]).to receive(:caught_up?)
.with('foo')
.and_return(true)
expect(lb.host_list.hosts[1]).to receive(:caught_up?)
.with('foo')
.and_return(false)
expect(lb.host_list.hosts[1]).to receive(:caught_up?)
.with('foo')
.and_return(false)
expect(lb.all_caught_up?('foo')).to eq(false)
expect(lb.all_caught_up?('foo')).to eq(false)
end
end
end
......@@ -488,7 +500,7 @@ RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do
end
end
describe '#select_caught_up_hosts' do
describe '#select_up_to_date_host' do
let(:location) { 'AB/12345'}
let(:hosts) { lb.host_list.hosts }
let(:set_host) { RequestStore[described_class::CACHE_KEY] }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment