Commit 10c3bae8 authored by Stan Hu's avatar Stan Hu

Merge branch 'tc-geo-gap-tracking-improvements' into 'master'

Follow-up: Geo log cursor gap tracking improvements

Closes #7057

See merge request gitlab-org/gitlab-ee!6754
parents 01ce3d7d f9baaf84
...@@ -47,6 +47,10 @@ module Geo ...@@ -47,6 +47,10 @@ module Geo
order(id: :desc).first order(id: :desc).first
end end
def self.includes_events
includes(reflections.keys)
end
def event def event
repository_created_event || repository_created_event ||
repository_updated_event || repository_updated_event ||
......
---
title: "Geo: Improve performance in Log Cursor gap tracking"
merge_request: 6754
author:
type: performance
...@@ -50,16 +50,18 @@ module Gitlab ...@@ -50,16 +50,18 @@ module Gitlab
with_redis do |redis| with_redis do |redis|
redis.zremrangebyscore(GEO_EVENT_LOG_GAPS, '-inf', outdated_timestamp) redis.zremrangebyscore(GEO_EVENT_LOG_GAPS, '-inf', outdated_timestamp)
gap_ids = redis.zrangebyscore(GEO_EVENT_LOG_GAPS, '-inf', grace_timestamp, with_scores: true) gap_ids = redis.zrangebyscore(GEO_EVENT_LOG_GAPS, '-inf', grace_timestamp).map(&:to_i)
break if gap_ids.empty?
gap_ids.each do |event_id, score| ::Geo::EventLog.where(id: gap_ids).each_batch do |batch|
handled = yield event_id.to_i batch.includes_events.each { |event_log| yield event_log }
redis.zrem(GEO_EVENT_LOG_GAPS, batch.map(&:id))
redis.zrem(GEO_EVENT_LOG_GAPS, event_id) if handled
end end
end end
end end
private
def track_gaps(current_id) def track_gaps(current_id)
log_info("Event log gap detected", previous_event_id: previous_id, current_event_id: current_id) log_info("Event log gap detected", previous_event_id: previous_id, current_event_id: current_id)
...@@ -78,8 +80,6 @@ module Gitlab ...@@ -78,8 +80,6 @@ module Gitlab
current_id > (previous_id + 1) current_id > (previous_id + 1)
end end
private
def grace_timestamp def grace_timestamp
(Time.now - GAP_GRACE_PERIOD).to_i (Time.now - GAP_GRACE_PERIOD).to_i
end end
......
...@@ -34,7 +34,7 @@ module Gitlab ...@@ -34,7 +34,7 @@ module Gitlab
end end
def run_once! def run_once!
gap_tracking.fill_gaps { |event_id| handle_gap_event(event_id) } gap_tracking.fill_gaps { |event_log| handle_single_event(event_log) }
# Wrap this with the connection to make it possible to reconnect if # Wrap this with the connection to make it possible to reconnect if
# PGbouncer dies: https://github.com/rails/rails/issues/29189 # PGbouncer dies: https://github.com/rails/rails/issues/29189
...@@ -43,6 +43,8 @@ module Gitlab ...@@ -43,6 +43,8 @@ module Gitlab
end end
end end
private
def handle_events(batch, previous_batch_last_id) def handle_events(batch, previous_batch_last_id)
logger.info("Handling events", first_id: batch.first.id, last_id: batch.last.id) logger.info("Handling events", first_id: batch.first.id, last_id: batch.last.id)
...@@ -80,15 +82,6 @@ module Gitlab ...@@ -80,15 +82,6 @@ module Gitlab
raise e raise e
end end
def handle_gap_event(event_id)
event_log = ::Geo::EventLog.find_by(id: event_id)
return false unless event_log
handle_single_event(event_log)
true
end
def event_klass_for(event) def event_klass_for(event)
event_klass_name = event.class.name.demodulize event_klass_name = event.class.name.demodulize
current_namespace = self.class.name.deconstantize current_namespace = self.class.name.deconstantize
......
...@@ -18,12 +18,12 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -18,12 +18,12 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
it 'returns the lowest gap id' do it 'returns the lowest gap id' do
Timecop.travel(50.minutes.ago) do Timecop.travel(50.minutes.ago) do
gap_tracking.previous_id = 18 gap_tracking.previous_id = 18
gap_tracking.track_gaps(20) gap_tracking.send(:track_gaps, 20)
end end
Timecop.travel(40.minutes.ago) do Timecop.travel(40.minutes.ago) do
gap_tracking.previous_id = 12 gap_tracking.previous_id = 12
gap_tracking.track_gaps(14) gap_tracking.send(:track_gaps, 14)
end end
expect(described_class.min_gap_id).to eq(13) expect(described_class.min_gap_id).to eq(13)
...@@ -37,10 +37,10 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -37,10 +37,10 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
it 'returns the number of gaps' do it 'returns the number of gaps' do
gap_tracking.previous_id = 18 gap_tracking.previous_id = 18
gap_tracking.track_gaps(20) gap_tracking.send(:track_gaps, 20)
gap_tracking.previous_id = 12 gap_tracking.previous_id = 12
gap_tracking.track_gaps(14) gap_tracking.send(:track_gaps, 14)
expect(described_class.gap_count).to eq(2) expect(described_class.gap_count).to eq(2)
end end
...@@ -86,9 +86,13 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -86,9 +86,13 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
it 'handles gaps that are more than 10 minutes old' do it 'handles gaps that are more than 10 minutes old' do
gap_tracking.check!(event_id_with_gap) gap_tracking.check!(event_id_with_gap)
gap_event = create(:geo_event_log, id: gap_id)
Timecop.travel(12.minutes) do Timecop.travel(12.minutes) do
expect { |blk| gap_tracking.fill_gaps(&blk) }.to yield_with_args(gap_id) expect { |blk| gap_tracking.fill_gaps(&blk) }.to yield_with_args(gap_event)
end end
expect(read_gaps).to be_empty
end end
it 'drops gaps older than 1 hour' do it 'drops gaps older than 1 hour' do
...@@ -100,18 +104,46 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -100,18 +104,46 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
expect(read_gaps).to be_empty expect(read_gaps).to be_empty
end end
it 'avoids N+1 queries to fetch event logs and their associated events' do
yielded = []
blk = lambda do |event_log|
event_log.event
yielded << event_log
end
Timecop.travel(13.minutes.ago) do
gap_tracking.check!(event_id_with_gap)
end
create(:geo_event_log, :updated_event, id: gap_id)
control_count = ActiveRecord::QueryRecorder.new do
expect { gap_tracking.fill_gaps(&blk) }.to change { yielded.count }.by(1)
end.count
Timecop.travel(12.minutes.ago) do
gap_tracking.check!(event_id_with_gap + 3)
end
create(:geo_event_log, :updated_event, id: event_id_with_gap + 1)
create(:geo_event_log, :updated_event, id: event_id_with_gap + 2)
expect do
expect { gap_tracking.fill_gaps(&blk) }.to change { yielded.count }.by(2)
end.not_to exceed_query_limit(control_count)
end
end end
describe '#track_gaps' do describe '#track_gaps' do
it 'logs a message' do it 'logs a message' do
expect(gap_tracking).to receive(:log_info).with(/gap detected/, hash_including(previous_event_id: previous_event_id, current_event_id: event_id_with_gap)) expect(gap_tracking).to receive(:log_info).with(/gap detected/, hash_including(previous_event_id: previous_event_id, current_event_id: event_id_with_gap))
gap_tracking.track_gaps(event_id_with_gap) gap_tracking.send(:track_gaps, event_id_with_gap)
end end
it 'saves the gap id in redis' do it 'saves the gap id in redis' do
Timecop.freeze do Timecop.freeze do
gap_tracking.track_gaps(event_id_with_gap) gap_tracking.send(:track_gaps, event_id_with_gap)
expect(read_gaps).to contain_exactly([gap_id.to_s, Time.now.to_i]) expect(read_gaps).to contain_exactly([gap_id.to_s, Time.now.to_i])
end end
...@@ -119,7 +151,7 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -119,7 +151,7 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
it 'saves a range of gaps id in redis' do it 'saves a range of gaps id in redis' do
Timecop.freeze do Timecop.freeze do
gap_tracking.track_gaps(event_id_with_gap + 3) gap_tracking.send(:track_gaps, event_id_with_gap + 3)
expected_gaps = ((previous_event_id + 1)..(event_id_with_gap + 2)).collect { |id| [id.to_s, Time.now.to_i] } expected_gaps = ((previous_event_id + 1)..(event_id_with_gap + 2)).collect { |id| [id.to_s, Time.now.to_i] }
...@@ -131,13 +163,13 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -131,13 +163,13 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
expected_gaps = [] expected_gaps = []
Timecop.freeze do Timecop.freeze do
gap_tracking.track_gaps(event_id_with_gap) gap_tracking.send(:track_gaps, event_id_with_gap)
expected_gaps << [gap_id.to_s, Time.now.to_i] expected_gaps << [gap_id.to_s, Time.now.to_i]
end end
Timecop.travel(2.minutes) do Timecop.travel(2.minutes) do
gap_tracking.previous_id = 17 gap_tracking.previous_id = 17
gap_tracking.track_gaps(19) gap_tracking.send(:track_gaps, 19)
expected_gaps << [18.to_s, Time.now.to_i] expected_gaps << [18.to_s, Time.now.to_i]
end end
...@@ -147,25 +179,25 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do ...@@ -147,25 +179,25 @@ describe Gitlab::Geo::EventGapTracking, :clean_gitlab_redis_cache do
describe '#gap?' do describe '#gap?' do
it 'returns false when current_id is the previous +1' do it 'returns false when current_id is the previous +1' do
expect(gap_tracking.gap?(previous_event_id + 1)).to be_falsy expect(gap_tracking.send(:gap?, previous_event_id + 1)).to be_falsy
end end
it 'returns true when current_id is the previous +2' do it 'returns true when current_id is the previous +2' do
expect(gap_tracking.gap?(previous_event_id + 2)).to be_truthy expect(gap_tracking.send(:gap?, previous_event_id + 2)).to be_truthy
end end
it 'returns false when current_id is equal to the previous' do it 'returns false when current_id is equal to the previous' do
expect(gap_tracking.gap?(previous_event_id)).to be_falsy expect(gap_tracking.send(:gap?, previous_event_id)).to be_falsy
end end
it 'returns false when current_id less than the previous' do it 'returns false when current_id less than the previous' do
expect(gap_tracking.gap?(previous_event_id - 1)).to be_falsy expect(gap_tracking.send(:gap?, previous_event_id - 1)).to be_falsy
end end
it 'returns false when previous id is 0' do it 'returns false when previous id is 0' do
gap_tracking.previous_id = 0 gap_tracking.previous_id = 0
expect(gap_tracking.gap?(100)).to be_falsy expect(gap_tracking.send(:gap?, 100)).to be_falsy
end end
end end
......
...@@ -86,10 +86,13 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -86,10 +86,13 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
end end
it 'calls #handle_gap_event for each gap the gap tracking finds' do it 'calls #handle_gap_event for each gap the gap tracking finds' do
allow(daemon.gap_tracking).to receive(:fill_gaps).and_yield(1).and_yield(5) second_event_log = create(:geo_event_log, repository_updated_event: repository_updated_event)
expect(daemon).to receive(:handle_gap_event).with(1) allow_any_instance_of(::Gitlab::Geo::LogCursor::EventLogs).to receive(:fetch_in_batches)
expect(daemon).to receive(:handle_gap_event).with(5) allow(daemon.send(:gap_tracking)).to receive(:fill_gaps).and_yield(event_log).and_yield(second_event_log)
expect(daemon).to receive(:handle_single_event).with(event_log)
expect(daemon).to receive(:handle_single_event).with(second_event_log)
daemon.run_once! daemon.run_once!
end end
...@@ -131,6 +134,8 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -131,6 +134,8 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
daemon.run_once! daemon.run_once!
create(:geo_event_log, id: event_log.id + 1)
expect(read_gaps).to eq([event_log.id + 1]) expect(read_gaps).to eq([event_log.id + 1])
expect(::Geo::EventLogState.last_processed.id).to eq(new_event.id) expect(::Geo::EventLogState.last_processed.id).to eq(new_event.id)
...@@ -146,6 +151,9 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -146,6 +151,9 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
daemon.run_once! daemon.run_once!
create(:geo_event_log, id: new_event.id + 1, repository_updated_event: updated_event)
create(:geo_event_log, id: new_event.id + 2, repository_updated_event: updated_event)
expect(read_gaps).to eq([new_event.id + 1, new_event.id + 2]) expect(read_gaps).to eq([new_event.id + 1, new_event.id + 2])
end end
...@@ -193,20 +201,20 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -193,20 +201,20 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
let(:batch) { create_list(:geo_event_log, 2) } let(:batch) { create_list(:geo_event_log, 2) }
it 'passes the previous batch id on to gap tracking' do it 'passes the previous batch id on to gap tracking' do
expect(daemon.gap_tracking).to receive(:previous_id=).with(55).ordered expect(daemon.send(:gap_tracking)).to receive(:previous_id=).with(55).ordered
batch.each do |event_log| batch.each do |event_log|
expect(daemon.gap_tracking).to receive(:previous_id=).with(event_log.id).ordered expect(daemon.send(:gap_tracking)).to receive(:previous_id=).with(event_log.id).ordered
end end
daemon.handle_events(batch, 55) daemon.send(:handle_events, batch, 55)
end end
it 'checks for gaps for each id in batch' do it 'checks for gaps for each id in batch' do
batch.each do |event_log| batch.each do |event_log|
expect(daemon.gap_tracking).to receive(:check!).with(event_log.id) expect(daemon.send(:gap_tracking)).to receive(:check!).with(event_log.id)
end end
daemon.handle_events(batch, 55) daemon.send(:handle_events, batch, 55)
end end
it 'handles every single event' do it 'handles every single event' do
...@@ -214,7 +222,7 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -214,7 +222,7 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
expect(daemon).to receive(:handle_single_event).with(event_log) expect(daemon).to receive(:handle_single_event).with(event_log)
end end
daemon.handle_events(batch, 55) daemon.send(:handle_events, batch, 55)
end end
end end
...@@ -225,20 +233,20 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -225,20 +233,20 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
event_log = build(:geo_event_log) event_log = build(:geo_event_log)
expect(daemon).not_to receive(:can_replay?) expect(daemon).not_to receive(:can_replay?)
daemon.handle_single_event(event_log) daemon.send(:handle_single_event, event_log)
end end
it 'checks if it can replay the event' do it 'checks if it can replay the event' do
expect(daemon).to receive(:can_replay?) expect(daemon).to receive(:can_replay?)
daemon.handle_single_event(event_log) daemon.send(:handle_single_event, event_log)
end end
it 'processes event when it is replayable' do it 'processes event when it is replayable' do
allow(daemon).to receive(:can_replay?).and_return(true) allow(daemon).to receive(:can_replay?).and_return(true)
expect(daemon).to receive(:process_event).with(event_log.event, event_log) expect(daemon).to receive(:process_event).with(event_log.event, event_log)
daemon.handle_single_event(event_log) daemon.send(:handle_single_event, event_log)
end end
end end
...@@ -246,7 +254,7 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared ...@@ -246,7 +254,7 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared
gaps = [] gaps = []
Timecop.travel(12.minutes) do Timecop.travel(12.minutes) do
daemon.gap_tracking.fill_gaps { |id| gaps << id } daemon.send(:gap_tracking).send(:fill_gaps) { |event| gaps << event.id }
end end
gaps gaps
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment