Commit 09e3236e authored by Heinrich Lee Yu's avatar Heinrich Lee Yu Committed by Mark Chao

Allow configuration of deduplication TTL

parent ea7a8d7b
...@@ -284,6 +284,36 @@ module AuthorizedProjectUpdate ...@@ -284,6 +284,36 @@ module AuthorizedProjectUpdate
end end
``` ```
### Setting the deduplication time-to-live (TTL)
Deduplication depends on an idempotency key that is stored in Redis. This is normally
cleared by the configured deduplication strategy.
However, the key can remain until its TTL expires in certain cases, such as:
1. `until_executing` is used but the job was never enqueued or executed after the Sidekiq
client middleware was run.
1. `until_executed` is used but the job fails to finish due to retry exhaustion, gets
interrupted the maximum number of times, or gets lost.
The default TTL is 6 hours. During this time, duplicate jobs are not enqueued, even if the
first job never executed or finished.
The TTL can be configured with:
```ruby
# Example worker that overrides the default deduplication TTL.
class ProjectImportScheduleWorker
include ApplicationWorker
idempotent!
# Use a 5-minute TTL for the idempotency key instead of the 6-hour default.
deduplicate :until_executing, ttl: 5.minutes
end
```
Duplicate jobs can be enqueued after the TTL expires, so lower the TTL only for jobs
that can tolerate some duplication.
### Deduplication with load balancing ### Deduplication with load balancing
> [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/6763) in GitLab 14.4. > [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/6763) in GitLab 14.4.
......
...@@ -9,6 +9,8 @@ class ProjectImportScheduleWorker ...@@ -9,6 +9,8 @@ class ProjectImportScheduleWorker
prepend WaitableWorker prepend WaitableWorker
idempotent! idempotent!
deduplicate :until_executing, ttl: 5.minutes
feature_category :source_code_management feature_category :source_code_management
sidekiq_options retry: false sidekiq_options retry: false
loggable_arguments 1 # For the job waiter key loggable_arguments 1 # For the job waiter key
......
...@@ -19,7 +19,7 @@ module Gitlab ...@@ -19,7 +19,7 @@ module Gitlab
class DuplicateJob class DuplicateJob
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
DUPLICATE_KEY_TTL = 6.hours DEFAULT_DUPLICATE_KEY_TTL = 6.hours
WAL_LOCATION_TTL = 60.seconds WAL_LOCATION_TTL = 60.seconds
MAX_REDIS_RETRIES = 5 MAX_REDIS_RETRIES = 5
DEFAULT_STRATEGY = :until_executing DEFAULT_STRATEGY = :until_executing
...@@ -59,7 +59,7 @@ module Gitlab ...@@ -59,7 +59,7 @@ module Gitlab
end end
# This method will return the jid that was set in redis # This method will return the jid that was set in redis
def check!(expiry = DUPLICATE_KEY_TTL) def check!(expiry = duplicate_key_ttl)
read_jid = nil read_jid = nil
read_wal_locations = {} read_wal_locations = {}
...@@ -133,7 +133,7 @@ module Gitlab ...@@ -133,7 +133,7 @@ module Gitlab
jid != existing_jid jid != existing_jid
end end
def set_deduplicated_flag!(expiry = DUPLICATE_KEY_TTL) def set_deduplicated_flag!(expiry = duplicate_key_ttl)
return unless reschedulable? return unless reschedulable?
Sidekiq.redis do |redis| Sidekiq.redis do |redis|
...@@ -168,6 +168,10 @@ module Gitlab ...@@ -168,6 +168,10 @@ module Gitlab
worker_klass.idempotent? worker_klass.idempotent?
end end
# TTL applied to this job's deduplication keys in Redis.
#
# Returns the `:ttl` entry of the worker's deduplication options when it is
# set to a truthy value, and DEFAULT_DUPLICATE_KEY_TTL otherwise.
def duplicate_key_ttl
  configured_ttl = options[:ttl]
  return configured_ttl if configured_ttl

  DEFAULT_DUPLICATE_KEY_TTL
end
private private
attr_writer :existing_wal_locations attr_writer :existing_wal_locations
......
...@@ -26,8 +26,8 @@ module Gitlab ...@@ -26,8 +26,8 @@ module Gitlab
end end
def check! def check!
# The default expiry time is the DuplicateJob::DUPLICATE_KEY_TTL already # The default expiry time is the worker class'
# Only the strategies de-duplicating when scheduling # configured deduplication TTL or DuplicateJob::DEFAULT_DUPLICATE_KEY_TTL.
duplicate_job.check! duplicate_job.check!
end end
end end
......
...@@ -52,11 +52,11 @@ module Gitlab ...@@ -52,11 +52,11 @@ module Gitlab
def expiry def expiry
strong_memoize(:expiry) do strong_memoize(:expiry) do
next DuplicateJob::DUPLICATE_KEY_TTL unless duplicate_job.scheduled? next duplicate_job.duplicate_key_ttl unless duplicate_job.scheduled?
time_diff = duplicate_job.scheduled_at.to_i - Time.now.to_i time_diff = duplicate_job.scheduled_at.to_i - Time.now.to_i
time_diff > 0 ? time_diff : DuplicateJob::DUPLICATE_KEY_TTL time_diff > 0 ? time_diff : duplicate_job.duplicate_key_ttl
end end
end end
end end
......
...@@ -85,24 +85,42 @@ RSpec.describe Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob, :clean_gi ...@@ -85,24 +85,42 @@ RSpec.describe Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob, :clean_gi
context 'when there was no job in the queue yet' do context 'when there was no job in the queue yet' do
it { expect(duplicate_job.check!).to eq('123') } it { expect(duplicate_job.check!).to eq('123') }
it "adds a idempotency key with ttl set to #{described_class::DUPLICATE_KEY_TTL}" do shared_examples 'sets Redis keys with correct TTL' do
it "adds an idempotency key with correct ttl" do
expect { duplicate_job.check! } expect { duplicate_job.check! }
.to change { read_idempotency_key_with_ttl(idempotency_key) } .to change { read_idempotency_key_with_ttl(idempotency_key) }
.from([nil, -2]) .from([nil, -2])
.to(['123', be_within(1).of(described_class::DUPLICATE_KEY_TTL)]) .to(['123', be_within(1).of(expected_ttl)])
end end
context 'when wal locations is not empty' do context 'when wal locations is not empty' do
it "adds a existing wal locations key with ttl set to #{described_class::DUPLICATE_KEY_TTL}" do it "adds an existing wal locations key with correct ttl" do
expect { duplicate_job.check! } expect { duplicate_job.check! }
.to change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :main)) } .to change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :main)) }
.from([nil, -2]) .from([nil, -2])
.to([wal_locations[:main], be_within(1).of(described_class::DUPLICATE_KEY_TTL)]) .to([wal_locations[:main], be_within(1).of(expected_ttl)])
.and change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :ci)) } .and change { read_idempotency_key_with_ttl(existing_wal_location_key(idempotency_key, :ci)) }
.from([nil, -2]) .from([nil, -2])
.to([wal_locations[:ci], be_within(1).of(described_class::DUPLICATE_KEY_TTL)]) .to([wal_locations[:ci], be_within(1).of(expected_ttl)])
end end
end end
end
context 'with TTL option is not set' do
let(:expected_ttl) { described_class::DEFAULT_DUPLICATE_KEY_TTL }
it_behaves_like 'sets Redis keys with correct TTL'
end
context 'when TTL option is set' do
let(:expected_ttl) { 5.minutes }
before do
allow(duplicate_job).to receive(:options).and_return({ ttl: expected_ttl })
end
it_behaves_like 'sets Redis keys with correct TTL'
end
context 'when preserve_latest_wal_locations_for_idempotent_jobs feature flag is disabled' do context 'when preserve_latest_wal_locations_for_idempotent_jobs feature flag is disabled' do
before do before do
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name| RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
let(:fake_duplicate_job) do let(:fake_duplicate_job) do
instance_double(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob) instance_double(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob, duplicate_key_ttl: Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DEFAULT_DUPLICATE_KEY_TTL)
end end
let(:expected_message) { "dropped #{strategy_name.to_s.humanize.downcase}" } let(:expected_message) { "dropped #{strategy_name.to_s.humanize.downcase}" }
...@@ -18,7 +18,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name| ...@@ -18,7 +18,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
expect(fake_duplicate_job).to receive(:scheduled?).twice.ordered.and_return(false) expect(fake_duplicate_job).to receive(:scheduled?).twice.ordered.and_return(false)
expect(fake_duplicate_job).to( expect(fake_duplicate_job).to(
receive(:check!) receive(:check!)
.with(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DUPLICATE_KEY_TTL) .with(fake_duplicate_job.duplicate_key_ttl)
.ordered .ordered
.and_return('a jid')) .and_return('a jid'))
expect(fake_duplicate_job).to receive(:duplicate?).ordered.and_return(false) expect(fake_duplicate_job).to receive(:duplicate?).ordered.and_return(false)
...@@ -62,7 +62,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name| ...@@ -62,7 +62,7 @@ RSpec.shared_examples 'deduplicating jobs when scheduling' do |strategy_name|
allow(fake_duplicate_job).to receive(:options).and_return({ including_scheduled: true }) allow(fake_duplicate_job).to receive(:options).and_return({ including_scheduled: true })
allow(fake_duplicate_job).to( allow(fake_duplicate_job).to(
receive(:check!) receive(:check!)
.with(Gitlab::SidekiqMiddleware::DuplicateJobs::DuplicateJob::DUPLICATE_KEY_TTL) .with(fake_duplicate_job.duplicate_key_ttl)
.and_return('the jid')) .and_return('the jid'))
allow(fake_duplicate_job).to receive(:idempotent?).and_return(true) allow(fake_duplicate_job).to receive(:idempotent?).and_return(true)
allow(fake_duplicate_job).to receive(:update_latest_wal_location!) allow(fake_duplicate_job).to receive(:update_latest_wal_location!)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment