Commit 29a22543 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch '339129-cleanup-policies-caching' into 'master'

Cleanup tags service: cache created_at values for tags that are kept

See merge request gitlab-org/gitlab!69459
parents 01196a0e a1450bd7
......@@ -4,7 +4,7 @@ module ContainerExpirationPolicies
class CleanupService
attr_reader :repository
SERVICE_RESULT_FIELDS = %i[original_size before_truncate_size after_truncate_size before_delete_size deleted_size].freeze
SERVICE_RESULT_FIELDS = %i[original_size before_truncate_size after_truncate_size before_delete_size deleted_size cached_tags_count].freeze
def initialize(repository)
@repository = repository
......
# frozen_string_literal: true

module Projects
  module ContainerRepository
    # Caches container registry tag `created_at` timestamps in Redis so that
    # subsequent cleanup policy runs can avoid refetching tag manifests.
    class CacheTagsCreatedAtService
      def initialize(container_repository)
        @container_repository = container_repository
        # Names of tags whose created_at was read from the cache; #insert
        # uses this to skip re-writing keys that are already present.
        @cached_tag_names = Set.new
      end

      # Fills tag.created_at from Redis for every tag with a cached value.
      #
      # Returns the number of cache hits, or nil when +tags+ is empty.
      def populate(tags)
        return if tags.empty?

        # All tags are loaded in a single Redis MGET round trip. The maximum
        # number of tags is configurable and set to 200 by default:
        # https://gitlab.com/gitlab-org/gitlab/blob/master/doc/user/packages/container_registry/index.md#set-cleanup-limits-to-conserve-resources
        redis_keys = tags.map { |tag| cache_key(tag) }
        hits = 0

        ::Gitlab::Redis::Cache.with do |redis|
          cached_values = redis.mget(redis_keys)

          tags.each_with_index do |tag, index|
            raw_created_at = cached_values[index]
            next if raw_created_at.nil?

            tag.created_at = DateTime.rfc3339(raw_created_at)
            @cached_tag_names << tag.name
            hits += 1
          end
        end

        hits
      end

      # Writes created_at values to Redis for tags that are being kept.
      #
      # +max_ttl_in_seconds+ caps each entry's lifetime; the effective TTL is
      # reduced by the tag's current age. Returns nil on the guard paths.
      def insert(tags, max_ttl_in_seconds)
        return unless max_ttl_in_seconds
        return if tags.empty?

        # Tags with a nil created_at are not cacheable, and tags whose value
        # we just read from the cache don't need to be cached again.
        tags_to_cache = tags.reject do |tag|
          tag.created_at.blank? || @cached_tag_names.include?(tag.name)
        end
        return if tags_to_cache.empty?

        now = Time.zone.now

        ::Gitlab::Redis::Cache.with do |redis|
          # A pipeline (rather than a single MSET) because each tag gets its
          # own specific TTL.
          redis.pipelined do
            tags_to_cache.each do |tag|
              created_at = tag.created_at
              # Remaining lifetime: max_ttl_in_seconds reduced by the number
              # of seconds the tag has already existed.
              remaining_ttl = (max_ttl_in_seconds - (now - created_at).seconds).to_i
              redis.set(cache_key(tag), created_at.rfc3339, ex: remaining_ttl) if remaining_ttl > 0
            end
          end
        end
      end

      private

      # Redis key for one tag. The repository id is wrapped in {} so every
      # key of a repository hashes to the same Redis Cluster slot.
      def cache_key(tag)
        "container_repository:{#{@container_repository.id}}:tag:#{tag.name}:created_at"
      end
    end
  end
end
......@@ -3,6 +3,8 @@
module Projects
module ContainerRepository
class CleanupTagsService < BaseService
include ::Gitlab::Utils::StrongMemoize
def execute(container_repository)
return error('access denied') unless can_destroy?
return error('invalid regex') unless valid_regex?
......@@ -17,13 +19,16 @@ module Projects
tags = truncate(tags)
after_truncate_size = tags.size
tags = filter_keep_n(tags)
tags = filter_by_older_than(tags)
cached_tags_count = populate_tags_from_cache(container_repository, tags) || 0
tags = filter_keep_n(container_repository, tags)
tags = filter_by_older_than(container_repository, tags)
delete_tags(container_repository, tags).tap do |result|
result[:original_size] = original_size
result[:before_truncate_size] = before_truncate_size
result[:after_truncate_size] = after_truncate_size
result[:cached_tags_count] = cached_tags_count
result[:before_delete_size] = tags.size
result[:deleted_size] = result[:deleted]&.size
......@@ -67,21 +72,26 @@ module Projects
end
end
def filter_keep_n(tags)
def filter_keep_n(container_repository, tags)
return tags unless params['keep_n']
tags = order_by_date(tags)
cache_tags(container_repository, tags.first(keep_n))
tags.drop(keep_n)
end
def filter_by_older_than(tags)
return tags unless params['older_than']
def filter_by_older_than(container_repository, tags)
return tags unless older_than
older_than = ChronicDuration.parse(params['older_than']).seconds.ago
older_than_timestamp = older_than_in_seconds.ago
tags.select do |tag|
tag.created_at && tag.created_at < older_than
tags, tags_to_keep = tags.partition do |tag|
tag.created_at && tag.created_at < older_than_timestamp
end
cache_tags(container_repository, tags_to_keep)
tags
end
def can_destroy?
......@@ -114,6 +124,28 @@ module Projects
tags.sample(truncated_size)
end
# Pre-fills tag.created_at values from the Redis cache (one round trip).
# Returns the number of cache hits, or nil when caching is disabled.
def populate_tags_from_cache(container_repository, tags)
cache(container_repository).populate(tags) if caching_enabled?(container_repository)
end
# Caches created_at for tags that will be kept. The TTL is bounded by the
# policy's older_than window so entries expire once they stop being relevant.
def cache_tags(container_repository, tags)
cache(container_repository).insert(tags, older_than_in_seconds) if caching_enabled?(container_repository)
end
# Memoized instance of the created_at cache service.
def cache(container_repository)
# TODO Implement https://gitlab.com/gitlab-org/gitlab/-/issues/340277 to avoid passing
# the container repository parameter which is bad for a memoized function
strong_memoize(:cache) do
::Projects::ContainerRepository::CacheTagsCreatedAtService.new(container_repository)
end
end
# Caching only applies to container expiration policy runs, requires an
# older_than window (it bounds the cache TTL), and is gated behind a
# project-scoped feature flag.
def caching_enabled?(container_repository)
params['container_expiration_policy'] &&
older_than.present? &&
Feature.enabled?(:container_registry_expiration_policies_caching, container_repository.project)
end
# Feature flag guarding the throttled cleanup mode.
def throttling_enabled?
Feature.enabled?(:container_registry_expiration_policies_throttling)
end
......@@ -125,6 +157,16 @@ module Projects
# Number of most recent tags to keep, as an integer, from the service params.
def keep_n
params['keep_n'].to_i
end
# The older_than param (a human-readable duration, e.g. "90 days") parsed
# into a duration in seconds. Memoized so ChronicDuration parsing happens at
# most once per service instance.
def older_than_in_seconds
strong_memoize(:older_than_in_seconds) do
ChronicDuration.parse(older_than).seconds
end
end
# Raw older_than param (duration string or nil).
def older_than
params['older_than']
end
end
end
end
......@@ -21,6 +21,7 @@ module ContainerExpirationPolicies
cleanup_tags_service_original_size
cleanup_tags_service_before_truncate_size
cleanup_tags_service_after_truncate_size
cleanup_tags_service_cached_tags_count
cleanup_tags_service_before_delete_size
cleanup_tags_service_deleted_size
].freeze
......@@ -147,13 +148,27 @@ module ContainerExpirationPolicies
log_extra_metadata_on_done(field, value)
end
log_truncate(result)
log_cache_ratio(result)
log_extra_metadata_on_done(:running_jobs_count, running_jobs_count)
end
# Logs the cache hit ratio (cached tags / tags left after truncation) for
# observability. Skipped when either count is missing from the payload or
# when no tags remain (avoids a division by zero).
def log_cache_ratio(result)
  after_truncate_size = result.payload[:cleanup_tags_service_after_truncate_size]
  cache_hits = result.payload[:cleanup_tags_service_cached_tags_count]

  loggable = after_truncate_size && cache_hits && after_truncate_size != 0
  return unless loggable

  log_extra_metadata_on_done(:cleanup_tags_service_cache_hit_ratio, cache_hits / after_truncate_size.to_f)
end
# Emits a boolean flag telling whether the tags list was truncated by the
# cleanup service (before/after truncate sizes both present and different).
def log_truncate(result)
  before_truncate_size = result.payload[:cleanup_tags_service_before_truncate_size]
  after_truncate_size = result.payload[:cleanup_tags_service_after_truncate_size]

  truncated = before_truncate_size &&
    after_truncate_size &&
    before_truncate_size != after_truncate_size

  # Coerce nil to false so the logged value is always a boolean.
  log_extra_metadata_on_done(:cleanup_tags_service_truncated, !!truncated)
  # NOTE: :running_jobs_count is logged by the caller after log_truncate;
  # logging it again here was a stale leftover line and has been removed.
end
def policy
......
---
name: container_registry_expiration_policies_caching
introduced_by_url:
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/340606
milestone: '14.3'
type: development
group: group::package
default_enabled: false
......@@ -5,6 +5,7 @@ module ContainerRegistry
include Gitlab::Utils::StrongMemoize
attr_reader :repository, :name
attr_writer :created_at
delegate :registry, :client, to: :repository
delegate :revision, :short_revision, to: :config_blob, allow_nil: true
......@@ -73,9 +74,10 @@ module ContainerRegistry
end
def created_at
return @created_at if @created_at
return unless config
strong_memoize(:created_at) do
strong_memoize(:memoized_created_at) do
DateTime.rfc3339(config['created'])
rescue ArgumentError
nil
......
......@@ -60,6 +60,20 @@ RSpec.describe ContainerRegistry::Tag do
end
context 'manifest processing' do
# Shared examples asserting that a manually assigned created_at (e.g. one
# populated from the Redis cache) takes precedence over parsing the config
# blob — the tag must not fetch its config at all.
shared_examples 'using the value manually set on created_at' do
let(:value) { 5.seconds.ago }
before do
tag.created_at = value
end
it 'does not use the config' do
expect(tag).not_to receive(:config)
expect(subject).to eq(value)
end
end
context 'schema v1' do
before do
stub_request(:get, 'http://registry.gitlab/v2/group/test/manifests/tag')
......@@ -93,6 +107,8 @@ RSpec.describe ContainerRegistry::Tag do
subject { tag.created_at }
it { is_expected.to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
end
......@@ -117,6 +133,8 @@ RSpec.describe ContainerRegistry::Tag do
subject { tag.created_at }
it { is_expected.to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
......@@ -154,6 +172,8 @@ RSpec.describe ContainerRegistry::Tag do
subject { tag.created_at }
it { is_expected.not_to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
......
......@@ -69,6 +69,7 @@ RSpec.describe ContainerExpirationPolicies::CleanupService do
before_truncate_size: 800,
after_truncate_size: 200,
before_delete_size: 100,
cached_tags_count: 0,
deleted_size: 100
}
end
......@@ -86,6 +87,7 @@ RSpec.describe ContainerExpirationPolicies::CleanupService do
cleanup_tags_service_before_truncate_size: 800,
cleanup_tags_service_after_truncate_size: 200,
cleanup_tags_service_before_delete_size: 100,
cleanup_tags_service_cached_tags_count: 0,
cleanup_tags_service_deleted_size: 100
)
expect(ContainerRepository.waiting_for_cleanup.count).to eq(1)
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the Redis-backed created_at tag cache used by the cleanup tags
# service. :clean_gitlab_redis_cache wipes the Redis cache between examples.
RSpec.describe ::Projects::ContainerRepository::CacheTagsCreatedAtService, :clean_gitlab_redis_cache do
# Lightweight stand-in for a registry tag: the service only touches
# #name and #created_at.
let_it_be(:dummy_tag_class) { Struct.new(:name, :created_at) }
let_it_be(:repository) { create(:container_repository) }
let(:tags) { create_tags(5) }
let(:service) { described_class.new(repository) }
shared_examples 'not interacting with redis' do
it 'does not interact with redis' do
expect(::Gitlab::Redis::Cache).not_to receive(:with)
subject
end
end
describe '#populate' do
subject { service.populate(tags) }
context 'with tags' do
# Cold cache: one Redis round trip, zero hits, created_at left untouched.
it 'gets values from redis' do
expect(::Gitlab::Redis::Cache).to receive(:with).and_call_original
expect(subject).to eq(0)
tags.each { |t| expect(t.created_at).to eq(nil) }
end
context 'with cached values' do
let(:cached_tags) { tags.first(2) }
before do
::Gitlab::Redis::Cache.with do |redis|
cached_tags.each do |tag|
redis.set(cache_key(tag), rfc3339(10.days.ago))
end
end
end
# Warm cache: hit count is returned and created_at is populated only
# for the tags that had cached values.
it 'gets values from redis' do
expect(::Gitlab::Redis::Cache).to receive(:with).and_call_original
expect(subject).to eq(2)
cached_tags.each { |t| expect(t.created_at).not_to eq(nil) }
(tags - cached_tags).each { |t| expect(t.created_at).to eq(nil) }
end
end
end
context 'with no tags' do
let(:tags) { [] }
it_behaves_like 'not interacting with redis'
end
end
describe '#insert' do
let(:max_ttl) { 90.days }
subject { service.insert(tags, max_ttl) }
context 'with tags' do
let(:tag) { tags.first }
# Expected TTL: max_ttl reduced by the tag's age (3 days).
let(:ttl) { 90.days - 3.days }
before do
# Freeze time so the computed TTL is deterministic.
travel_to(Time.zone.local(2021, 9, 2, 12, 0, 0))
tag.created_at = DateTime.rfc3339(3.days.ago.rfc3339)
end
after do
travel_back
end
it 'inserts values in redis' do
::Gitlab::Redis::Cache.with do |redis|
expect(redis)
.to receive(:set)
.with(cache_key(tag), rfc3339(tag.created_at), ex: ttl.to_i)
.and_call_original
end
subject
end
context 'with some of them already cached' do
let(:tag) { tags.first }
before do
::Gitlab::Redis::Cache.with do |redis|
redis.set(cache_key(tag), rfc3339(10.days.ago))
end
# populate marks the tag as already cached, so insert skips it; the
# remaining tags have a nil created_at and are not cacheable either.
service.populate(tags)
end
it_behaves_like 'not interacting with redis'
end
end
context 'with no tags' do
let(:tags) { [] }
it_behaves_like 'not interacting with redis'
end
context 'with no expires_in' do
let(:max_ttl) { nil }
it_behaves_like 'not interacting with redis'
end
end
def create_tags(size)
Array.new(size) do |i|
dummy_tag_class.new("Tag #{i}", nil)
end
end
# Mirrors the service's private cache key format.
def cache_key(tag)
"container_repository:{#{repository.id}}:tag:#{tag.name}:created_at"
end
def rfc3339(date_time)
# DateTime rfc3339 is different ActiveSupport::TimeWithZone rfc3339
# The caching will use DateTime rfc3339
DateTime.rfc3339(date_time.rfc3339).rfc3339
end
end
......@@ -74,6 +74,30 @@ RSpec.describe ContainerExpirationPolicies::CleanupContainerRepositoryWorker do
end
end
end
# Verifies cleanup_tags_service_cache_hit_ratio logging for all combinations
# of missing (nil), zero and non-zero counts; the ratio must be omitted when
# either count is missing or the tag count is zero.
context 'the cache hit ratio field' do
where(:after_truncate_size, :cached_tags_count, :ratio) do
nil | nil | nil
10 | nil | nil
nil | 10 | nil
0 | 5 | nil
10 | 0 | 0
10 | 5 | 0.5
3 | 10 | (10 / 3.to_f)
end
with_them do
it 'is logged properly' do
# before_truncate_size is deliberately set equal to after_truncate_size:
# the expectation below asserts truncated: false.
service_response = cleanup_service_response(status: :unfinished, repository: repository, cleanup_tags_service_before_truncate_size: after_truncate_size, cleanup_tags_service_after_truncate_size: after_truncate_size, cleanup_tags_service_cached_tags_count: cached_tags_count)
expect(ContainerExpirationPolicies::CleanupService)
.to receive(:new).with(repository).and_return(double(execute: service_response))
expect_log_extra_metadata(service_response: service_response, cleanup_status: :unfinished, truncated: false, cache_hit_ratio: ratio)
expect_log_info(project_id: project.id, container_repository_id: repository.id)
subject
end
end
end
end
context 'with an erroneous cleanup' do
......@@ -372,7 +396,16 @@ RSpec.describe ContainerExpirationPolicies::CleanupContainerRepositoryWorker do
end
end
def cleanup_service_response(status: :finished, repository:, cleanup_tags_service_original_size: 100, cleanup_tags_service_before_truncate_size: 80, cleanup_tags_service_after_truncate_size: 80, cleanup_tags_service_before_delete_size: 50, cleanup_tags_service_deleted_size: 50)
def cleanup_service_response(
status: :finished,
repository:,
cleanup_tags_service_original_size: 100,
cleanup_tags_service_before_truncate_size: 80,
cleanup_tags_service_after_truncate_size: 80,
cleanup_tags_service_before_delete_size: 50,
cleanup_tags_service_deleted_size: 50,
cleanup_tags_service_cached_tags_count: 0
)
ServiceResponse.success(
message: "cleanup #{status}",
payload: {
......@@ -381,21 +414,35 @@ RSpec.describe ContainerExpirationPolicies::CleanupContainerRepositoryWorker do
cleanup_tags_service_original_size: cleanup_tags_service_original_size,
cleanup_tags_service_before_truncate_size: cleanup_tags_service_before_truncate_size,
cleanup_tags_service_after_truncate_size: cleanup_tags_service_after_truncate_size,
cleanup_tags_service_before_delete_size: cleanup_tags_service_before_delete_size
cleanup_tags_service_before_delete_size: cleanup_tags_service_before_delete_size,
cleanup_tags_service_cached_tags_count: cleanup_tags_service_cached_tags_count
}.compact
)
end
def expect_log_extra_metadata(service_response:, cleanup_status: :finished, truncated: false)
def expect_log_extra_metadata(service_response:, cleanup_status: :finished, truncated: false, cache_hit_ratio: 0)
expect(worker).to receive(:log_extra_metadata_on_done).with(:container_repository_id, repository.id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:project_id, repository.project.id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_status, cleanup_status)
%i[cleanup_tags_service_original_size cleanup_tags_service_before_truncate_size cleanup_tags_service_after_truncate_size cleanup_tags_service_before_delete_size cleanup_tags_service_deleted_size].each do |field|
%i[
cleanup_tags_service_original_size
cleanup_tags_service_before_truncate_size
cleanup_tags_service_after_truncate_size
cleanup_tags_service_before_delete_size
cleanup_tags_service_deleted_size
cleanup_tags_service_cached_tags_count
].each do |field|
value = service_response.payload[field]
expect(worker).to receive(:log_extra_metadata_on_done).with(field, value) unless value.nil?
end
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_tags_service_truncated, truncated)
after_truncate_size = service_response.payload[:cleanup_tags_service_after_truncate_size]
if cache_hit_ratio && after_truncate_size && after_truncate_size != 0
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_tags_service_cache_hit_ratio, cache_hit_ratio)
end
expect(worker).to receive(:log_extra_metadata_on_done).with(:running_jobs_count, 0)
if service_response.error?
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment