Commit 6b05e2b8 authored by Steve Abrams's avatar Steve Abrams

Merge branch '347351-add-dedupplicated-size-for-nested-repositories' into 'master'

Support container repositories deduplicated size

See merge request gitlab-org/gitlab!83980
parents 0e2af1ad faa6448d
......@@ -17,6 +17,7 @@ class ContainerRepository < ApplicationRecord
SKIPPABLE_MIGRATION_STATES = (ABORTABLE_MIGRATION_STATES + %w[import_aborted]).freeze
MIGRATION_PHASE_1_STARTED_AT = Date.new(2021, 11, 4).freeze
MIGRATION_PHASE_1_ENDED_AT = Date.new(2022, 01, 23).freeze
TooManyImportsError = Class.new(StandardError)
......@@ -217,6 +218,13 @@ class ContainerRepository < ApplicationRecord
).exists?
end
def self.all_migrated?
# check that the set of non migrated repositories is empty
where(created_at: ...MIGRATION_PHASE_1_ENDED_AT)
.where.not(migration_state: 'import_done')
.empty?
end
def self.with_enabled_policy
joins('INNER JOIN container_expiration_policies ON container_repositories.project_id = container_expiration_policies.project_id')
.where(container_expiration_policies: { enabled: true })
......@@ -456,7 +464,7 @@ class ContainerRepository < ApplicationRecord
next if self.created_at.before?(MIGRATION_PHASE_1_STARTED_AT)
next unless gitlab_api_client.supports_gitlab_api?
gitlab_api_client.repository_details(self.path, with_size: true)['size_bytes']
gitlab_api_client.repository_details(self.path, sizing: :self)['size_bytes']
end
end
......
......@@ -1063,6 +1063,17 @@ class Project < ApplicationRecord
end
end
def container_repositories_size
strong_memoize(:container_repositories_size) do
next unless Gitlab.com?
next 0 if container_repositories.empty?
next unless container_repositories.all_migrated?
next unless ContainerRegistry::GitlabApiClient.supports_gitlab_api?
ContainerRegistry::GitlabApiClient.deduplicated_size(full_path)
end
end
def has_container_registry_tags?
return @images if defined?(@images)
......
# frozen_string_literal: true
class AddNonMigratedIndexToContainerRepositories < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
# follow up issue: https://gitlab.com/gitlab-org/gitlab/-/issues/358407
INDEX_NAME = 'tmp_idx_container_repos_on_non_migrated'
MIGRATION_PHASE_1_ENDED_AT = '2022-01-23'
def up
add_concurrent_index :container_repositories,
[:project_id, :id],
name: INDEX_NAME,
where: "migration_state != 'import_done' AND created_at < '#{MIGRATION_PHASE_1_ENDED_AT}'"
end
def down
remove_concurrent_index_by_name :container_repositories, INDEX_NAME
end
end
c4dcb2b2e1262d63c56e171796f1cb6fb76d4b7dc090cf585f17a451c2fa784f
\ No newline at end of file
......@@ -29646,6 +29646,8 @@ CREATE INDEX tmp_gitlab_subscriptions_max_seats_used_migration ON gitlab_subscri
CREATE INDEX tmp_gitlab_subscriptions_max_seats_used_migration_2 ON gitlab_subscriptions USING btree (id) WHERE ((start_date < '2021-08-02'::date) AND (max_seats_used <> 0) AND (max_seats_used > seats_in_use) AND (max_seats_used > seats));
CREATE INDEX tmp_idx_container_repos_on_non_migrated ON container_repositories USING btree (project_id, id) WHERE ((migration_state <> 'import_done'::text) AND (created_at < '2022-01-23 00:00:00'::timestamp without time zone));
CREATE INDEX tmp_index_ci_job_artifacts_on_id_where_trace_and_expire_at ON ci_job_artifacts USING btree (id) WHERE ((file_type = 3) AND (expire_at = ANY (ARRAY['2021-04-22 00:00:00+00'::timestamp with time zone, '2021-05-22 00:00:00+00'::timestamp with time zone, '2021-06-22 00:00:00+00'::timestamp with time zone, '2022-01-22 00:00:00+00'::timestamp with time zone, '2022-02-22 00:00:00+00'::timestamp with time zone, '2022-03-22 00:00:00+00'::timestamp with time zone, '2022-04-22 00:00:00+00'::timestamp with time zone])));
CREATE INDEX tmp_index_container_repositories_on_id_migration_state ON container_repositories USING btree (id, migration_state);
......@@ -37,14 +37,24 @@ module ContainerRegistry
class << self
private
def with_dummy_client(return_value_if_disabled: nil)
def with_dummy_client(return_value_if_disabled: nil, token_config: { type: :full_access_token, path: nil })
registry_config = Gitlab.config.registry
unless registry_config.enabled && registry_config.api_url.present?
return return_value_if_disabled
end
token = Auth::ContainerRegistryAuthenticationService.access_token([], [])
yield new(registry_config.api_url, token: token)
yield new(registry_config.api_url, token: token_from(token_config))
end
def token_from(config)
case config[:type]
when :full_access_token
Auth::ContainerRegistryAuthenticationService.access_token([], [])
when :nested_repositories_token
return unless config[:path]
Auth::ContainerRegistryAuthenticationService.pull_nested_repositories_access_token(config[:path])
end
end
end
......
......@@ -27,6 +27,12 @@ module ContainerRegistry
end
end
def self.deduplicated_size(path)
with_dummy_client(token_config: { type: :nested_repositories_token, path: path }) do |client|
client.repository_details(path, sizing: :self_with_descendants)['size_bytes']
end
end
# https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs-gitlab/api.md#compliance-check
def supports_gitlab_api?
strong_memoize(:supports_gitlab_api) do
......@@ -78,10 +84,10 @@ module ContainerRegistry
end
end
def repository_details(path, with_size: false)
def repository_details(path, sizing: nil)
with_token_faraday do |faraday_client|
req = faraday_client.get("/gitlab/v1/repositories/#{path}/") do |req|
req.params['size'] = 'self' if with_size
req.params['size'] = sizing if sizing
end
break {} unless req.success?
......
......@@ -174,31 +174,26 @@ RSpec.describe ContainerRegistry::GitlabApiClient do
describe '#repository_details' do
let(:path) { 'namespace/path/to/repository' }
let(:response) { { foo: :bar, this: :is_a_test } }
let(:with_size) { true }
subject { client.repository_details(path, with_size: with_size) }
subject { client.repository_details(path, sizing: sizing) }
context 'with size' do
before do
stub_repository_details(path, with_size: with_size, respond_with: response)
end
it { is_expected.to eq(response.stringify_keys.deep_transform_values(&:to_s)) }
end
context 'without_size' do
let(:with_size) { false }
[:self, :self_with_descendants, nil].each do |size_type|
context "with sizing #{size_type}" do
let(:sizing) { size_type }
before do
stub_repository_details(path, with_size: with_size, respond_with: response)
stub_repository_details(path, sizing: sizing, respond_with: response)
end
it { is_expected.to eq(response.stringify_keys.deep_transform_values(&:to_s)) }
end
end
context 'with non successful response' do
let(:sizing) { nil }
before do
stub_repository_details(path, with_size: with_size, status_code: 404)
stub_repository_details(path, sizing: sizing, status_code: 404)
end
it { is_expected.to eq({}) }
......@@ -263,6 +258,54 @@ RSpec.describe ContainerRegistry::GitlabApiClient do
end
end
describe '.deduplicated_size' do
let(:path) { 'foo/bar' }
let(:response) { { 'size_bytes': 555 } }
let(:registry_enabled) { true }
subject { described_class.deduplicated_size(path) }
before do
stub_container_registry_config(enabled: registry_enabled, api_url: registry_api_url, key: 'spec/fixtures/x509_certificate_pk.key')
end
context 'with successful response' do
before do
expect(Auth::ContainerRegistryAuthenticationService).to receive(:pull_nested_repositories_access_token).with(path).and_return(token)
stub_repository_details(path, sizing: :self_with_descendants, status_code: 200, respond_with: response)
end
it { is_expected.to eq(555) }
end
context 'with unsuccessful response' do
before do
expect(Auth::ContainerRegistryAuthenticationService).to receive(:pull_nested_repositories_access_token).with(path).and_return(token)
stub_repository_details(path, sizing: :self_with_descendants, status_code: 404, respond_with: response)
end
it { is_expected.to eq(nil) }
end
context 'with the registry disabled' do
let(:registry_enabled) { false }
it { is_expected.to eq(nil) }
end
context 'with a nil path' do
let(:path) { nil }
let(:token) { nil }
before do
expect(Auth::ContainerRegistryAuthenticationService).not_to receive(:pull_nested_repositories_access_token)
stub_repository_details(path, sizing: :self_with_descendants, status_code: 401, respond_with: response)
end
it { is_expected.to eq(nil) }
end
end
def stub_pre_import(path, status_code, pre:)
import_type = pre ? 'pre' : 'final'
stub_request(:put, "#{registry_api_url}/gitlab/v1/import/#{path}/?import_type=#{import_type}")
......@@ -303,11 +346,15 @@ RSpec.describe ContainerRegistry::GitlabApiClient do
)
end
def stub_repository_details(path, with_size: true, status_code: 200, respond_with: {})
def stub_repository_details(path, sizing: nil, status_code: 200, respond_with: {})
url = "#{registry_api_url}/gitlab/v1/repositories/#{path}/"
url += "?size=self" if with_size
url += "?size=#{sizing}" if sizing
headers = { 'Accept' => described_class::JSON_TYPE }
headers['Authorization'] = "bearer #{token}" if token
stub_request(:get, url)
.with(headers: { 'Accept' => described_class::JSON_TYPE, 'Authorization' => "bearer #{token}" })
.with(headers: headers)
.to_return(status: status_code, body: respond_with.to_json, headers: { 'Content-Type' => described_class::JSON_TYPE })
end
end
......@@ -652,7 +652,7 @@ RSpec.describe ContainerRepository, :aggregate_failures do
context 'supports gitlab api on .com with a recent repository' do
before do
expect(repository.gitlab_api_client).to receive(:supports_gitlab_api?).and_return(true)
expect(repository.gitlab_api_client).to receive(:repository_details).with(repository.path, with_size: true).and_return(response)
expect(repository.gitlab_api_client).to receive(:repository_details).with(repository.path, sizing: :self).and_return(response)
end
context 'with a size_bytes field' do
......@@ -1076,6 +1076,43 @@ RSpec.describe ContainerRepository, :aggregate_failures do
end
end
describe '.all_migrated?' do
let_it_be(:project) { create(:project) }
subject { project.container_repositories.all_migrated? }
context 'with no repositories' do
it { is_expected.to be_truthy }
end
context 'with only recent repositories' do
let_it_be(:container_repository1) { create(:container_repository, project: project) }
let_it_be_with_reload(:container_repository2) { create(:container_repository, project: project) }
it { is_expected.to be_truthy }
context 'with one old non migrated repository' do
before do
container_repository2.update!(created_at: described_class::MIGRATION_PHASE_1_ENDED_AT - 3.months)
end
it { is_expected.to be_falsey }
end
context 'with one old migrated repository' do
before do
container_repository2.update!(
created_at: described_class::MIGRATION_PHASE_1_ENDED_AT - 3.months,
migration_state: 'import_done',
migration_import_done_at: Time.zone.now
)
end
it { is_expected.to be_truthy }
end
end
end
describe '.with_enabled_policy' do
let_it_be(:repository) { create(:container_repository) }
let_it_be(:repository2) { create(:container_repository) }
......
......@@ -2715,6 +2715,39 @@ RSpec.describe Project, factory_default: :keep do
end
end
describe '#container_repositories_size' do
let(:project) { build(:project) }
subject { project.container_repositories_size }
context 'on gitlab.com' do
where(:no_container_repositories, :all_migrated, :gitlab_api_supported, :returned_size, :expected_result) do
true | nil | nil | nil | 0
false | false | nil | nil | nil
false | true | false | nil | nil
false | true | true | 555 | 555
false | true | true | nil | nil
end
with_them do
before do
stub_container_registry_config(enabled: true, api_url: 'http://container-registry', key: 'spec/fixtures/x509_certificate_pk.key')
allow(Gitlab).to receive(:com?).and_return(true)
allow(project.container_repositories).to receive(:empty?).and_return(no_container_repositories)
allow(project.container_repositories).to receive(:all_migrated?).and_return(all_migrated)
allow(ContainerRegistry::GitlabApiClient).to receive(:supports_gitlab_api?).and_return(gitlab_api_supported)
allow(ContainerRegistry::GitlabApiClient).to receive(:deduplicated_size).with(project.full_path).and_return(returned_size)
end
it { is_expected.to eq(expected_result) }
end
end
context 'not on gitlab.com' do
it { is_expected.to eq(nil) }
end
end
describe '#container_registry_enabled=' do
let_it_be_with_reload(:project) { create(:project) }
......
......@@ -8,8 +8,8 @@ RSpec.shared_context 'container registry client stubs' do
end
end
def stub_container_registry_gitlab_api_repository_details(client, path:, size_bytes:)
allow(client).to receive(:repository_details).with(path, with_size: true).and_return('size_bytes' => size_bytes)
def stub_container_registry_gitlab_api_repository_details(client, path:, size_bytes:, sizing: :self)
allow(client).to receive(:repository_details).with(path, sizing: sizing).and_return('size_bytes' => size_bytes)
end
def stub_container_registry_gitlab_api_network_error(client_method: :supports_gitlab_api?)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment