Commit 00167a2b authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch '41059-calculate-artifact-size-more-efficiently' into 'master'

Resolve "Calculating total size of a project's CI artifacts"

Closes #41059

See merge request gitlab-org/gitlab-ce!17839
parents 29628865 03b020f2
......@@ -20,7 +20,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
......@@ -95,8 +95,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) }
end
after_commit :update_project_statistics_after_save, on: [:create, :update]
after_commit :update_project_statistics, on: :destroy
after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
class << self
# This is needed for url_for to work,
......@@ -664,16 +664,20 @@ module Ci
pipeline.config_processor.build_attributes(name)
end
def update_project_statistics
return unless project
def update_project_statistics_after_save
update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
end
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
def update_project_statistics_after_destroy
update_project_statistics(-artifacts_size)
end
def update_project_statistics_after_save
if previous_changes.include?('artifacts_size')
update_project_statistics
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
project.pending_delete?
end
end
end
......@@ -9,6 +9,8 @@ module Ci
before_save :update_file_store
before_save :set_size, if: :file_changed?
after_save :update_project_statistics_after_save, if: :size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
......@@ -34,10 +36,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end
def set_size
self.size = file.size
end
def expire_in
expire_at - Time.now if expire_at
end
......@@ -48,5 +46,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now
end
end
private
def set_size
self.size = file.size
end
def update_project_statistics_after_save
update_project_statistics(size.to_i - size_was.to_i)
end
def update_project_statistics_after_destroy
update_project_statistics(-self.size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
# Use job.project to avoid extra DB query for project
job.project.pending_delete?
end
end
end
......@@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base
before_save :update_storage_size
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS
COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
def total_repository_size
repository_size + lfs_objects_size
end
def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator|
COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end
......@@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size)
end
def update_build_artifacts_size
self.build_artifacts_size =
project.builds.sum(:artifacts_size) +
Ci::JobArtifact.artifacts_size_for(self.project)
def update_storage_size
self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
end
def update_storage_size
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute))
def self.increment_statistic(project_id, key, amount)
raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
return if amount == 0
where(project_id: project_id)
.update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end
end
......@@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts
def size
return super if model.size.nil?
def cached_size
return model.size if model.size.present? && !model.file_changed?
model.size
size
end
def store_dir
......@@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage?
File.open(path, "rb") if path
else
::Gitlab::Ci::Trace::HttpIO.new(url, size) if url
::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end
end
......
---
title: Improve DB performance of calculating total artifacts size
merge_request: 17839
author:
type: performance
......@@ -1384,29 +1384,51 @@ describe Ci::Build do
end
end
describe '#update_project_statistics' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the artifact size changes' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
context 'when updating the build' do
let(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics' do
build.artifacts_size = 42
build.save!
expect(build).to receive(:update_project_statistics_after_save).and_call_original
expect { build.save! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(19)
end
it 'does not update project statistics when the artifact size stays the same' do
expect(ProjectCacheWorker).not_to receive(:perform_async)
context 'when the artifact size stays the same' do
it 'does not update project statistics' do
build.name = 'changed'
build.name = 'changed'
build.save!
expect(build).not_to receive(:update_project_statistics_after_save)
build.save!
end
end
end
it 'updates project statistics when the build is destroyed' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
context 'when destroying the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics' do
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { build.destroy! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(-23)
end
context 'when the build is destroyed due to the project being destroyed' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
build.destroy
build.project.update_attributes(pending_delete: true)
build.project.destroy!
end
end
end
......
require 'spec_helper'
describe Ci::JobArtifact do
set(:artifact) { create(:ci_job_artifact, :archive) }
let(:artifact) { create(:ci_job_artifact, :archive) }
describe "Associations" do
it { is_expected.to belong_to(:project) }
......@@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end
end
describe '#set_size' do
it 'sets the size' do
context 'creating the artifact' do
let(:project) { create(:project) }
let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
it 'sets the size from the file size' do
expect(artifact.size).to eq(106365)
end
it 'updates the project statistics' do
expect { artifact }
.to change { project.statistics.reload.build_artifacts_size }
.by(106365)
end
end
context 'updating the artifact file' do
it 'updates the artifact size' do
artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
expect(artifact.size).to eq(1062)
end
it 'updates the project statistics' do
expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
.to change { artifact.project.statistics.reload.build_artifacts_size }
.by(1062 - 106365)
end
end
describe '#file' do
......@@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil
end
end
context 'when destroying the artifact' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, project: project) }
let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
it 'updates the project statistics' do
artifact = build.job_artifacts.first
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { artifact.destroy }
.to change { project.statistics.reload.build_artifacts_size }
.by(-106365)
end
context 'when it is destroyed from the project level' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
project.update_attributes(pending_delete: true)
project.destroy!
end
end
end
end
......@@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project }
let(:statistics) { project.statistics }
describe 'constants' do
describe 'STORAGE_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
end
describe 'STATISTICS_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
it 'includes all storage columns' do
expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
end
end
end
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) }
......@@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size)
allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size)
end
......@@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).to have_received(:update_build_artifacts_size)
end
end
......@@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size)
expect(statistics).not_to have_received(:update_build_artifacts_size)
end
end
end
......@@ -131,40 +108,6 @@ describe ProjectStatistics do
end
end
describe '#update_build_artifacts_size' do
let!(:pipeline) { create(:ci_pipeline, project: project) }
context 'when new job artifacts are calculated' do
let(:ci_build) { create(:ci_build, pipeline: pipeline) }
before do
create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
end
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to be(106365)
end
it 'calculates related build artifacts by project' do
expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
statistics.update_build_artifacts_size
end
end
context 'when legacy artifacts are used' do
let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to eq(10.megabytes)
end
end
end
describe '#update_storage_size' do
it "sums all storage counters" do
statistics.update!(
......@@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5
end
end
describe '.increment_statistic' do
it 'increases the statistic by that amount' do
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
.to change { statistics.reload.build_artifacts_size }
.by(13)
end
context 'when the amount is 0' do
it 'does not execute a query' do
project
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
.not_to exceed_query_limit(0)
end
end
context 'when using an invalid column' do
it 'raises an error' do
expect { described_class.increment_statistic(project.id, :id, 13) }
.to raise_error(ArgumentError, "Cannot increment attribute: id")
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment