Commit 1b3ae7fd authored by Dylan Griffith's avatar Dylan Griffith

Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)

Previously we scheduled a worker to just some this but we were running
into performance issues when the build table was getting too large. So
now we've updated the code such that this column is updated immediately
and incremented/decremented by the correct amount whenever artifacts are
created or deleted.

We've also added the performance optimization that we do not update this
statistic if a project is deleted because it could result in many
updates for a project with many builds.
parent 7d869c8b
...@@ -24,7 +24,7 @@ module Ci ...@@ -24,7 +24,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection' has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
...@@ -99,8 +99,8 @@ module Ci ...@@ -99,8 +99,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) } run_after_commit { BuildHooksWorker.perform_async(build.id) }
end end
after_commit :update_project_statistics_after_save, on: [:create, :update] after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
after_commit :update_project_statistics, on: :destroy after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
class << self class << self
# This is needed for url_for to work, # This is needed for url_for to work,
...@@ -668,16 +668,20 @@ module Ci ...@@ -668,16 +668,20 @@ module Ci
pipeline.config_processor.build_attributes(name) pipeline.config_processor.build_attributes(name)
end end
def update_project_statistics def update_project_statistics_after_save
return unless project update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
end
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size]) def update_project_statistics_after_destroy
update_project_statistics(-artifacts_size)
end end
def update_project_statistics_after_save def update_project_statistics(difference)
if previous_changes.include?('artifacts_size') ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
update_project_statistics
end end
def project_destroyed?
project.pending_delete?
end end
end end
end end
module Ci module Ci
class JobArtifact < ActiveRecord::Base class JobArtifact < ActiveRecord::Base
prepend EE::Ci::JobArtifact prepend EE::Ci::JobArtifact
include AfterCommitQueue include AfterCommitQueue
include ObjectStorage::BackgroundMove include ObjectStorage::BackgroundMove
extend Gitlab::Ci::Model extend Gitlab::Ci::Model
...@@ -10,6 +11,8 @@ module Ci ...@@ -10,6 +11,8 @@ module Ci
before_save :update_file_store before_save :update_file_store
before_save :set_size, if: :file_changed? before_save :set_size, if: :file_changed?
after_save :update_project_statistics_after_save, if: :size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) } scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(file_store: ::JobArtifactUploader::Store::REMOTE) } scope :with_files_stored_remotely, -> { where(file_store: ::JobArtifactUploader::Store::REMOTE) }
...@@ -36,10 +39,6 @@ module Ci ...@@ -36,10 +39,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store) [nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end end
def set_size
self.size = file.size
end
def expire_in def expire_in
expire_at - Time.now if expire_at expire_at - Time.now if expire_at
end end
...@@ -50,5 +49,28 @@ module Ci ...@@ -50,5 +49,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now ChronicDuration.parse(value)&.seconds&.from_now
end end
end end
private
def set_size
self.size = file.size
end
def update_project_statistics_after_save
update_project_statistics(size.to_i - size_was.to_i)
end
def update_project_statistics_after_destroy
update_project_statistics(-self.size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
# Use job.project to avoid extra DB query for project
job.project.pending_delete?
end
end end
end end
...@@ -4,8 +4,8 @@ class ProjectStatistics < ActiveRecord::Base ...@@ -4,8 +4,8 @@ class ProjectStatistics < ActiveRecord::Base
before_save :update_storage_size before_save :update_storage_size
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
def shared_runners_minutes def shared_runners_minutes
shared_runners_seconds.to_i / 60 shared_runners_seconds.to_i / 60
...@@ -16,7 +16,7 @@ class ProjectStatistics < ActiveRecord::Base ...@@ -16,7 +16,7 @@ class ProjectStatistics < ActiveRecord::Base
end end
def refresh!(only: nil) def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator| COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column) if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end end
...@@ -38,13 +38,15 @@ class ProjectStatistics < ActiveRecord::Base ...@@ -38,13 +38,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size) self.lfs_objects_size = project.lfs_objects.sum(:size)
end end
def update_build_artifacts_size def update_storage_size
self.build_artifacts_size = self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
project.builds.sum(:artifacts_size) +
Ci::JobArtifact.artifacts_size_for(self.project)
end end
def update_storage_size def self.increment_statistic(project_id, key, amount)
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute)) raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
return if amount == 0
where(project_id: project_id)
.update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end end
end end
...@@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader ...@@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts storage_options Gitlab.config.artifacts
def size def cached_size
return super if model.size.nil? return model.size if model.size.present? && !model.file_changed?
model.size size
end end
def store_dir def store_dir
...@@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader ...@@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage? if file_storage?
File.open(path, "rb") if path File.open(path, "rb") if path
else else
::Gitlab::Ci::Trace::HttpIO.new(url, size) if url ::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end end
end end
......
---
title: Improve DB performance of calculating total artifacts size
merge_request: 17839
author:
type: performance
...@@ -1403,29 +1403,51 @@ describe Ci::Build do ...@@ -1403,29 +1403,51 @@ describe Ci::Build do
end end
end end
describe '#update_project_statistics' do context 'when updating the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) } let(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the artifact size changes' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
it 'updates project statistics' do
build.artifacts_size = 42 build.artifacts_size = 42
build.save!
end
it 'does not update project statistics when the artifact size stays the same' do expect(build).to receive(:update_project_statistics_after_save).and_call_original
expect(ProjectCacheWorker).not_to receive(:perform_async)
expect { build.save! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(19)
end
context 'when the artifact size stays the same' do
it 'does not update project statistics' do
build.name = 'changed' build.name = 'changed'
expect(build).not_to receive(:update_project_statistics_after_save)
build.save! build.save!
end end
end
end
context 'when destroying the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the build is destroyed' do it 'updates project statistics' do
expect(ProjectCacheWorker).to receive(:perform_async) expect(ProjectStatistics)
.with(build.project_id, [], [:build_artifacts_size]) .to receive(:increment_statistic)
.and_call_original
build.destroy expect { build.destroy! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(-23)
end
context 'when the build is destroyed due to the project being destroyed' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
build.project.update_attributes(pending_delete: true)
build.project.destroy!
end
end end
end end
......
require 'spec_helper' require 'spec_helper'
describe Ci::JobArtifact do describe Ci::JobArtifact do
set(:artifact) { create(:ci_job_artifact, :archive) } let(:artifact) { create(:ci_job_artifact, :archive) }
describe "Associations" do describe "Associations" do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
...@@ -59,10 +59,32 @@ describe Ci::JobArtifact do ...@@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end end
end end
describe '#set_size' do context 'creating the artifact' do
it 'sets the size' do let(:project) { create(:project) }
let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
it 'sets the size from the file size' do
expect(artifact.size).to eq(106365) expect(artifact.size).to eq(106365)
end end
it 'updates the project statistics' do
expect { artifact }
.to change { project.statistics.reload.build_artifacts_size }
.by(106365)
end
end
context 'updating the artifact file' do
it 'updates the artifact size' do
artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
expect(artifact.size).to eq(1062)
end
it 'updates the project statistics' do
expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
.to change { artifact.project.statistics.reload.build_artifacts_size }
.by(1062 - 106365)
end
end end
describe '#file' do describe '#file' do
...@@ -118,4 +140,32 @@ describe Ci::JobArtifact do ...@@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil is_expected.to be_nil
end end
end end
context 'when destroying the artifact' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, project: project) }
let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
it 'updates the project statistics' do
artifact = build.job_artifacts.first
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { artifact.destroy }
.to change { project.statistics.reload.build_artifacts_size }
.by(-106365)
end
context 'when it is destroyed from the project level' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
project.update_attributes(pending_delete: true)
project.destroy!
end
end
end
end end
...@@ -4,26 +4,6 @@ describe ProjectStatistics do ...@@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project } let(:project) { create :project }
let(:statistics) { project.statistics } let(:statistics) { project.statistics }
describe 'constants' do
describe 'STORAGE_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
end
describe 'STATISTICS_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
it 'includes all storage columns' do
expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
end
end
end
describe 'associations' do describe 'associations' do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) } it { is_expected.to belong_to(:namespace) }
...@@ -63,7 +43,6 @@ describe ProjectStatistics do ...@@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count) allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size) allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size) allow(statistics).to receive(:update_lfs_objects_size)
allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size) allow(statistics).to receive(:update_storage_size)
end end
...@@ -76,7 +55,6 @@ describe ProjectStatistics do ...@@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count) expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size) expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).to have_received(:update_build_artifacts_size)
end end
end end
...@@ -89,7 +67,6 @@ describe ProjectStatistics do ...@@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count) expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size) expect(statistics).not_to have_received(:update_repository_size)
expect(statistics).not_to have_received(:update_build_artifacts_size)
end end
end end
end end
...@@ -131,40 +108,6 @@ describe ProjectStatistics do ...@@ -131,40 +108,6 @@ describe ProjectStatistics do
end end
end end
describe '#update_build_artifacts_size' do
let!(:pipeline) { create(:ci_pipeline, project: project) }
context 'when new job artifacts are calculated' do
let(:ci_build) { create(:ci_build, pipeline: pipeline) }
before do
create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
end
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to be(106365)
end
it 'calculates related build artifacts by project' do
expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
statistics.update_build_artifacts_size
end
end
context 'when legacy artifacts are used' do
let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to eq(10.megabytes)
end
end
end
describe '#update_storage_size' do describe '#update_storage_size' do
it "sums all storage counters" do it "sums all storage counters" do
statistics.update!( statistics.update!(
...@@ -177,4 +120,27 @@ describe ProjectStatistics do ...@@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5 expect(statistics.storage_size).to eq 5
end end
end end
describe '.increment_statistic' do
it 'increases the statistic by that amount' do
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
.to change { statistics.reload.build_artifacts_size }
.by(13)
end
context 'when the amount is 0' do
it 'does not execute a query' do
project
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
.not_to exceed_query_limit(0)
end
end
context 'when using an invalid column' do
it 'raises an error' do
expect { described_class.increment_statistic(project.id, :id, 13) }
.to raise_error(ArgumentError, "Cannot increment attribute: id")
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment