Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
dc669d91
Commit
dc669d91
authored
Nov 12, 2020
by
Marius Bobin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use slow iteration to remove expired artifacts
Use batching of slow iterations on expire_at column
parent
c38c150c
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
134 additions
and
17 deletions
+134
-17
app/models/ci/job_artifact.rb
app/models/ci/job_artifact.rb
+3
-1
app/models/concerns/ci/artifactable.rb
app/models/concerns/ci/artifactable.rb
+2
-1
app/models/concerns/each_batch.rb
app/models/concerns/each_batch.rb
+10
-6
app/services/ci/destroy_expired_job_artifacts_service.rb
app/services/ci/destroy_expired_job_artifacts_service.rb
+33
-2
changelogs/unreleased/281688-database-query-timeout-for-expired-job-artifacts.yml
...1688-database-query-timeout-for-expired-job-artifacts.yml
+5
-0
config/feature_flags/development/ci_slow_artifacts_removal.yml
...g/feature_flags/development/ci_slow_artifacts_removal.yml
+8
-0
spec/models/ci/job_artifact_spec.rb
spec/models/ci/job_artifact_spec.rb
+33
-0
spec/models/concerns/ci/artifactable_spec.rb
spec/models/concerns/ci/artifactable_spec.rb
+19
-0
spec/models/concerns/each_batch_spec.rb
spec/models/concerns/each_batch_spec.rb
+16
-0
spec/services/ci/destroy_expired_job_artifacts_service_spec.rb
...services/ci/destroy_expired_job_artifacts_service_spec.rb
+5
-7
No files found.
app/models/ci/job_artifact.rb
View file @
dc669d91
...
...
@@ -9,6 +9,7 @@ module Ci
include
Sortable
include
Artifactable
include
FileStoreMounter
include
EachBatch
extend
Gitlab
::
Ci
::
Model
TEST_REPORT_FILE_TYPES
=
%w[junit]
.
freeze
...
...
@@ -176,7 +177,8 @@ module Ci
end
scope
:downloadable
,
->
{
where
(
file_type:
DOWNLOADABLE_TYPES
)
}
scope
:unlocked
,
->
{
joins
(
job: :pipeline
).
merge
(
::
Ci
::
Pipeline
.
unlocked
).
order
(
expire_at: :desc
)
}
scope
:unlocked
,
->
{
joins
(
job: :pipeline
).
merge
(
::
Ci
::
Pipeline
.
unlocked
)
}
scope
:order_expired_desc
,
->
{
order
(
expire_at: :desc
)
}
scope
:with_destroy_preloads
,
->
{
includes
(
project:
[
:route
,
:statistics
])
}
scope
:scoped_project
,
->
{
where
(
'ci_job_artifacts.project_id = projects.id'
)
}
...
...
app/models/concerns/ci/artifactable.rb
View file @
dc669d91
...
...
@@ -18,7 +18,8 @@ module Ci
gzip:
3
},
_suffix:
true
scope
:expired
,
->
(
limit
)
{
where
(
'expire_at < ?'
,
Time
.
current
).
limit
(
limit
)
}
scope
:expired_before
,
->
(
timestamp
)
{
where
(
arel_table
[
:expire_at
].
lt
(
timestamp
))
}
scope
:expired
,
->
(
limit
)
{
expired_before
(
Time
.
current
).
limit
(
limit
)
}
end
def
each_blob
(
&
blk
)
...
...
app/models/concerns/each_batch.rb
View file @
dc669d91
...
...
@@ -47,7 +47,7 @@ module EachBatch
# order_hint does not affect the search results. For example,
# `ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER
# BY id ASC`.
def
each_batch
(
of:
1000
,
column:
primary_key
,
order_hint:
nil
)
def
each_batch
(
of:
1000
,
column:
primary_key
,
order
: :asc
,
order
_hint:
nil
)
unless
column
raise
ArgumentError
,
'the column: argument must be set to a column name to use for ordering rows'
...
...
@@ -55,7 +55,7 @@ module EachBatch
start
=
except
(
:select
)
.
select
(
column
)
.
reorder
(
column
=>
:asc
)
.
reorder
(
column
=>
order
)
start
=
start
.
order
(
order_hint
)
if
order_hint
start
=
start
.
take
...
...
@@ -66,10 +66,12 @@ module EachBatch
arel_table
=
self
.
arel_table
1
.
step
do
|
index
|
start_cond
=
arel_table
[
column
].
gteq
(
start_id
)
start_cond
=
arel_table
[
column
].
lteq
(
start_id
)
if
order
==
:desc
stop
=
except
(
:select
)
.
select
(
column
)
.
where
(
arel_table
[
column
].
gteq
(
start_id
)
)
.
reorder
(
column
=>
:asc
)
.
where
(
start_cond
)
.
reorder
(
column
=>
order
)
stop
=
stop
.
order
(
order_hint
)
if
order_hint
stop
=
stop
...
...
@@ -77,12 +79,14 @@ module EachBatch
.
limit
(
1
)
.
take
relation
=
where
(
arel_table
[
column
].
gteq
(
start_id
)
)
relation
=
where
(
start_cond
)
if
stop
stop_id
=
stop
[
column
]
start_id
=
stop_id
relation
=
relation
.
where
(
arel_table
[
column
].
lt
(
stop_id
))
stop_cond
=
arel_table
[
column
].
lt
(
stop_id
)
stop_cond
=
arel_table
[
column
].
gt
(
stop_id
)
if
order
==
:desc
relation
=
relation
.
where
(
stop_cond
)
end
# Any ORDER BYs are useless for this relation and can lead to less
...
...
app/services/ci/destroy_expired_job_artifacts_service.rb
View file @
dc669d91
...
...
@@ -20,14 +20,30 @@ module Ci
# which is scheduled every 7 minutes.
def
execute
in_lock
(
EXCLUSIVE_LOCK_KEY
,
ttl:
LOCK_TIMEOUT
,
retries:
1
)
do
loop_until
(
timeout:
LOOP_TIMEOUT
,
limit:
LOOP_LIMIT
)
do
destroy_artifacts_batch
if
::
Feature
.
enabled?
(
:ci_slow_artifacts_removal
)
destroy_job_and_pipeline_artifacts
else
loop_until
(
timeout:
LOOP_TIMEOUT
,
limit:
LOOP_LIMIT
)
do
destroy_artifacts_batch
end
end
end
end
private
def
destroy_job_and_pipeline_artifacts
start_at
=
Time
.
current
destroy_job_artifacts_with_slow_iteration
(
start_at
)
timeout
=
LOOP_TIMEOUT
-
(
Time
.
current
-
start_at
)
return
false
if
timeout
<
0
loop_until
(
timeout:
timeout
,
limit:
LOOP_LIMIT
)
do
destroy_pipeline_artifacts_batch
end
end
def
destroy_artifacts_batch
destroy_job_artifacts_batch
||
destroy_pipeline_artifacts_batch
end
...
...
@@ -36,6 +52,7 @@ module Ci
artifacts
=
Ci
::
JobArtifact
.
expired
(
BATCH_SIZE
)
.
unlocked
.
order_expired_desc
.
with_destroy_preloads
.
to_a
...
...
@@ -45,6 +62,16 @@ module Ci
true
end
def
destroy_job_artifacts_with_slow_iteration
(
start_at
)
Ci
::
JobArtifact
.
expired_before
(
start_at
).
each_batch
(
of:
BATCH_SIZE
,
column: :expire_at
,
order: :desc
)
do
|
relation
,
index
|
artifacts
=
relation
.
unlocked
.
with_destroy_preloads
.
to_a
parallel_destroy_batch
(
artifacts
)
if
artifacts
.
any?
break
if
loop_timeout?
(
start_at
)
break
if
index
>=
LOOP_LIMIT
end
end
# TODO: Make sure this can also be parallelized
# https://gitlab.com/gitlab-org/gitlab/-/issues/270973
def
destroy_pipeline_artifacts_batch
...
...
@@ -88,6 +115,10 @@ module Ci
::
Gitlab
::
Metrics
.
counter
(
name
,
comment
)
end
end
def
loop_timeout?
(
start_at
)
Time
.
current
>
start_at
+
LOOP_TIMEOUT
end
end
end
...
...
changelogs/unreleased/281688-database-query-timeout-for-expired-job-artifacts.yml
0 → 100644
View file @
dc669d91
---
title
:
Fix database timeout errors when removing expired job artifacts
merge_request
:
47496
author
:
type
:
fixed
config/feature_flags/development/ci_slow_artifacts_removal.yml
0 → 100644
View file @
dc669d91
---
name
:
ci_slow_artifacts_removal
introduced_by_url
:
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/47496
rollout_issue_url
:
https://gitlab.com/gitlab-org/gitlab/-/issues/281688
milestone
:
'
13.8'
type
:
development
group
:
'
group::continuous
integration'
default_enabled
:
false
spec/models/ci/job_artifact_spec.rb
View file @
dc669d91
...
...
@@ -219,6 +219,39 @@ RSpec.describe Ci::JobArtifact do
end
end
describe
'.unlocked'
do
let_it_be
(
:job_artifact
)
{
create
(
:ci_job_artifact
)
}
context
'with locked pipelines'
do
before
do
job_artifact
.
job
.
pipeline
.
artifacts_locked!
end
it
'returns an empty array'
do
expect
(
described_class
.
unlocked
).
to
be_empty
end
end
context
'with unlocked pipelines'
do
before
do
job_artifact
.
job
.
pipeline
.
unlocked!
end
it
'returns the artifact'
do
expect
(
described_class
.
unlocked
).
to
eq
([
job_artifact
])
end
end
end
describe
'.order_expired_desc'
do
let_it_be
(
:first_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
2
.
days
.
ago
)
}
let_it_be
(
:second_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
1
.
day
.
ago
)
}
it
'returns ordered artifacts'
do
expect
(
described_class
.
order_expired_desc
).
to
eq
([
second_artifact
,
first_artifact
])
end
end
describe
'callbacks'
do
describe
'#schedule_background_upload'
do
subject
{
create
(
:ci_job_artifact
,
:archive
)
}
...
...
spec/models/concerns/ci/artifactable_spec.rb
View file @
dc669d91
...
...
@@ -54,4 +54,23 @@ RSpec.describe Ci::Artifactable do
end
end
end
context
'ActiveRecord scopes'
do
let_it_be
(
:recently_expired_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
1
.
day
.
ago
)
}
let_it_be
(
:later_expired_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
2
.
days
.
ago
)
}
let_it_be
(
:not_expired_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
1
.
day
.
from_now
)
}
describe
'.expired_before'
do
it
'returns expired artifacts'
do
expect
(
Ci
::
JobArtifact
.
expired_before
(
1
.
hour
.
ago
))
.
to
match_array
([
recently_expired_artifact
,
later_expired_artifact
])
end
end
describe
'.expired'
do
it
'returns a limited number of expired artifacts'
do
expect
(
Ci
::
JobArtifact
.
expired
(
1
).
order_id_asc
).
to
eq
([
recently_expired_artifact
])
end
end
end
end
spec/models/concerns/each_batch_spec.rb
View file @
dc669d91
...
...
@@ -56,5 +56,21 @@ RSpec.describe EachBatch do
it_behaves_like
'each_batch handling'
,
{}
it_behaves_like
'each_batch handling'
,
{
order_hint: :updated_at
}
it
'orders ascending by default'
do
ids
=
[]
model
.
each_batch
(
of:
1
)
{
|
rel
|
ids
.
concat
(
rel
.
ids
)
}
expect
(
ids
).
to
eq
(
ids
.
sort
)
end
it
'accepts descending order'
do
ids
=
[]
model
.
each_batch
(
of:
1
,
order: :desc
)
{
|
rel
|
ids
.
concat
(
rel
.
ids
)
}
expect
(
ids
).
to
eq
(
ids
.
sort
.
reverse
)
end
end
end
spec/services/ci/destroy_expired_job_artifacts_service_spec.rb
View file @
dc669d91
...
...
@@ -30,14 +30,16 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
it
'performs the smallest number of queries for job_artifacts'
do
log
=
ActiveRecord
::
QueryRecorder
.
new
{
subject
}
# SELECT expired ci_job_artifacts
# SELECT expired ci_job_artifacts
- 3 queries from each_batch
# PRELOAD projects, routes, project_statistics
# BEGIN
# INSERT into ci_deleted_objects
# DELETE loaded ci_job_artifacts
# DELETE security_findings -- for EE
# COMMIT
expect
(
log
.
count
).
to
be_within
(
1
).
of
(
8
)
# SELECT next expired ci_job_artifacts
expect
(
log
.
count
).
to
be_within
(
1
).
of
(
11
)
end
end
...
...
@@ -164,7 +166,7 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
context
'when timeout happens'
do
before
do
stub_const
(
'Ci::DestroyExpiredJobArtifactsService::LOOP_TIMEOUT'
,
1
.
second
)
allow_any_instance_of
(
described_class
).
to
receive
(
:destroy_artifacts_batch
)
{
true
}
allow_any_instance_of
(
described_class
).
to
receive
(
:destroy_
pipeline_
artifacts_batch
)
{
true
}
end
it
'returns false and does not continue destroying'
do
...
...
@@ -182,10 +184,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
let!
(
:second_artifact
)
{
create
(
:ci_job_artifact
,
expire_at:
1
.
day
.
ago
)
}
it
'raises an error and does not continue destroying'
do
is_expected
.
to
be_falsy
end
it
'destroys one artifact'
do
expect
{
subject
}.
to
change
{
Ci
::
JobArtifact
.
count
}.
by
(
-
1
)
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment