Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
8f0b7a30
Commit
8f0b7a30
authored
Sep 29, 2021
by
drew cimino
Committed by
Bob Van Landuyt
Sep 29, 2021
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Create Ci::StuckBuilds::DropRunningService for querying and dropping running builds
parent
ca3aa6e0
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
259 additions
and
55 deletions
+259
-55
app/services/ci/stuck_builds/drop_running_service.rb
app/services/ci/stuck_builds/drop_running_service.rb
+23
-0
app/services/ci/stuck_builds/drop_service.rb
app/services/ci/stuck_builds/drop_service.rb
+0
-10
app/workers/all_queues.yml
app/workers/all_queues.yml
+9
-0
app/workers/ci/stuck_builds/drop_running_worker.rb
app/workers/ci/stuck_builds/drop_running_worker.rb
+43
-0
app/workers/stuck_ci_jobs_worker.rb
app/workers/stuck_ci_jobs_worker.rb
+2
-0
spec/services/ci/stuck_builds/drop_running_service_spec.rb
spec/services/ci/stuck_builds/drop_running_service_spec.rb
+65
-0
spec/services/ci/stuck_builds/drop_service_spec.rb
spec/services/ci/stuck_builds/drop_service_spec.rb
+2
-44
spec/support/shared_examples/ci/stuck_builds_shared_examples.rb
...upport/shared_examples/ci/stuck_builds_shared_examples.rb
+38
-0
spec/workers/ci/stuck_builds/drop_running_worker_spec.rb
spec/workers/ci/stuck_builds/drop_running_worker_spec.rb
+70
-0
spec/workers/stuck_ci_jobs_worker_spec.rb
spec/workers/stuck_ci_jobs_worker_spec.rb
+7
-1
No files found.
app/services/ci/stuck_builds/drop_running_service.rb
0 → 100644
View file @
8f0b7a30
# frozen_string_literal: true
module
Ci
module
StuckBuilds
class
DropRunningService
include
DropHelpers
BUILD_RUNNING_OUTDATED_TIMEOUT
=
1
.
hour
def
execute
Gitlab
::
AppLogger
.
info
"
#{
self
.
class
}
: Cleaning running, timed-out builds"
drop
(
running_timed_out_builds
,
failure_reason: :stuck_or_timeout_failure
)
end
private
def
running_timed_out_builds
Ci
::
Build
.
running
.
updated_at_before
(
BUILD_RUNNING_OUTDATED_TIMEOUT
.
ago
)
end
end
end
end
app/services/ci/stuck_builds/drop_service.rb
View file @
8f0b7a30
...
...
@@ -5,7 +5,6 @@ module Ci
class
DropService
include
DropHelpers
BUILD_RUNNING_OUTDATED_TIMEOUT
=
1
.
hour
BUILD_PENDING_OUTDATED_TIMEOUT
=
1
.
day
BUILD_SCHEDULED_OUTDATED_TIMEOUT
=
1
.
hour
BUILD_PENDING_STUCK_TIMEOUT
=
1
.
hour
...
...
@@ -14,8 +13,6 @@ module Ci
def
execute
Gitlab
::
AppLogger
.
info
"
#{
self
.
class
}
: Cleaning stuck builds"
drop
(
running_timed_out_builds
,
failure_reason: :stuck_or_timeout_failure
)
drop
(
pending_builds
(
BUILD_PENDING_OUTDATED_TIMEOUT
.
ago
),
failure_reason: :stuck_or_timeout_failure
...
...
@@ -50,13 +47,6 @@ module Ci
BUILD_SCHEDULED_OUTDATED_TIMEOUT
.
ago
)
end
def
running_timed_out_builds
Ci
::
Build
.
running
.
where
(
# rubocop: disable CodeReuse/ActiveRecord
'ci_builds.updated_at < ?'
,
BUILD_RUNNING_OUTDATED_TIMEOUT
.
ago
)
end
end
end
end
app/workers/all_queues.yml
View file @
8f0b7a30
...
...
@@ -228,6 +228,15 @@
:weight:
1
:idempotent:
true
:tags: []
-
:name: cronjob:ci_stuck_builds_drop_running
:worker_name: Ci::StuckBuilds::DropRunningWorker
:feature_category: :continuous_integration
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight:
1
:idempotent:
true
:tags: []
-
:name: cronjob:container_expiration_policy
:worker_name: ContainerExpirationPolicyWorker
:feature_category: :container_registry
...
...
app/workers/ci/stuck_builds/drop_running_worker.rb
0 → 100644
View file @
8f0b7a30
# frozen_string_literal: true
module
Ci
module
StuckBuilds
class
DropRunningWorker
include
ApplicationWorker
idempotent!
# rubocop:disable Scalability/CronWorkerContext
# This is an instance-wide cleanup query, so there's no meaningful
# scope to consider this in the context of.
include
CronjobQueue
# rubocop:enable Scalability/CronWorkerContext
data_consistency
:always
feature_category
:continuous_integration
EXCLUSIVE_LEASE_KEY
=
'ci_stuck_builds_drop_running_worker_lease'
def
perform
return
unless
try_obtain_lease
begin
Ci
::
StuckBuilds
::
DropRunningService
.
new
.
execute
ensure
remove_lease
end
end
private
def
try_obtain_lease
@uuid
=
Gitlab
::
ExclusiveLease
.
new
(
EXCLUSIVE_LEASE_KEY
,
timeout:
30
.
minutes
).
try_obtain
end
def
remove_lease
Gitlab
::
ExclusiveLease
.
cancel
(
EXCLUSIVE_LEASE_KEY
,
@uuid
)
end
end
end
end
app/workers/stuck_ci_jobs_worker.rb
View file @
8f0b7a30
...
...
@@ -17,6 +17,8 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
EXCLUSIVE_LEASE_KEY
=
'stuck_ci_builds_worker_lease'
def
perform
Ci
::
StuckBuilds
::
DropRunningWorker
.
perform_in
(
20
.
minutes
)
return
unless
try_obtain_lease
Ci
::
StuckBuilds
::
DropService
.
new
.
execute
...
...
spec/services/ci/stuck_builds/drop_running_service_spec.rb
0 → 100644
View file @
8f0b7a30
# frozen_string_literal: true
require
'spec_helper'
RSpec
.
describe
Ci
::
StuckBuilds
::
DropRunningService
do
let!
(
:runner
)
{
create
:ci_runner
}
let!
(
:job
)
{
create
:ci_build
,
runner:
runner
}
let
(
:created_at
)
{
}
let
(
:updated_at
)
{
}
subject
(
:service
)
{
described_class
.
new
}
before
do
job_attributes
=
{
status:
status
}
job_attributes
[
:created_at
]
=
created_at
if
created_at
job_attributes
[
:updated_at
]
=
updated_at
if
updated_at
job
.
update!
(
job_attributes
)
end
context
'when job is running'
do
let
(
:status
)
{
'running'
}
context
'when job was updated_at more than an hour ago'
do
let
(
:updated_at
)
{
2
.
hours
.
ago
}
it_behaves_like
'job is dropped'
end
context
'when job was updated in less than 1 hour ago'
do
let
(
:updated_at
)
{
30
.
minutes
.
ago
}
it_behaves_like
'job is unchanged'
end
end
%w(success skipped failed canceled scheduled pending)
.
each
do
|
status
|
context
"when job is
#{
status
}
"
do
let
(
:status
)
{
status
}
let
(
:updated_at
)
{
2
.
days
.
ago
}
context
'when created_at is the same as updated_at'
do
let
(
:created_at
)
{
2
.
days
.
ago
}
it_behaves_like
'job is unchanged'
end
context
'when created_at is before updated_at'
do
let
(
:created_at
)
{
3
.
days
.
ago
}
it_behaves_like
'job is unchanged'
end
end
end
context
'for deleted project'
do
let
(
:status
)
{
'running'
}
let
(
:updated_at
)
{
2
.
days
.
ago
}
before
do
job
.
project
.
update!
(
pending_delete:
true
)
end
it_behaves_like
'job is dropped'
end
end
spec/services/ci/stuck_builds/drop_service_spec.rb
View file @
8f0b7a30
...
...
@@ -17,48 +17,6 @@ RSpec.describe Ci::StuckBuilds::DropService do
job
.
update!
(
job_attributes
)
end
shared_examples
'job is dropped'
do
it
'changes status'
do
expect
(
service
).
to
receive
(
:drop
).
exactly
(
3
).
times
.
and_call_original
expect
(
service
).
to
receive
(
:drop_stuck
).
exactly
(
:once
).
and_call_original
service
.
execute
job
.
reload
expect
(
job
).
to
be_failed
expect
(
job
).
to
be_stuck_or_timeout_failure
end
context
'when job have data integrity problem'
do
it
"does drop the job and logs the reason"
do
job
.
update_columns
(
yaml_variables:
'[{"key" => "value"}]'
)
expect
(
Gitlab
::
ErrorTracking
).
to
receive
(
:track_exception
)
.
with
(
anything
,
a_hash_including
(
build_id:
job
.
id
))
.
once
.
and_call_original
service
.
execute
job
.
reload
expect
(
job
).
to
be_failed
expect
(
job
).
to
be_data_integrity_failure
end
end
end
shared_examples
'job is unchanged'
do
it
'does not change status'
do
expect
(
service
).
to
receive
(
:drop
).
exactly
(
3
).
times
.
and_call_original
expect
(
service
).
to
receive
(
:drop_stuck
).
exactly
(
:once
).
and_call_original
service
.
execute
job
.
reload
expect
(
job
.
status
).
to
eq
(
status
)
end
end
context
'when job is pending'
do
let
(
:status
)
{
'pending'
}
...
...
@@ -195,7 +153,7 @@ RSpec.describe Ci::StuckBuilds::DropService do
context
'when job was updated_at more than an hour ago'
do
let
(
:updated_at
)
{
2
.
hours
.
ago
}
it_behaves_like
'job is
dropp
ed'
it_behaves_like
'job is
unchang
ed'
end
context
'when job was updated in less than 1 hour ago'
do
...
...
@@ -238,7 +196,7 @@ RSpec.describe Ci::StuckBuilds::DropService do
job
.
project
.
update!
(
pending_delete:
true
)
end
it_behaves_like
'job is
dropp
ed'
it_behaves_like
'job is
unchang
ed'
end
describe
'drop stale scheduled builds'
do
...
...
spec/support/shared_examples/ci/stuck_builds_shared_examples.rb
0 → 100644
View file @
8f0b7a30
# frozen_string_literal: true
RSpec
.
shared_examples
'job is dropped'
do
it
'changes status'
do
service
.
execute
job
.
reload
expect
(
job
).
to
be_failed
expect
(
job
).
to
be_stuck_or_timeout_failure
end
context
'when job has data integrity problem'
do
it
'drops the job and logs the reason'
do
job
.
update_columns
(
yaml_variables:
'[{"key" => "value"}]'
)
expect
(
Gitlab
::
ErrorTracking
)
.
to
receive
(
:track_exception
)
.
with
(
anything
,
a_hash_including
(
build_id:
job
.
id
))
.
once
.
and_call_original
service
.
execute
job
.
reload
expect
(
job
).
to
be_failed
expect
(
job
).
to
be_data_integrity_failure
end
end
end
RSpec
.
shared_examples
'job is unchanged'
do
it
'does not change status'
do
service
.
execute
job
.
reload
expect
(
job
.
status
).
to
eq
(
status
)
end
end
spec/workers/ci/stuck_builds/drop_running_worker_spec.rb
0 → 100644
View file @
8f0b7a30
# frozen_string_literal: true
require
'spec_helper'
RSpec
.
describe
Ci
::
StuckBuilds
::
DropRunningWorker
do
include
ExclusiveLeaseHelpers
let
(
:worker_lease_key
)
{
Ci
::
StuckBuilds
::
DropRunningWorker
::
EXCLUSIVE_LEASE_KEY
}
let
(
:worker_lease_uuid
)
{
SecureRandom
.
uuid
}
let
(
:worker2
)
{
described_class
.
new
}
subject
(
:worker
)
{
described_class
.
new
}
before
do
stub_exclusive_lease
(
worker_lease_key
,
worker_lease_uuid
)
end
describe
'#perform'
do
it_behaves_like
'an idempotent worker'
it
'executes an instance of Ci::StuckBuilds::DropRunningService'
do
expect_next_instance_of
(
Ci
::
StuckBuilds
::
DropRunningService
)
do
|
service
|
expect
(
service
).
to
receive
(
:execute
).
exactly
(
:once
)
end
worker
.
perform
end
context
'with an exclusive lease'
do
it
'does not execute concurrently'
do
expect
(
worker
).
to
receive
(
:remove_lease
).
exactly
(
:once
)
expect
(
worker2
).
not_to
receive
(
:remove_lease
)
worker
.
perform
stub_exclusive_lease_taken
(
worker_lease_key
)
worker2
.
perform
end
it
'can execute in sequence'
do
expect
(
worker
).
to
receive
(
:remove_lease
).
at_least
(
:once
)
expect
(
worker2
).
to
receive
(
:remove_lease
).
at_least
(
:once
)
worker
.
perform
worker2
.
perform
end
it
'cancels exclusive leases after worker perform'
do
expect_to_cancel_exclusive_lease
(
worker_lease_key
,
worker_lease_uuid
)
worker
.
perform
end
context
'when the DropRunningService fails'
do
it
'ensures cancellation of the exclusive lease'
do
expect_to_cancel_exclusive_lease
(
worker_lease_key
,
worker_lease_uuid
)
allow_next_instance_of
(
Ci
::
StuckBuilds
::
DropRunningService
)
do
|
service
|
allow
(
service
).
to
receive
(
:execute
)
do
raise
'The query timed out'
end
end
expect
{
worker
.
perform
}.
to
raise_error
(
/The query timed out/
)
end
end
end
end
end
spec/workers/stuck_ci_jobs_worker_spec.rb
View file @
8f0b7a30
...
...
@@ -16,7 +16,13 @@ RSpec.describe StuckCiJobsWorker do
end
describe
'#perform'
do
it
'executes an instance of Ci::StuckBuildsDropService'
do
it
'enqueues a Ci::StuckBuilds::DropRunningWorker job'
do
expect
(
Ci
::
StuckBuilds
::
DropRunningWorker
).
to
receive
(
:perform_in
).
with
(
20
.
minutes
).
exactly
(
:once
)
worker
.
perform
end
it
'executes an instance of Ci::StuckBuilds::DropService'
do
expect_next_instance_of
(
Ci
::
StuckBuilds
::
DropService
)
do
|
service
|
expect
(
service
).
to
receive
(
:execute
).
exactly
(
:once
)
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment