Commit db8740c0 authored by Fabio Pitino's avatar Fabio Pitino

Merge branch 'sh-requeue-failed-job-register' into 'master'

Fail jobs that fail to render registration response

See merge request gitlab-org/gitlab!36274
parents 5ba1af9f f034fcf8
...@@ -11,7 +11,7 @@ module Ci ...@@ -11,7 +11,7 @@ module Ci
METRICS_SHARD_TAG_PREFIX = 'metrics_shard::'.freeze METRICS_SHARD_TAG_PREFIX = 'metrics_shard::'.freeze
DEFAULT_METRICS_SHARD = 'default'.freeze DEFAULT_METRICS_SHARD = 'default'.freeze
Result = Struct.new(:build, :valid?) Result = Struct.new(:build, :build_json, :valid?)
def initialize(runner) def initialize(runner)
@runner = runner @runner = runner
...@@ -59,7 +59,7 @@ module Ci ...@@ -59,7 +59,7 @@ module Ci
end end
register_failure register_failure
Result.new(nil, valid) Result.new(nil, nil, valid)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -71,7 +71,7 @@ module Ci ...@@ -71,7 +71,7 @@ module Ci
# In case when 2 runners try to assign the same build, second runner will be declined # In case when 2 runners try to assign the same build, second runner will be declined
# with StateMachines::InvalidTransition or StaleObjectError when doing run! or save method. # with StateMachines::InvalidTransition or StaleObjectError when doing run! or save method.
if assign_runner!(build, params) if assign_runner!(build, params)
Result.new(build, true) present_build!(build)
end end
rescue StateMachines::InvalidTransition, ActiveRecord::StaleObjectError rescue StateMachines::InvalidTransition, ActiveRecord::StaleObjectError
# We are looping to find another build that is not conflicting # We are looping to find another build that is not conflicting
...@@ -83,8 +83,10 @@ module Ci ...@@ -83,8 +83,10 @@ module Ci
# In case we hit the concurrency-access lock, # In case we hit the concurrency-access lock,
# we still have to return 409 in the end, # we still have to return 409 in the end,
# to make sure that this is properly handled by runner. # to make sure that this is properly handled by runner.
Result.new(nil, false) Result.new(nil, nil, false)
rescue => ex rescue => ex
# If an error (e.g. GRPC::DeadlineExceeded) occurred constructing
# the result, consider this as a failure to be retried.
scheduler_failure!(build) scheduler_failure!(build)
track_exception_for_build(ex, build) track_exception_for_build(ex, build)
...@@ -92,6 +94,15 @@ module Ci ...@@ -92,6 +94,15 @@ module Ci
nil nil
end end
# Force variables evaluation to occur now
def present_build!(build)
# We need to use the presenter here because Gitaly calls in the presenter
# may fail, and we need to ensure the response has been generated.
presented_build = ::Ci::BuildRunnerPresenter.new(build) # rubocop:disable CodeReuse/Presenter
build_json = ::API::Entities::JobRequest::Response.new(presented_build).to_json
Result.new(build, build_json, true)
end
def assign_runner!(build, params) def assign_runner!(build, params)
build.runner_id = runner.id build.runner_id = runner.id
build.runner_session_attributes = params[:session] if params[:session].present? build.runner_session_attributes = params[:session] if params[:session].present?
......
---
title: Fail jobs that fail to render registration response
merge_request: 36274
author:
type: fixed
...@@ -108,6 +108,20 @@ module API ...@@ -108,6 +108,20 @@ module API
end end
optional :job_age, type: Integer, desc: %q(Job should be older than passed age in seconds to be ran on runner) optional :job_age, type: Integer, desc: %q(Job should be older than passed age in seconds to be ran on runner)
end end
# Since we serialize the build output ourselves to ensure Gitaly
# gRPC calls succeed, we need a custom Grape format to handle
# this:
# 1. Grape will ordinarily call `JSON.dump` when Content-Type is set
# to application/json. To avoid this, we need to define a custom type in
# `content_type` and a custom formatter to go with it.
# 2. Grape will parse the request input with the parser defined for
# `content_type`. If no such parser exists, it will be treated as text. We
# reuse the existing JSON parser to preserve the previous behavior.
content_type :build_json, 'application/json'
formatter :build_json, ->(object, _) { object }
parser :build_json, ::Grape::Parser::Json
post '/request' do post '/request' do
authenticate_runner! authenticate_runner!
...@@ -128,9 +142,10 @@ module API ...@@ -128,9 +142,10 @@ module API
result = ::Ci::RegisterJobService.new(current_runner).execute(runner_params) result = ::Ci::RegisterJobService.new(current_runner).execute(runner_params)
if result.valid? if result.valid?
if result.build if result.build_json
Gitlab::Metrics.add_event(:build_found) Gitlab::Metrics.add_event(:build_found)
present ::Ci::BuildRunnerPresenter.new(result.build), with: Entities::JobRequest::Response env['api.format'] = :build_json
body result.build_json
else else
Gitlab::Metrics.add_event(:build_not_found) Gitlab::Metrics.add_event(:build_not_found)
header 'X-GitLab-Last-Update', new_update header 'X-GitLab-Last-Update', new_update
......
...@@ -518,6 +518,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do ...@@ -518,6 +518,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
request_job info: { platform: :darwin } request_job info: { platform: :darwin }
expect(response).to have_gitlab_http_status(:created) expect(response).to have_gitlab_http_status(:created)
expect(response.headers['Content-Type']).to eq('application/json')
expect(response.headers).not_to have_key('X-GitLab-Last-Update') expect(response.headers).not_to have_key('X-GitLab-Last-Update')
expect(runner.reload.platform).to eq('darwin') expect(runner.reload.platform).to eq('darwin')
expect(json_response['id']).to eq(job.id) expect(json_response['id']).to eq(job.id)
...@@ -569,6 +570,24 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do ...@@ -569,6 +570,24 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
end end
end end
context 'when a Gitaly exception is thrown during response' do
before do
allow_next_instance_of(Ci::BuildRunnerPresenter) do |instance|
allow(instance).to receive(:artifacts).and_raise(GRPC::DeadlineExceeded)
end
end
it 'fails the job as a scheduler failure' do
request_job
expect(response).to have_gitlab_http_status(:no_content)
expect(job.reload.failed?).to be_truthy
expect(job.failure_reason).to eq('scheduler_failure')
expect(job.runner_id).to eq(runner.id)
expect(job.runner_session).to be_nil
end
end
context 'when GIT_DEPTH is not specified and there is no default git depth for the project' do context 'when GIT_DEPTH is not specified and there is no default git depth for the project' do
before do before do
project.update!(ci_default_git_depth: nil) project.update!(ci_default_git_depth: nil)
...@@ -1090,7 +1109,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do ...@@ -1090,7 +1109,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
def request_job(token = runner.token, **params) def request_job(token = runner.token, **params)
new_params = params.merge(token: token, last_update: last_update) new_params = params.merge(token: token, last_update: last_update)
post api('/jobs/request'), params: new_params, headers: { 'User-Agent' => user_agent } post api('/jobs/request'), params: new_params.to_json, headers: { 'User-Agent' => user_agent, 'Content-Type': 'application/json' }
end end
end end
......
...@@ -109,12 +109,14 @@ module Ci ...@@ -109,12 +109,14 @@ module Ci
end end
context 'shared runner' do context 'shared runner' do
let(:build) { execute(shared_runner) } let(:response) { described_class.new(shared_runner).execute }
let(:build) { response.build }
it { expect(build).to be_kind_of(Build) } it { expect(build).to be_kind_of(Build) }
it { expect(build).to be_valid } it { expect(build).to be_valid }
it { expect(build).to be_running } it { expect(build).to be_running }
it { expect(build.runner).to eq(shared_runner) } it { expect(build.runner).to eq(shared_runner) }
it { expect(Gitlab::Json.parse(response.build_json)['id']).to eq(build.id) }
end end
context 'specific runner' do context 'specific runner' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment