Commit 62a2b486 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Add support for storing build pending state

Build pending state is a final build state that has been submitted by a
runner, but because of various tasks that still need to be performed we
have told the runner that it needs to retry and send the state again.
We still store the first state submitted in case of a runner failure or
a runner behaving incorrectly (for example - when it is submitting a
different state in subsequent requests).
parent 19d7626c
......@@ -38,8 +38,9 @@ module Ci
has_one :deployment, as: :deployable, class_name: 'Deployment'
has_one :resource, class_name: 'Ci::Resource', inverse_of: :build
has_one :pending_state, class_name: 'Ci::BuildPendingState', inverse_of: :build
has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id
has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id, inverse_of: :build
has_many :report_results, class_name: 'Ci::BuildReportResult', inverse_of: :build
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
......
......@@ -5,5 +5,8 @@ class Ci::BuildPendingState < ApplicationRecord
belongs_to :build, class_name: 'Ci::Build', foreign_key: :build_id
enum state: Ci::Stage.statuses
enum failure_reason: CommitStatus.failure_reasons
validates :build, presence: true
end
......@@ -26,44 +26,12 @@ module Ci
private
def build_state
params.dig(:state).to_s
end
def has_trace?
params.dig(:trace).present?
end
def has_checksum?
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def build_running?
build_state == 'running'
end
def accept_available?
!build_running? && has_checksum? && chunks_migration_enabled?
end
def accept_request?
accept_available? && live_chunks_pending?
end
def chunks_migration_enabled?
Feature.enabled?(:ci_enable_live_trace, build.project)
end
def accept_build_state!
# TODO, persist ci_build_state if not present (find or create)
if ACCEPT_TIMEOUT.ago > ensure_pending_state.created_at
metrics.increment_trace_operation(operation: :discarded)
return update_build_state!
end
build.trace_chunks.live.find_each do |chunk|
chunk.schedule_to_persist!
......@@ -77,11 +45,11 @@ module Ci
def overwrite_trace!
metrics.increment_trace_operation(operation: :overwrite)
# TODO, disable in FF
build.trace.set(params[:trace])
build.trace.set(params[:trace]) # TODO, disable by default using a new FF
end
def update_build_state!
# TODO, simplify this, doesn't work in case of timeout too
if accept_available? && has_chunks?
metrics.increment_trace_operation(operation: :finalized)
end
......@@ -103,5 +71,54 @@ module Ci
Result.new(status: 400)
end
end
def accept_available?
!build_running? && has_checksum? && chunks_migration_enabled?
end
def accept_request?
accept_available? && live_chunks_pending?
end
def build_state
params.dig(:state).to_s
end
def has_trace?
params.dig(:trace).present?
end
def has_checksum?
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def build_running?
build_state == 'running'
end
def ensure_pending_state
Ci::BuildPendingState.create_or_find_by!(
build_id: build.id,
state: params.fetch(:state),
trace_checksum: params.fetch(:checksum),
failure_reason: params.dig(:failure_reason)
)
rescue ActiveRecord::RecordNotFound
metrics.increment_trace_operation(operation: :flaky)
build.pending_state
end
def chunks_migration_enabled?
Feature.enabled?(:ci_enable_live_trace, build.project)
end
end
end
---
title: Migrate live traces before updating build state
merge_request: 41304
author:
type: added
# frozen_string_literal: true
class ChangeBuildPendingStateEnums < ActiveRecord::Migration[6.0]
DOWNTIME = false
def up
change_column :ci_build_pending_states, :state, :integer, limit: 2
change_column :ci_build_pending_states, :failure_reason, :integer, limit: 2
end
def down
change_column :ci_build_pending_states, :state, :integer
change_column :ci_build_pending_states, :failure_reason, :integer
end
end
c46bafefa5fd79a6644cbe259260b66aaded36aad4ae28a84ddd8bb072c2167d
\ No newline at end of file
......@@ -9831,8 +9831,8 @@ CREATE TABLE public.ci_build_pending_states (
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL,
build_id bigint NOT NULL,
state integer,
failure_reason integer,
state smallint,
failure_reason smallint,
trace_checksum bytea
);
......
......@@ -7,7 +7,7 @@ module Gitlab
extend Gitlab::Utils::StrongMemoize
OPERATIONS = [:appended, :mutated, :overwrite, :accepted,
:finalized].freeze
:finalized, :discarded, :flaky].freeze
def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation)
......
......@@ -50,7 +50,7 @@ RSpec.describe Ci::RetryBuildService do
metadata runner_session trace_chunks upstream_pipeline_id
artifacts_file artifacts_metadata artifacts_size commands
resource resource_group_id processed security_scans author
pipeline_id report_results].freeze
pipeline_id report_results pending_state].freeze
shared_examples 'build duplication' do
let(:another_pipeline) { create(:ci_empty_pipeline, project: project) }
......
......@@ -74,7 +74,9 @@ RSpec.describe Ci::UpdateBuildStateService do
end
context 'when build has a checksum' do
let(:params) { { checksum: 'crc32:12345678', state: 'success' } }
let(:params) do
{ checksum: 'crc32:12345678', state: 'failed', failure_reason: 'script_failure' }
end
context 'when build trace has been migrated' do
before do
......@@ -84,7 +86,7 @@ RSpec.describe Ci::UpdateBuildStateService do
it 'updates a build state' do
subject.execute
expect(build).to be_success
expect(build).to be_failed
end
it 'responds with 200 OK status' do
......@@ -135,6 +137,72 @@ RSpec.describe Ci::UpdateBuildStateService do
.with(operation: :accepted)
end
it 'creates a pending state record' do
subject.execute
build.pending_state.then do |status|
expect(status).to be_present
expect(status.state).to eq 'failed'
expect(status.trace_checksum).to eq 'crc32:12345678'
expect(status.failure_reason).to eq 'script_failure'
end
end
context 'when build pending state is outdated' do
before do
build.create_pending_state(
state: 'failed',
trace_checksum: 'crc32:12345678',
failure_reason: 'script_failure',
created_at: 10.minutes.ago
)
end
it 'responds with 200 OK' do
result = subject.execute
expect(result.status).to eq 200
end
it 'updates build state' do
subject.execute
expect(build.reload).to be_failed
expect(build.failure_reason).to eq 'script_failure'
end
it 'increments discarded traces metric' do
subject.execute
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :discarded)
end
end
context 'when build pending state has changes' do
before do
build.create_pending_state(
state: 'success',
created_at: 10.minutes.ago
)
end
it 'uses stored state and responds with 200 OK' do
result = subject.execute
expect(result.status).to eq 200
end
it 'increments flaky trace metric' do
subject.execute
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :flaky)
end
end
context 'when live traces are disabled' do
before do
stub_feature_flags(ci_enable_live_trace: false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment