Commit 9bd044d9 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch...

Merge branch '38265-stuckcijobsworker-wrongly-detects-cancels-stuck-builds-when-per-job-timeout-is-more-than-an-hour' into 'master'

Resolve "StuckCiJobsWorker wrongly detects, cancels 'stuck' builds when per-job timeout is more than an hour"

Closes #38265, #42196, and #42750

See merge request gitlab-org/gitlab-ce!16756
parents d08bf247 e27ea805
......@@ -2,9 +2,11 @@ module Ci
class Runner < ActiveRecord::Base
extend Gitlab::Ci::Model
include Gitlab::SQL::Pattern
include RedisCacheable
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
UPDATE_DB_RUNNER_INFO_EVERY = 40.minutes
AVAILABLE_SCOPES = %w[specific shared active paused online].freeze
FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level].freeze
......@@ -47,6 +49,8 @@ module Ci
ref_protected: 1
}
cached_attr_reader :version, :revision, :platform, :architecture, :contacted_at
# Searches for runners matching the given query.
#
# This method uses ILIKE on PostgreSQL and LIKE on MySQL.
......@@ -152,6 +156,18 @@ module Ci
ensure_runner_queue_value == value if value.present?
end
def update_cached_info(values)
values = values&.slice(:version, :revision, :platform, :architecture) || {}
values[:contacted_at] = Time.now
cache_attributes(values)
if persist_cached_data?
self.assign_attributes(values)
self.save if self.changed?
end
end
private
def cleanup_runner_queue
......@@ -164,6 +180,17 @@ module Ci
"runner:build_queue:#{self.token}"
end
def persist_cached_data?
# Use a random threshold to prevent beating DB updates.
# It generates a distribution between [40m, 80m].
contacted_at_max_age = UPDATE_DB_RUNNER_INFO_EVERY + Random.rand(UPDATE_DB_RUNNER_INFO_EVERY)
real_contacted_at = read_attribute(:contacted_at)
real_contacted_at.nil? ||
(Time.now - real_contacted_at) >= contacted_at_max_age
end
def tag_constraints
unless has_tags? || run_untagged?
errors.add(:tags_list,
......
module RedisCacheable
extend ActiveSupport::Concern
include Gitlab::Utils::StrongMemoize
CACHED_ATTRIBUTES_EXPIRY_TIME = 24.hours
class_methods do
def cached_attr_reader(*attributes)
attributes.each do |attribute|
define_method("#{attribute}") do
cached_attribute(attribute) || read_attribute(attribute)
end
end
end
end
def cached_attribute(attribute)
(cached_attributes || {})[attribute]
end
def cache_attributes(values)
Gitlab::Redis::SharedState.with do |redis|
redis.set(cache_attribute_key, values.to_json, ex: CACHED_ATTRIBUTES_EXPIRY_TIME)
end
end
private
def cache_attribute_key
"cache:#{self.class.name}:#{self.id}:attributes"
end
def cached_attributes
strong_memoize(:cached_attributes) do
Gitlab::Redis::SharedState.with do |redis|
data = redis.get(cache_attribute_key)
JSON.parse(data, symbolize_names: true) if data
end
end
end
end
---
title: Update runner info on all authenticated requests
merge_request: 16756
author:
type: changed
......@@ -3,7 +3,6 @@ module API
module Runner
JOB_TOKEN_HEADER = 'HTTP_JOB_TOKEN'.freeze
JOB_TOKEN_PARAM = :token
UPDATE_RUNNER_EVERY = 10 * 60
def runner_registration_token_valid?
ActiveSupport::SecurityUtils.variable_size_secure_compare(params[:token],
......@@ -18,30 +17,14 @@ module API
def authenticate_runner!
forbidden! unless current_runner
current_runner.update_cached_info(get_runner_version_from_params)
end
def current_runner
@runner ||= ::Ci::Runner.find_by_token(params[:token].to_s)
end
def update_runner_info
return unless update_runner?
current_runner.contacted_at = Time.now
current_runner.assign_attributes(get_runner_version_from_params)
current_runner.save if current_runner.changed?
end
def update_runner?
# Use a random threshold to prevent beating DB updates.
# It generates a distribution between [40m, 80m].
#
contacted_at_max_age = UPDATE_RUNNER_EVERY + Random.rand(UPDATE_RUNNER_EVERY)
current_runner.contacted_at.nil? ||
(Time.now - current_runner.contacted_at) >= contacted_at_max_age
end
def validate_job!(job)
not_found! unless job
......
......@@ -78,7 +78,6 @@ module API
post '/request' do
authenticate_runner!
no_content! unless current_runner.active?
update_runner_info
if current_runner.runner_queue_value_latest?(params[:last_update])
header 'X-GitLab-Last-Update', params[:last_update]
......
......@@ -95,6 +95,17 @@ describe Ci::Runner do
subject { runner.online? }
before do
allow_any_instance_of(described_class).to receive(:cached_attribute).and_call_original
allow_any_instance_of(described_class).to receive(:cached_attribute)
.with(:platform).and_return("darwin")
end
context 'no cache value' do
before do
stub_redis_runner_contacted_at(nil)
end
context 'never contacted' do
before do
runner.contacted_at = nil
......@@ -120,6 +131,35 @@ describe Ci::Runner do
end
end
context 'with cache value' do
context 'contacted long time ago time' do
before do
runner.contacted_at = 1.year.ago
stub_redis_runner_contacted_at(1.year.ago.to_s)
end
it { is_expected.to be_falsey }
end
context 'contacted 1s ago' do
before do
runner.contacted_at = 50.minutes.ago
stub_redis_runner_contacted_at(1.second.ago.to_s)
end
it { is_expected.to be_truthy }
end
end
def stub_redis_runner_contacted_at(value)
Gitlab::Redis::SharedState.with do |redis|
cache_key = runner.send(:cache_attribute_key)
expect(redis).to receive(:get).with(cache_key)
.and_return({ contacted_at: value }.to_json).at_least(:once)
end
end
end
describe '#can_pick?' do
let(:pipeline) { create(:ci_pipeline) }
let(:build) { create(:ci_build, pipeline: pipeline) }
......@@ -361,6 +401,50 @@ describe Ci::Runner do
end
end
describe '#update_cached_info' do
let(:runner) { create(:ci_runner) }
subject { runner.update_cached_info(architecture: '18-bit') }
context 'when database was updated recently' do
before do
runner.contacted_at = Time.now
end
it 'updates cache' do
expect_redis_update
subject
end
end
context 'when database was not updated recently' do
before do
runner.contacted_at = 2.hours.ago
end
it 'updates database' do
expect_redis_update
expect { subject }.to change { runner.reload.read_attribute(:contacted_at) }
.and change { runner.reload.read_attribute(:architecture) }
end
it 'updates cache' do
expect_redis_update
subject
end
end
def expect_redis_update
Gitlab::Redis::SharedState.with do |redis|
redis_key = runner.send(:cache_attribute_key)
expect(redis).to receive(:set).with(redis_key, anything, any_args)
end
end
end
describe '#destroy' do
let(:runner) { create(:ci_runner) }
......
require 'spec_helper'
describe RedisCacheable do
let(:model) { double }
before do
model.extend(described_class)
allow(model).to receive(:cache_attribute_key).and_return('key')
end
describe '#cached_attribute' do
let(:payload) { { attribute: 'value' } }
subject { model.cached_attribute(payload.keys.first) }
it 'gets the cache attribute' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:get).with('key')
.and_return(payload.to_json)
end
expect(subject).to eq(payload.values.first)
end
end
describe '#cache_attributes' do
let(:values) { { name: 'new_name' } }
subject { model.cache_attributes(values) }
it 'sets the cache attributes' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with('key', values.to_json, anything)
end
subject
end
end
end
......@@ -8,6 +8,7 @@ describe API::Runner do
before do
stub_gitlab_calls
stub_application_setting(runners_registration_token: registration_token)
allow_any_instance_of(Ci::Runner).to receive(:cache_attributes)
end
describe '/api/v4/runners' do
......@@ -408,7 +409,7 @@ describe API::Runner do
expect { request_job }.to change { runner.reload.contacted_at }
end
%w(name version revision platform architecture).each do |param|
%w(version revision platform architecture).each do |param|
context "when info parameter '#{param}' is present" do
let(:value) { "#{param}_value" }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment