Commit 23b35c17 authored by Dylan Griffith's avatar Dylan Griffith

Track elasticsearch_timed_out_count for all ES requests in log

Since we've recently introduced timeouts in
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53435 we need some
way to track this in logs. The reason it needs special handling is that
Elasticsearch timeouts by default will just return early and return
partial results. For our use case this is what we want as the user still
gets something but we don't starve Elasticsearch resources but this
means there is no error propagated in the stack or the logs for us to
measure so we need to explicitly log this detail that is returned from
Elasticsearch.
parent 46591843
......@@ -93,6 +93,8 @@ which correspond to:
1. `elasticsearch_calls`: total number of calls to Elasticsearch
1. `elasticsearch_duration_s`: total time taken by Elasticsearch calls
1. `elasticsearch_timed_out_count`: total number of calls to Elasticsearch that
timed out and therefore returned partial results
ActionCable connection and subscription events are also logged to this file and they follow the same
format above. The `method`, `path`, and `format` fields are not applicable, and are always empty.
......
---
title: Track elasticsearch_timed_out_count for all ES requests in log
merge_request: 53955
author:
type: changed
......@@ -12,6 +12,12 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransport, :elastic, :req
end
end
describe '.increment_timed_out_count' do
it 'increases the timed out count by 1' do
expect { described_class.increment_timed_out_count }.to change(described_class, :get_timed_out_count).by(1)
end
end
describe '.add_duration' do
it 'does not lose precision while adding' do
::Gitlab::SafeRequestStore.clear!
......@@ -46,6 +52,8 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransport, :elastic, :req
end
RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :elastic, :request_store do
let(:elasticsearch_url) { Gitlab::CurrentSettings.elasticsearch_url[0] }
before do
allow(::Gitlab::PerformanceBar).to receive(:enabled_for_request?).and_return(true)
end
......@@ -61,8 +69,6 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :el
it 'adds the labkit correlation id as X-Opaque-Id to all requests' do
allow(Labkit::Correlation::CorrelationId).to receive(:current_or_new_id).and_return('new-correlation-id')
elasticsearch_url = Gitlab::CurrentSettings.elasticsearch_url[0]
Project.__elasticsearch__.client
.perform_request(:get, '/')
......@@ -73,12 +79,30 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :el
it 'does not override the X-Opaque-Id if it is already present' do
allow(Labkit::Correlation::CorrelationId).to receive(:current_or_new_id).and_return('new-correlation-id')
elasticsearch_url = Gitlab::CurrentSettings.elasticsearch_url[0]
Project.__elasticsearch__.client
.perform_request(:get, '/', {}, nil, { 'X-Opaque-Id': 'original-opaque-id' })
expect(a_request(:get, /#{elasticsearch_url}/)
.with(headers: { 'X-Opaque-Id' => 'original-opaque-id' })).to have_been_made
end
context 'when the response indicates a server side timeout' do
it 'increments timeouts' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'{"timed_out": true}', status: 200, headers: { 'Content-Type' => 'application/json' })
Project.__elasticsearch__.client.perform_request(:get, '/')
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count).to eq(1)
end
end
context 'when the response does not indicate a server side timeout' do
it 'does not increment timeouts' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'{"timed_out": false}', status: 200, headers: { 'Content-Type' => 'application/json' })
Project.__elasticsearch__.client.perform_request(:get, '/')
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count).to eq(0)
end
end
end
......@@ -18,6 +18,7 @@ RSpec.describe Gitlab::InstrumentationHelper do
expect(payload[:elasticsearch_calls]).to be > 0
expect(payload[:elasticsearch_duration_s]).to be > 0
expect(payload[:elasticsearch_timed_out_count]).to be_kind_of(Integer)
end
end
end
......
......@@ -9,12 +9,15 @@ module Gitlab
start = Time.now
headers = (headers || {})
.reverse_merge({ 'X-Opaque-Id': Labkit::Correlation::CorrelationId.current_or_new_id })
super
response = super
ensure
if ::Gitlab::SafeRequestStore.active?
duration = (Time.now - start)
::Gitlab::Instrumentation::ElasticsearchTransport.increment_request_count
::Gitlab::Instrumentation::ElasticsearchTransport.increment_timed_out_count if response&.body&.dig('timed_out')
::Gitlab::Instrumentation::ElasticsearchTransport.add_duration(duration)
::Gitlab::Instrumentation::ElasticsearchTransport.add_call_details(duration, method, path, params, body)
end
......@@ -25,6 +28,7 @@ module Gitlab
ELASTICSEARCH_REQUEST_COUNT = :elasticsearch_request_count
ELASTICSEARCH_CALL_DURATION = :elasticsearch_call_duration
ELASTICSEARCH_CALL_DETAILS = :elasticsearch_call_details
ELASTICSEARCH_TIMED_OUT_COUNT = :elasticsearch_timed_out_count
def self.get_request_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_REQUEST_COUNT] || 0
......@@ -49,6 +53,15 @@ module Gitlab
::Gitlab::SafeRequestStore[ELASTICSEARCH_CALL_DURATION] += duration
end
def self.increment_timed_out_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] ||= 0
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] += 1
end
def self.get_timed_out_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] || 0
end
def self.add_call_details(duration, method, path, params, body)
return unless Gitlab::PerformanceBar.enabled_for_request?
......
......@@ -15,6 +15,7 @@ module Gitlab
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
*::Gitlab::Memory::Instrumentation::KEY_MAPPING.values,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS,
......@@ -79,6 +80,7 @@ module Gitlab
payload[:elasticsearch_calls] = elasticsearch_calls
payload[:elasticsearch_duration_s] = Gitlab::Instrumentation::ElasticsearchTransport.query_time
payload[:elasticsearch_timed_out_count] = Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count
end
def instrument_external_http(payload)
......
......@@ -16,6 +16,7 @@ RSpec.describe Gitlab::InstrumentationHelper do
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
:mem_objects,
:mem_bytes,
:mem_mallocs,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment