Commit 504fab67 authored by Kerri Miller's avatar Kerri Miller

Merge branch '301146-revert-revert-log-timeouts' into 'master'

Log Elasticsearch timeouts (second attempt)

See merge request gitlab-org/gitlab!54180
parents 26a4dc8f 4ae05e3b
......@@ -93,6 +93,8 @@ which correspond to:
1. `elasticsearch_calls`: total number of calls to Elasticsearch
1. `elasticsearch_duration_s`: total time taken by Elasticsearch calls
1. `elasticsearch_timed_out_count`: total number of calls to Elasticsearch that
timed out and therefore returned partial results
ActionCable connection and subscription events are also logged to this file and they follow the same
format above. The `method`, `path`, and `format` fields are not applicable, and are always empty.
......
---
title: Track elasticsearch_timed_out_count for all ES requests in log
merge_request: 54180
author:
type: changed
......@@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe Gitlab::Elastic::Helper do
RSpec.describe Gitlab::Elastic::Helper, :request_store do
subject(:helper) { described_class.default }
shared_context 'with a legacy index' do
......
......@@ -12,6 +12,12 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransport, :elastic, :req
end
end
describe '.increment_timed_out_count' do
it 'increases the timed out count by 1' do
expect { described_class.increment_timed_out_count }.to change(described_class, :get_timed_out_count).by(1)
end
end
describe '.add_duration' do
it 'does not lose precision while adding' do
::Gitlab::SafeRequestStore.clear!
......@@ -46,6 +52,8 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransport, :elastic, :req
end
RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :elastic, :request_store do
let(:elasticsearch_url) { Gitlab::CurrentSettings.elasticsearch_url[0] }
before do
allow(::Gitlab::PerformanceBar).to receive(:enabled_for_request?).and_return(true)
end
......@@ -61,9 +69,6 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :el
it 'adds the labkit correlation id as X-Opaque-Id to all requests' do
allow(Labkit::Correlation::CorrelationId).to receive(:current_or_new_id).and_return('new-correlation-id')
elasticsearch_url = Gitlab::CurrentSettings.elasticsearch_url[0]
stub_request(:any, elasticsearch_url)
Project.__elasticsearch__.client
.perform_request(:get, '/')
......@@ -74,13 +79,47 @@ RSpec.describe ::Gitlab::Instrumentation::ElasticsearchTransportInterceptor, :el
it 'does not override the X-Opaque-Id if it is already present' do
allow(Labkit::Correlation::CorrelationId).to receive(:current_or_new_id).and_return('new-correlation-id')
elasticsearch_url = Gitlab::CurrentSettings.elasticsearch_url[0]
stub_request(:any, elasticsearch_url)
Project.__elasticsearch__.client
.perform_request(:get, '/', {}, nil, { 'X-Opaque-Id': 'original-opaque-id' })
expect(a_request(:get, /#{elasticsearch_url}/)
.with(headers: { 'X-Opaque-Id' => 'original-opaque-id' })).to have_been_made
end
context 'when the response indicates a server side timeout' do
it 'increments timeouts' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'{"timed_out": true}', status: 200, headers: { 'Content-Type' => 'application/json' })
Project.__elasticsearch__.client.perform_request(:get, '/')
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count).to eq(1)
end
end
context 'when the response does not indicate a server side timeout' do
it 'does not increment timeouts' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'{"timed_out": false}', status: 200, headers: { 'Content-Type' => 'application/json' })
Project.__elasticsearch__.client.perform_request(:get, '/')
expect(::Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count).to eq(0)
end
end
context 'when the server returns a blank response body' do
it 'does not error' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'', status: 200)
Project.__elasticsearch__.client.perform_request(:get, '/')
end
end
context 'when the request raises some error' do
it 'does not raise a different error in ensure' do
stub_request(:any, /#{elasticsearch_url}/).to_return(body: +'', status: 500)
expect { Project.__elasticsearch__.client.perform_request(:get, '/') }
.to raise_error(::Elasticsearch::Transport::Transport::Errors::InternalServerError)
end
end
end
......@@ -18,6 +18,7 @@ RSpec.describe Gitlab::InstrumentationHelper do
expect(payload[:elasticsearch_calls]).to be > 0
expect(payload[:elasticsearch_duration_s]).to be > 0
expect(payload[:elasticsearch_timed_out_count]).to be_kind_of(Integer)
end
end
end
......
......@@ -9,12 +9,17 @@ module Gitlab
start = Time.now
headers = (headers || {})
.reverse_merge({ 'X-Opaque-Id': Labkit::Correlation::CorrelationId.current_or_new_id })
super
response = super
ensure
if ::Gitlab::SafeRequestStore.active?
duration = (Time.now - start)
::Gitlab::Instrumentation::ElasticsearchTransport.increment_request_count
if response&.body && response.body.is_a?(Hash) && response.body['timed_out']
::Gitlab::Instrumentation::ElasticsearchTransport.increment_timed_out_count
end
::Gitlab::Instrumentation::ElasticsearchTransport.add_duration(duration)
::Gitlab::Instrumentation::ElasticsearchTransport.add_call_details(duration, method, path, params, body)
end
......@@ -25,6 +30,7 @@ module Gitlab
ELASTICSEARCH_REQUEST_COUNT = :elasticsearch_request_count
ELASTICSEARCH_CALL_DURATION = :elasticsearch_call_duration
ELASTICSEARCH_CALL_DETAILS = :elasticsearch_call_details
ELASTICSEARCH_TIMED_OUT_COUNT = :elasticsearch_timed_out_count
def self.get_request_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_REQUEST_COUNT] || 0
......@@ -49,6 +55,15 @@ module Gitlab
::Gitlab::SafeRequestStore[ELASTICSEARCH_CALL_DURATION] += duration
end
def self.increment_timed_out_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] ||= 0
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] += 1
end
def self.get_timed_out_count
::Gitlab::SafeRequestStore[ELASTICSEARCH_TIMED_OUT_COUNT] || 0
end
def self.add_call_details(duration, method, path, params, body)
return unless Gitlab::PerformanceBar.enabled_for_request?
......
......@@ -15,6 +15,7 @@ module Gitlab
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
*::Gitlab::Memory::Instrumentation::KEY_MAPPING.values,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS,
......@@ -79,6 +80,7 @@ module Gitlab
payload[:elasticsearch_calls] = elasticsearch_calls
payload[:elasticsearch_duration_s] = Gitlab::Instrumentation::ElasticsearchTransport.query_time
payload[:elasticsearch_timed_out_count] = Gitlab::Instrumentation::ElasticsearchTransport.get_timed_out_count
end
def instrument_external_http(payload)
......
......@@ -16,6 +16,7 @@ RSpec.describe Gitlab::InstrumentationHelper do
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:elasticsearch_timed_out_count,
:mem_objects,
:mem_bytes,
:mem_mallocs,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment