Commit 19ca6e9e authored by Matthias Käppler's avatar Matthias Käppler

Merge branch 'instrument-and-log-memory-usage' into 'master'

Use a custom Ruby patch to instrument memory usage [RUN ALL RSPEC] [RUN AS-IF-FOSS]

See merge request gitlab-org/gitlab!52306
parents 06655b7f e96a85ad
---
title: Use a custom Ruby patch to instrument memory usage
merge_request: 52306
author:
type: added
---
name: trace_memory_allocations
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52306
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/299524
milestone: '13.9'
type: development
group: group::memory
default_enabled: false
......@@ -7,16 +7,32 @@ module Gitlab
DURATION_PRECISION = 6 # microseconds
def keys
@keys ||= [:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS,
*::Gitlab::Metrics::Subscribers::ExternalHttp::KNOWN_PAYLOAD_KEYS,
*::Gitlab::Metrics::Subscribers::RackAttack::PAYLOAD_KEYS]
@keys ||= [
:cpu_s,
:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
*::Gitlab::Memory::Instrumentation::KEY_MAPPING.values,
*::Gitlab::Instrumentation::Redis.known_payload_keys,
*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS,
*::Gitlab::Metrics::Subscribers::ExternalHttp::KNOWN_PAYLOAD_KEYS,
*::Gitlab::Metrics::Subscribers::RackAttack::PAYLOAD_KEYS
]
end
def init_instrumentation_data(request_ip: nil)
# Set `request_start_time` only if this is request
# This is done, as `request_start_time` imply `request_deadline`
if request_ip
Gitlab::RequestContext.instance.client_ip = request_ip
Gitlab::RequestContext.instance.request_start_time = Gitlab::Metrics::System.real_time
end
Gitlab::RequestContext.instance.start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time
Gitlab::RequestContext.instance.thread_memory_allocations = Gitlab::Memory::Instrumentation.start_thread_memory_allocations
end
def add_instrumentation_data(payload)
......@@ -28,6 +44,8 @@ module Gitlab
instrument_active_record(payload)
instrument_external_http(payload)
instrument_rack_attack(payload)
instrument_cpu(payload)
instrument_thread_memory_allocations(payload)
end
def instrument_gitaly(payload)
......@@ -89,6 +107,19 @@ module Gitlab
payload.merge!(::Gitlab::Metrics::Subscribers::RackAttack.payload)
end
def instrument_cpu(payload)
cpu_s = ::Gitlab::Metrics::System.thread_cpu_duration(
::Gitlab::RequestContext.instance.start_thread_cpu_time)
payload[:cpu_s] = cpu_s.round(DURATION_PRECISION) if cpu_s
end
def instrument_thread_memory_allocations(payload)
counters = ::Gitlab::Memory::Instrumentation.measure_thread_memory_allocations(
::Gitlab::RequestContext.instance.thread_memory_allocations)
payload.merge!(counters) if counters
end
# Returns the queuing duration for a Sidekiq job in seconds, as a float, if the
# `enqueued_at` field or `created_at` field is available.
#
......@@ -115,7 +146,7 @@ module Gitlab
#
# @param [Time] start
def self.elapsed_by_absolute_time(start)
(Time.now - start).to_f.round(6)
(Time.now - start).to_f.round(DURATION_PRECISION)
end
private_class_method :elapsed_by_absolute_time
......
......@@ -29,10 +29,6 @@ module Gitlab
payload[:etag_route] = event.payload[:etag_route] if event.payload[:etag_route]
payload[Labkit::Correlation::CorrelationId::LOG_KEY] = event.payload[Labkit::Correlation::CorrelationId::LOG_KEY] || Labkit::Correlation::CorrelationId.current_id
if cpu_s = Gitlab::Metrics::System.thread_cpu_duration(::Gitlab::RequestContext.instance.start_thread_cpu_time)
payload[:cpu_s] = cpu_s.round(2)
end
CLOUDFLARE_CUSTOM_HEADERS.each do |_, value|
payload[value] = event.payload[value] if event.payload[value]
end
......
# frozen_string_literal: true
# This class uses a custom Ruby patch to allow
# a per-thread memory allocation tracking in a efficient manner
#
# This concept is currently tried to be upstreamed here:
# - https://github.com/ruby/ruby/pull/3978
module Gitlab
module Memory
class Instrumentation
KEY_MAPPING = {
total_allocated_objects: :mem_objects,
total_malloc_bytes: :mem_bytes,
total_mallocs: :mem_mallocs
}.freeze
MUTEX = Mutex.new
def self.available?
Thread.respond_to?(:trace_memory_allocations=) &&
Thread.current.respond_to?(:memory_allocations)
end
# This method changes a global state
def self.ensure_feature_flag!
return unless available?
enabled = Feature.enabled?(:trace_memory_allocations)
return if enabled == Thread.trace_memory_allocations
MUTEX.synchronize do
# This enables or disables feature dynamically
# based on a feature flag
Thread.trace_memory_allocations = enabled
end
end
def self.start_thread_memory_allocations
return unless available?
ensure_feature_flag!
# it will return `nil` if disabled
Thread.current.memory_allocations
end
# This method returns a hash with the following keys:
# - mem_objects: a number of allocated heap slots (as reflected by GC)
# - mem_mallocs: a number of malloc calls
# - mem_bytes: a number of bytes allocated with a mallocs tied to heap slots
def self.measure_thread_memory_allocations(previous)
return unless available?
return unless previous
current = Thread.current.memory_allocations
return unless current
# calculate difference in a memory allocations
previous.to_h do |key, value|
[KEY_MAPPING.fetch(key), current[key].to_i - value]
end
end
def self.with_memory_allocations
previous = self.start_thread_memory_allocations
yield
self.measure_thread_memory_allocations(previous)
end
end
end
end
......@@ -16,9 +16,7 @@ module Gitlab
# load balancer's IP.
req = Rack::Request.new(env)
Gitlab::RequestContext.instance.client_ip = req.ip
Gitlab::RequestContext.instance.start_thread_cpu_time = Gitlab::Metrics::System.thread_cpu_time
Gitlab::RequestContext.instance.request_start_time = Gitlab::Metrics::System.real_time
::Gitlab::InstrumentationHelper.init_instrumentation_data(request_ip: req.ip)
@app.call(env)
end
......
......@@ -7,7 +7,7 @@ module Gitlab
RequestDeadlineExceeded = Class.new(StandardError)
attr_accessor :client_ip, :start_thread_cpu_time, :request_start_time
attr_accessor :client_ip, :start_thread_cpu_time, :request_start_time, :thread_memory_allocations
class << self
def instance
......
......@@ -11,6 +11,7 @@ module Gitlab
def call(job, queue)
started_time = get_time
base_payload = parse_job(job)
ActiveRecord::LogSubscriber.reset_runtime
Sidekiq.logger.info log_job_start(job, base_payload)
......@@ -38,7 +39,9 @@ module Gitlab
private
def add_instrumentation_keys!(job, output_payload)
output_payload.merge!(job.slice(*::Gitlab::InstrumentationHelper.keys))
instrumentation_values = job.slice(*::Gitlab::InstrumentationHelper.keys).stringify_keys
output_payload.merge!(instrumentation_values)
end
def add_logging_extras!(job, output_payload)
......@@ -47,10 +50,6 @@ module Gitlab
)
end
def add_db_counters!(job, output_payload)
output_payload.merge!(job.slice(*::Gitlab::Metrics::Subscribers::ActiveRecord::DB_COUNTERS))
end
def log_job_start(job, payload)
payload['message'] = "#{base_message(payload)}: start"
payload['job_status'] = 'start'
......@@ -67,7 +66,6 @@ module Gitlab
payload = payload.dup
add_instrumentation_keys!(job, payload)
add_logging_extras!(job, payload)
add_db_counters!(job, payload)
elapsed_time = elapsed(started_time)
add_time_keys!(elapsed_time, payload)
......@@ -93,10 +91,6 @@ module Gitlab
def add_time_keys!(time, payload)
payload['duration_s'] = time[:duration].round(Gitlab::InstrumentationHelper::DURATION_PRECISION)
# ignore `cpu_s` if the platform does not support Process::CLOCK_THREAD_CPUTIME_ID (time[:cputime] == 0)
# supported OS version can be found at: https://www.rubydoc.info/stdlib/core/2.1.6/Process:clock_gettime
payload['cpu_s'] = time[:cputime].round(Gitlab::InstrumentationHelper::DURATION_PRECISION) if time[:cputime] > 0
payload['completed_at'] = Time.now.utc.to_f
end
......@@ -108,17 +102,11 @@ module Gitlab
def elapsed(t0)
t1 = get_time
{
duration: t1[:now] - t0[:now],
cputime: t1[:thread_cputime] - t0[:thread_cputime]
}
{ duration: t1[:now] - t0[:now] }
end
def get_time
{
now: current_time,
thread_cputime: defined?(Process::CLOCK_THREAD_CPUTIME_ID) ? Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) : 0
}
{ now: current_time }
end
def current_time
......
......@@ -4,8 +4,11 @@ module Gitlab
module SidekiqMiddleware
class InstrumentationLogger
def call(worker, job, queue)
::Gitlab::InstrumentationHelper.init_instrumentation_data
yield
ensure
# The Sidekiq logger is called outside the middleware block, so
# we need to modify the job hash to pass along this information
# since RequestStore is only active in the Sidekiq middleware.
......
......@@ -53,14 +53,10 @@ module Gitlab
end
def with_gc_stats
GC.start # perform a full mark-and-sweep
stats_before = GC.stat
stats = ::Gitlab::Memory::Instrumentation.start_thread_memory_allocations
yield
stats_after = GC.stat
@gc_stats = stats_after.map do |key, after_value|
before_value = stats_before[key]
[key, before: before_value, after: after_value, diff: after_value - before_value]
end.to_h
ensure
@gc_stats = ::Gitlab::Memory::Instrumentation.measure_thread_memory_allocations(stats)
end
def with_measure_time
......
......@@ -64,11 +64,11 @@ RSpec.describe 'lograge', type: :request do
)
expect(Lograge.formatter).to receive(:call)
.with(a_hash_including(cpu_s: 0.11))
.with(a_hash_including(cpu_s: 0.111112))
.and_call_original
expect(Lograge.logger).to receive(:send)
.with(anything, include('"cpu_s":0.11'))
.with(anything, include('"cpu_s":0.111112'))
.and_call_original
subject
......@@ -89,6 +89,26 @@ RSpec.describe 'lograge', type: :request do
subject
end
context 'when logging memory allocations' do
include MemoryInstrumentationHelper
before do
skip_memory_instrumentation!
end
it 'logs memory usage metrics' do
expect(Lograge.formatter).to receive(:call)
.with(a_hash_including(:mem_objects))
.and_call_original
expect(Lograge.logger).to receive(:send)
.with(anything, include('"mem_objects":'))
.and_call_original
subject
end
end
it 'limits param size' do
expect(Lograge.formatter).to receive(:call)
.with(a_hash_including(params: limited_params))
......
......@@ -9,12 +9,16 @@ RSpec.describe Gitlab::InstrumentationHelper do
describe '.keys' do
it 'returns all available payload keys' do
expected_keys = [
:cpu_s,
:gitaly_calls,
:gitaly_duration_s,
:rugged_calls,
:rugged_duration_s,
:elasticsearch_calls,
:elasticsearch_duration_s,
:mem_objects,
:mem_bytes,
:mem_mallocs,
:redis_calls,
:redis_duration_s,
:redis_read_bytes,
......@@ -53,10 +57,14 @@ RSpec.describe Gitlab::InstrumentationHelper do
subject { described_class.add_instrumentation_data(payload) }
it 'adds only DB counts by default' do
before do
described_class.init_instrumentation_data
end
it 'includes DB counts' do
subject
expect(payload).to eq(db_count: 0, db_cached_count: 0, db_write_count: 0)
expect(payload).to include(db_count: 0, db_cached_count: 0, db_write_count: 0)
end
context 'when Gitaly calls are made' do
......@@ -114,6 +122,47 @@ RSpec.describe Gitlab::InstrumentationHelper do
expect(payload[:throttle_safelist]).to eq('foobar')
end
end
it 'logs cpu_s duration' do
subject
expect(payload).to include(:cpu_s)
end
context 'when logging memory allocations' do
include MemoryInstrumentationHelper
before do
skip_memory_instrumentation!
end
it 'logs memory usage metrics' do
subject
expect(payload).to include(
:mem_objects,
:mem_bytes,
:mem_mallocs
)
end
context 'when trace_memory_allocations is disabled' do
before do
stub_feature_flags(trace_memory_allocations: false)
Gitlab::Memory::Instrumentation.ensure_feature_flag!
end
it 'does not log memory usage metrics' do
subject
expect(payload).not_to include(
:mem_objects,
:mem_bytes,
:mem_mallocs
)
end
end
end
end
describe '.queue_duration_for_job' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Memory::Instrumentation do
include MemoryInstrumentationHelper
before do
skip_memory_instrumentation!
end
describe '.available?' do
it 'returns true' do
expect(described_class).to be_available
end
end
describe '.start_thread_memory_allocations' do
subject { described_class.start_thread_memory_allocations }
context 'when feature flag trace_memory_allocations is enabled' do
before do
stub_feature_flags(trace_memory_allocations: true)
end
it 'a hash is returned' do
is_expected.not_to be_empty
end
end
context 'when feature flag trace_memory_allocations is disabled' do
before do
stub_feature_flags(trace_memory_allocations: false)
end
it 'a nil is returned' do
is_expected.to be_nil
end
end
context 'when feature is unavailable' do
before do
allow(described_class).to receive(:available?) { false }
end
it 'a nil is returned' do
is_expected.to be_nil
end
end
end
describe '.with_memory_allocations' do
let(:ntimes) { 100 }
subject do
described_class.with_memory_allocations do
Array.new(1000).map { '0' * 100 }
end
end
before do
expect(described_class).to receive(:start_thread_memory_allocations).and_call_original
expect(described_class).to receive(:measure_thread_memory_allocations).and_call_original
end
context 'when feature flag trace_memory_allocations is enabled' do
before do
stub_feature_flags(trace_memory_allocations: true)
end
it 'a hash is returned' do
is_expected.to include(
mem_objects: be > 1000,
mem_mallocs: be > 1000,
mem_bytes: be > 100_000 # 100 items * 100 bytes each
)
end
end
context 'when feature flag trace_memory_allocations is disabled' do
before do
stub_feature_flags(trace_memory_allocations: false)
end
it 'a nil is returned' do
is_expected.to be_nil
end
end
context 'when feature is unavailable' do
before do
allow(described_class).to receive(:available?) { false }
end
it 'a nil is returned' do
is_expected.to be_nil
end
end
end
end
......@@ -18,9 +18,11 @@ RSpec.describe Gitlab::Middleware::RequestContext do
end
describe '#call' do
context 'setting the client ip' do
subject { Gitlab::RequestContext.instance.client_ip }
let(:instance) { Gitlab::RequestContext.instance }
subject { described_class.new(app).call(env) }
context 'setting the client ip' do
context 'with X-Forwarded-For headers' do
let(:load_balancer_ip) { '1.2.3.4' }
let(:headers) do
......@@ -33,13 +35,7 @@ RSpec.describe Gitlab::Middleware::RequestContext do
let(:env) { Rack::MockRequest.env_for("/").merge(headers) }
it 'returns the load balancer IP' do
endpoint = proc do
[200, {}, ["Hello"]]
end
described_class.new(endpoint).call(env)
expect(subject).to eq(load_balancer_ip)
expect { subject }.to change { instance.client_ip }.from(nil).to(load_balancer_ip)
end
end
......@@ -47,32 +43,19 @@ RSpec.describe Gitlab::Middleware::RequestContext do
let(:ip) { '192.168.1.11' }
before do
allow_next_instance_of(Rack::Request) do |instance|
allow(instance).to receive(:ip).and_return(ip)
allow_next_instance_of(Rack::Request) do |request|
allow(request).to receive(:ip).and_return(ip)
end
described_class.new(app).call(env)
end
it { is_expected.to eq(ip) }
end
it 'sets the `client_ip`' do
expect { subject }.to change { instance.client_ip }.from(nil).to(ip)
end
context 'before RequestContext middleware run' do
it { is_expected.to be_nil }
it 'sets the `request_start_time`' do
expect { subject }.to change { instance.request_start_time }.from(nil).to(Float)
end
end
end
end
context 'setting the thread cpu time' do
it 'sets the `start_thread_cpu_time`' do
expect { described_class.new(app).call(env) }
.to change { Gitlab::RequestContext.instance.start_thread_cpu_time }.from(nil).to(Float)
end
end
context 'setting the request start time' do
it 'sets the `request_start_time`' do
expect { described_class.new(app).call(env) }
.to change { Gitlab::RequestContext.instance.request_start_time }.from(nil).to(Float)
end
end
end
# frozen_string_literal: true
# Memory instrumentation can only be done if running on a valid Ruby
#
# This concept is currently tried to be upstreamed here:
# - https://github.com/ruby/ruby/pull/3978
module MemoryInstrumentationHelper
def skip_memory_instrumentation!
return if ::Gitlab::Memory::Instrumentation.available?
# if we are running in CI, a test cannot be skipped
return if ENV['CI']
skip 'Missing a memory instrumentation patch. ' \
'More information can be found here: https://gitlab.com/gitlab-org/gitlab/-/issues/296530.'
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment