Commit 8d9e929a authored by Kamil Trzciński

Merge branch '289838-add-vm-stats-endpoint' into 'master'

Add /-/metrics/system endpoint

See merge request gitlab-org/gitlab!51095
parents c6f861ac 1eb2d69c
......@@ -18,9 +18,19 @@ class MetricsController < ActionController::Base
render plain: response, content_type: 'text/plain; version=0.0.4'
end
# Responds with the system metrics payload rendered as JSON.
def system
  payload = system_metrics

  render(json: payload)
end
private
# Returns the MetricsService instance, building it on first use and
# reusing the same object on later calls.
def metrics_service
  return @metrics_service if @metrics_service

  @metrics_service = MetricsService.new
end
# Builds the system stats payload: the summary reported by
# Gitlab::Metrics::System with the current worker id added under :worker_id.
def system_metrics
  stats = Gitlab::Metrics::System.summary

  stats.merge(worker_id: Prometheus::PidProvider.worker_id)
end
end
......@@ -92,7 +92,10 @@ Rails.application.routes.draw do
# '/-/health' implemented by BasicHealthCheck middleware
get 'liveness' => 'health#liveness'
get 'readiness' => 'health#readiness'
resources :metrics, only: [:index]
# Both metrics routes are served by the metrics controller:
# 'metrics' maps to its index action and 'metrics/system' to its system action.
controller :metrics do
get 'metrics', action: :index
get 'metrics/system', action: :system
end
mount Peek::Railtie => '/peek', as: 'peek_routes'
get 'runner_setup/platforms' => 'runner_setup#platforms'
......
......@@ -128,6 +128,62 @@ console.
As a follow-up to finding `N+1` queries with Bullet, consider writing a [QueryRecorder test](query_recorder.md) to prevent a regression.
## System stats
During or after profiling, you may want to get detailed information about the Ruby virtual machine process,
such as memory consumption, time spent on CPU, or garbage collector statistics. These are easy to produce individually
through various tools, but for convenience, a summary endpoint has been added that exports this data as a JSON payload:
```shell
curl localhost:3000/-/metrics/system | jq
```
Example output:
```json
{
"version": "ruby 2.7.2p137 (2020-10-01 revision a8323b79eb) [x86_64-linux-gnu]",
"gc_stat": {
"count": 118,
"heap_allocated_pages": 11503,
"heap_sorted_length": 11503,
"heap_allocatable_pages": 0,
"heap_available_slots": 4688580,
"heap_live_slots": 3451712,
"heap_free_slots": 1236868,
"heap_final_slots": 0,
"heap_marked_slots": 3451450,
"heap_eden_pages": 11503,
"heap_tomb_pages": 0,
"total_allocated_pages": 11503,
"total_freed_pages": 0,
"total_allocated_objects": 32679478,
"total_freed_objects": 29227766,
"malloc_increase_bytes": 84760,
"malloc_increase_bytes_limit": 32883343,
"minor_gc_count": 88,
"major_gc_count": 30,
"compact_count": 0,
"remembered_wb_unprotected_objects": 114228,
"remembered_wb_unprotected_objects_limit": 228456,
"old_objects": 3185330,
"old_objects_limit": 6370660,
"oldmalloc_increase_bytes": 21838024,
"oldmalloc_increase_bytes_limit": 119181499
},
"memory_rss": 1326501888,
"memory_uss": 1048563712,
"memory_pss": 1139554304,
"time_cputime": 82.885264633,
"time_realtime": 1610459445.5579069,
"time_monotonic": 24001.23145713,
"worker_id": "puma_0"
}
```
NOTE:
This endpoint is only available for Rails web workers. Sidekiq workers cannot be inspected this way.
## Settings that impact performance
### Application settings
......
......@@ -17,6 +17,20 @@ module Gitlab
# Matches a "VmRSS:" entry and captures the number that follows it as `value`.
RSS_PATTERN = /VmRSS:\s+(?<value>\d+)/.freeze
# Matches a "Max open files" entry and captures the first number after it as `value`.
MAX_OPEN_FILES_PATTERN = /Max open files\s*(?<value>\d+)/.freeze
# Returns a Hash summarising the current Ruby process:
#
#   :version        - RUBY_DESCRIPTION
#   :gc_stat        - the GC.stat hash
#   :memory_rss     - result of memory_usage_rss
#   :memory_uss     - :uss entry of memory_usage_uss_pss
#   :memory_pss     - :pss entry of memory_usage_uss_pss
#   :time_cputime   - result of cpu_time
#   :time_realtime  - result of real_time
#   :time_monotonic - result of monotonic_time
def self.summary
  # Read USS and PSS with a single call so both figures come from the same sample.
  uss_pss = memory_usage_uss_pss

  {
    version: RUBY_DESCRIPTION,
    gc_stat: GC.stat,
    memory_rss: memory_usage_rss
  }.merge(
    memory_uss: uss_pss[:uss],
    memory_pss: uss_pss[:pss],
    time_cputime: cpu_time,
    time_realtime: real_time,
    time_monotonic: monotonic_time
  )
end
# Returns the current process' RSS (resident set size) in bytes.
def self.memory_usage_rss
sum_matches(PROC_STATUS_PATH, rss: RSS_PATTERN)[:rss].kilobytes
......
......@@ -28,8 +28,38 @@ RSpec.describe MetricsController, :request_store do
end
end
# Shared examples checking that a metrics endpoint is only served to allowed
# client IPs.
#
# examples - name of the shared example group to run when the client IP is
#            allowed (passed straight to `it_behaves_like`).
# action   - controller action requested when checking the rejected-IP case.
#            Defaults to :index so existing callers keep their behaviour;
#            callers covering another endpoint should pass its action
#            (e.g. :system) so that endpoint's protection is exercised rather
#            than always hitting :index.
shared_examples_for 'protected metrics endpoint' do |examples, action = :index|
  context 'accessed from whitelisted ip' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(whitelisted_ip)
    end

    it_behaves_like examples
  end

  context 'accessed from ip in whitelisted range' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(ip_in_whitelisted_range)
    end

    it_behaves_like examples
  end

  context 'accessed from not whitelisted ip' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(not_whitelisted_ip)
    end

    it 'returns the expected error response' do
      # Request the endpoint under test instead of a hard-coded :index.
      get action

      expect(response).to have_gitlab_http_status(:not_found)
    end
  end
end
describe '#index' do
shared_examples_for 'endpoint providing metrics' do
shared_examples_for 'providing metrics' do
it 'returns prometheus metrics' do
get :index
......@@ -51,32 +81,35 @@ RSpec.describe MetricsController, :request_store do
end
end
context 'accessed from whitelisted ip' do
before do
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(whitelisted_ip)
include_examples 'protected metrics endpoint', 'providing metrics'
end
it_behaves_like 'endpoint providing metrics'
describe '#system' do
shared_examples_for 'providing system stats' do
let(:summary) do
{
version: 'ruby-3.0-patch1',
memory_rss: 1024
}
end
context 'accessed from ip in whitelisted range' do
before do
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(ip_in_whitelisted_range)
end
it 'renders system stats JSON' do
expect(Prometheus::PidProvider).to receive(:worker_id).and_return('worker-0')
expect(Gitlab::Metrics::System).to receive(:summary).and_return(summary)
it_behaves_like 'endpoint providing metrics'
end
get :system
context 'accessed from not whitelisted ip' do
before do
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(not_whitelisted_ip)
expect(response).to have_gitlab_http_status(:ok)
expect(response_json['version']).to eq('ruby-3.0-patch1')
expect(response_json['worker_id']).to eq('worker-0')
expect(response_json['memory_rss']).to eq(1024)
end
it 'returns the expected error response' do
get :index
expect(response).to have_gitlab_http_status(:not_found)
end
include_examples 'protected metrics endpoint', 'providing system stats'
end
# Parses the body of the current response with Gitlab::Json and returns the result.
def response_json
  raw_body = response.body

  Gitlab::Json.parse(raw_body)
end
end
......@@ -96,6 +96,25 @@ RSpec.describe Gitlab::Metrics::System do
expect(described_class.memory_usage_uss_pss).to eq(uss: 475136, pss: 515072)
end
end
describe '.summary' do
  # With the /proc fixtures mocked, the memory figures are the values parsed from them.
  it 'contains a selection of the available fields' do
    stub_const('RUBY_DESCRIPTION', 'ruby-3.0-patch1')
    mock_existing_proc_file('/proc/self/status', proc_status)
    mock_existing_proc_file('/proc/self/smaps_rollup', proc_smaps_rollup)

    stats = described_class.summary

    expect(stats).to include(
      version: 'ruby-3.0-patch1',
      memory_rss: 2527232,
      memory_uss: 475136,
      memory_pss: 515072,
      time_cputime: a_kind_of(Float),
      time_realtime: a_kind_of(Float),
      time_monotonic: a_kind_of(Float)
    )
    expect(stats[:gc_stat].keys).to eq(GC.stat.keys)
  end
end
end
context 'when /proc files do not exist' do
......@@ -128,6 +147,21 @@ RSpec.describe Gitlab::Metrics::System do
expect(described_class.max_open_file_descriptors).to eq(0)
end
end
describe '.summary' do
  # Without the /proc files the memory figures are expected to be 0,
  # while the remaining fields are still populated.
  it 'returns only available fields' do
    stats = described_class.summary

    expect(stats).to include(
      version: a_kind_of(String),
      memory_rss: 0,
      memory_uss: 0,
      memory_pss: 0,
      time_cputime: a_kind_of(Float),
      time_realtime: a_kind_of(Float),
      time_monotonic: a_kind_of(Float)
    )
    expect(stats[:gc_stat].keys).to eq(GC.stat.keys)
  end
end
end
describe '.cpu_time' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment