Commit 8d9e929a authored by Kamil Trzciński

Merge branch '289838-add-vm-stats-endpoint' into 'master'

Add /-/metrics/system endpoint

See merge request gitlab-org/gitlab!51095
parents c6f861ac 1eb2d69c
...@@ -18,9 +18,19 @@ class MetricsController < ActionController::Base ...@@ -18,9 +18,19 @@ class MetricsController < ActionController::Base
render plain: response, content_type: 'text/plain; version=0.0.4' render plain: response, content_type: 'text/plain; version=0.0.4'
end end
# GET /-/metrics/system
# Renders the hash built by #system_metrics as a JSON response.
def system
  payload = system_metrics
  render(json: payload)
end
private private
def metrics_service def metrics_service
@metrics_service ||= MetricsService.new @metrics_service ||= MetricsService.new
end end
# Takes the summary hash from Gitlab::Metrics::System and adds a :worker_id
# entry holding the value reported by Prometheus::PidProvider.
def system_metrics
  stats = Gitlab::Metrics::System.summary
  stats.merge(worker_id: Prometheus::PidProvider.worker_id)
end
end end
...@@ -92,7 +92,10 @@ Rails.application.routes.draw do ...@@ -92,7 +92,10 @@ Rails.application.routes.draw do
# '/-/health' implemented by BasicHealthCheck middleware # '/-/health' implemented by BasicHealthCheck middleware
get 'liveness' => 'health#liveness' get 'liveness' => 'health#liveness'
get 'readiness' => 'health#readiness' get 'readiness' => 'health#readiness'
resources :metrics, only: [:index] controller :metrics do
get 'metrics', action: :index
get 'metrics/system', action: :system
end
mount Peek::Railtie => '/peek', as: 'peek_routes' mount Peek::Railtie => '/peek', as: 'peek_routes'
get 'runner_setup/platforms' => 'runner_setup#platforms' get 'runner_setup/platforms' => 'runner_setup#platforms'
......
...@@ -128,6 +128,62 @@ console. ...@@ -128,6 +128,62 @@ console.
As a follow-up to finding `N+1` queries with Bullet, consider writing a [QueryRecorder test](query_recorder.md) to prevent a regression. As a follow-up to finding `N+1` queries with Bullet, consider writing a [QueryRecorder test](query_recorder.md) to prevent a regression.
## System stats
During or after profiling, you may want to get detailed information about the Ruby virtual machine process,
such as memory consumption, time spent on CPU, or garbage collector statistics. These are easy to produce individually
through various tools, but for convenience, a summary endpoint has been added that exports this data as a JSON payload:
```shell
curl localhost:3000/-/metrics/system | jq
```
Example output:
```json
{
"version": "ruby 2.7.2p137 (2020-10-01 revision a8323b79eb) [x86_64-linux-gnu]",
"gc_stat": {
"count": 118,
"heap_allocated_pages": 11503,
"heap_sorted_length": 11503,
"heap_allocatable_pages": 0,
"heap_available_slots": 4688580,
"heap_live_slots": 3451712,
"heap_free_slots": 1236868,
"heap_final_slots": 0,
"heap_marked_slots": 3451450,
"heap_eden_pages": 11503,
"heap_tomb_pages": 0,
"total_allocated_pages": 11503,
"total_freed_pages": 0,
"total_allocated_objects": 32679478,
"total_freed_objects": 29227766,
"malloc_increase_bytes": 84760,
"malloc_increase_bytes_limit": 32883343,
"minor_gc_count": 88,
"major_gc_count": 30,
"compact_count": 0,
"remembered_wb_unprotected_objects": 114228,
"remembered_wb_unprotected_objects_limit": 228456,
"old_objects": 3185330,
"old_objects_limit": 6370660,
"oldmalloc_increase_bytes": 21838024,
"oldmalloc_increase_bytes_limit": 119181499
},
"memory_rss": 1326501888,
"memory_uss": 1048563712,
"memory_pss": 1139554304,
"time_cputime": 82.885264633,
"time_realtime": 1610459445.5579069,
"time_monotonic": 24001.23145713,
"worker_id": "puma_0"
}
```
NOTE:
This endpoint is only available for Rails web workers. Sidekiq workers cannot be inspected this way.
## Settings that impact performance ## Settings that impact performance
### Application settings ### Application settings
......
...@@ -17,6 +17,20 @@ module Gitlab ...@@ -17,6 +17,20 @@ module Gitlab
RSS_PATTERN = /VmRSS:\s+(?<value>\d+)/.freeze RSS_PATTERN = /VmRSS:\s+(?<value>\d+)/.freeze
MAX_OPEN_FILES_PATTERN = /Max open files\s*(?<value>\d+)/.freeze MAX_OPEN_FILES_PATTERN = /Max open files\s*(?<value>\d+)/.freeze
# Returns a Hash describing the current process: the Ruby description string,
# GC.stat, the RSS/USS/PSS memory readings, and the CPU, real and monotonic
# time readings.
def self.summary
  # Read USS and PSS with a single call and unpack both values.
  uss, pss = memory_usage_uss_pss.values_at(:uss, :pss)

  {
    version: RUBY_DESCRIPTION,
    gc_stat: GC.stat,
    memory_rss: memory_usage_rss,
    memory_uss: uss,
    memory_pss: pss,
    time_cputime: cpu_time,
    time_realtime: real_time,
    time_monotonic: monotonic_time
  }
end
# Returns the current process' RSS (resident set size) in bytes. # Returns the current process' RSS (resident set size) in bytes.
def self.memory_usage_rss def self.memory_usage_rss
sum_matches(PROC_STATUS_PATH, rss: RSS_PATTERN)[:rss].kilobytes sum_matches(PROC_STATUS_PATH, rss: RSS_PATTERN)[:rss].kilobytes
......
...@@ -28,8 +28,38 @@ RSpec.describe MetricsController, :request_store do ...@@ -28,8 +28,38 @@ RSpec.describe MetricsController, :request_store do
end end
end end
# Shared contract for MetricsController actions that are restricted by client IP.
#
# examples - name of the shared example group to run when the client IP is allowed.
# action   - controller action requested in the rejected-IP case. Defaults to
#            :index so existing callers behave as before; callers covering a
#            different action should pass it explicitly.
shared_examples_for 'protected metrics endpoint' do |examples, action = :index|
  context 'accessed from whitelisted ip' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(whitelisted_ip)
    end

    it_behaves_like examples
  end

  context 'accessed from ip in whitelisted range' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(ip_in_whitelisted_range)
    end

    it_behaves_like examples
  end

  context 'accessed from not whitelisted ip' do
    before do
      allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(not_whitelisted_ip)
    end

    it 'returns the expected error response' do
      # The action used to be hard-coded to :index, which meant the rejection
      # path of any other action sharing these examples was never requested.
      get action

      expect(response).to have_gitlab_http_status(:not_found)
    end
  end
end
describe '#index' do describe '#index' do
shared_examples_for 'endpoint providing metrics' do shared_examples_for 'providing metrics' do
it 'returns prometheus metrics' do it 'returns prometheus metrics' do
get :index get :index
...@@ -51,32 +81,35 @@ RSpec.describe MetricsController, :request_store do ...@@ -51,32 +81,35 @@ RSpec.describe MetricsController, :request_store do
end end
end end
context 'accessed from whitelisted ip' do include_examples 'protected metrics endpoint', 'providing metrics'
before do
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(whitelisted_ip)
end end
it_behaves_like 'endpoint providing metrics' describe '#system' do
shared_examples_for 'providing system stats' do
let(:summary) do
{
version: 'ruby-3.0-patch1',
memory_rss: 1024
}
end end
context 'accessed from ip in whitelisted range' do it 'renders system stats JSON' do
before do expect(Prometheus::PidProvider).to receive(:worker_id).and_return('worker-0')
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(ip_in_whitelisted_range) expect(Gitlab::Metrics::System).to receive(:summary).and_return(summary)
end
it_behaves_like 'endpoint providing metrics' get :system
end
context 'accessed from not whitelisted ip' do expect(response).to have_gitlab_http_status(:ok)
before do expect(response_json['version']).to eq('ruby-3.0-patch1')
allow(Gitlab::RequestContext.instance).to receive(:client_ip).and_return(not_whitelisted_ip) expect(response_json['worker_id']).to eq('worker-0')
expect(response_json['memory_rss']).to eq(1024)
end end
it 'returns the expected error response' do
get :index
expect(response).to have_gitlab_http_status(:not_found)
end end
include_examples 'protected metrics endpoint', 'providing system stats'
end end
def response_json
Gitlab::Json.parse(response.body)
end end
end end
...@@ -96,6 +96,25 @@ RSpec.describe Gitlab::Metrics::System do ...@@ -96,6 +96,25 @@ RSpec.describe Gitlab::Metrics::System do
expect(described_class.memory_usage_uss_pss).to eq(uss: 475136, pss: 515072) expect(described_class.memory_usage_uss_pss).to eq(uss: 475136, pss: 515072)
end end
end end
describe '.summary' do
  # With both /proc files mocked, the memory figures must match the fixtures
  # and the remaining fields must be present with the expected types.
  it 'contains a selection of the available fields' do
    stub_const('RUBY_DESCRIPTION', 'ruby-3.0-patch1')
    mock_existing_proc_file('/proc/self/status', proc_status)
    mock_existing_proc_file('/proc/self/smaps_rollup', proc_smaps_rollup)

    stats = described_class.summary

    expect(stats[:version]).to eq('ruby-3.0-patch1')
    expect(stats[:gc_stat].keys).to eq(GC.stat.keys)
    expect(stats).to include(
      memory_rss: 2527232,
      memory_uss: 475136,
      memory_pss: 515072
    )
    expect(stats).to include(
      time_cputime: a_kind_of(Float),
      time_realtime: a_kind_of(Float),
      time_monotonic: a_kind_of(Float)
    )
  end
end
end end
context 'when /proc files do not exist' do context 'when /proc files do not exist' do
...@@ -128,6 +147,21 @@ RSpec.describe Gitlab::Metrics::System do ...@@ -128,6 +147,21 @@ RSpec.describe Gitlab::Metrics::System do
expect(described_class.max_open_file_descriptors).to eq(0) expect(described_class.max_open_file_descriptors).to eq(0)
end end
end end
describe '.summary' do
  # Without the /proc files the memory readings are expected to be zero,
  # while the other fields are still populated.
  it 'returns only available fields' do
    stats = described_class.summary

    expect(stats[:version]).to be_a(String)
    expect(stats[:gc_stat].keys).to eq(GC.stat.keys)
    expect(stats).to include(
      memory_rss: 0,
      memory_uss: 0,
      memory_pss: 0
    )
    expect(stats).to include(
      time_cputime: a_kind_of(Float),
      time_realtime: a_kind_of(Float),
      time_monotonic: a_kind_of(Float)
    )
  end
end
end end
describe '.cpu_time' do describe '.cpu_time' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment