Commit 5bc099c2 authored by Pawel Chojnacki

Prometheus metrics first pass

metrics wip
parent f74b133a
require 'prometheus/client/formats/text'
class HealthController < ActionController::Base
protect_from_forgery with: :exception
include RequiresHealthToken
......@@ -24,7 +26,7 @@ class HealthController < ActionController::Base
results = CHECKS.flat_map(&:metrics)
response = results.map(&method(:metric_to_prom_line)).join("\n")
response = ::Prometheus::Client::Formats::Text.marshal_multiprocess
render text: response, content_type: 'text/plain; version=0.0.4'
end
......
......@@ -11,6 +11,5 @@ class PromService
@login = Prometheus::Client::Counter.new(:login, 'Login counter')
@prometheus.register(@login)
end
end
......@@ -15,11 +15,7 @@ if defined?(Unicorn)
end
# TODO(lyda): Needs to be set externally.
ENV['prometheus_multiproc_dir'] = '/tmp'
require 'prometheus/client/rack/exporter'
use Prometheus::Client::Rack::Exporter, path: '/admin/metrics'
ENV['prometheus_multiproc_dir'] = '/tmp/somestuff'
end
require ::File.expand_path('../config/environment', __FILE__)
......
require 'prometheus/client'
module Gitlab
module Metrics
extend Gitlab::CurrentSettings
......@@ -9,6 +11,7 @@ module Gitlab
def self.settings
@settings ||= {
enabled: current_application_settings[:metrics_enabled],
prometheus_metrics_enabled: true,
pool_size: current_application_settings[:metrics_pool_size],
timeout: current_application_settings[:metrics_timeout],
method_call_threshold: current_application_settings[:metrics_method_call_threshold],
......@@ -19,10 +22,18 @@ module Gitlab
}
end
def self.enabled?
# Reports the :prometheus_metrics_enabled setting, substituting false when
# the setting is nil or false.
def self.prometheus_metrics_enabled?
  flag = settings[:prometheus_metrics_enabled]
  return false unless flag

  flag
end
# Reports the :enabled setting (the InfluxDB switch, going by the method
# name), substituting false when the setting is nil or false.
def self.influx_metrics_enabled?
  flag = settings[:enabled]
  return false unless flag

  flag
end
# Truthy when either metrics backend is switched on. The InfluxDB flag is
# consulted first; the Prometheus flag is only checked when it is falsy.
def self.enabled?
  any_backend = influx_metrics_enabled? || prometheus_metrics_enabled?
  any_backend || false
end
# True when the running interpreter identifies itself as 'ruby' in RUBY_ENGINE.
def self.mri?
  RUBY_ENGINE.eql?('ruby')
end
......@@ -38,10 +49,58 @@ module Gitlab
@pool
end
# Memoized accessor for the Prometheus client registry: the registry is
# fetched from ::Prometheus::Client on first use and reused afterwards.
def self.registry
  return @registry if @registry

  @registry = ::Prometheus::Client.registry
end
# Resolves a counter metric. Candidates are tried in order and the first
# truthy one wins: the dummy metric, the metric already registered under
# +name+, and finally a newly registered counter.
def self.counter(name, docstring, base_labels = {})
  metric = dummy_metric
  metric ||= registry.get(name)
  metric || registry.counter(name, docstring, base_labels)
end
# Resolves a summary metric. Candidates are tried in order and the first
# truthy one wins: the dummy metric, the metric already registered under
# +name+, and finally a newly registered summary.
def self.summary(name, docstring, base_labels = {})
  metric = dummy_metric
  metric ||= registry.get(name)
  metric || registry.summary(name, docstring, base_labels)
end
# Resolves a gauge metric. Candidates are tried in order and the first
# truthy one wins: the dummy metric, the metric already registered under
# +name+, and finally a newly registered gauge.
def self.gauge(name, docstring, base_labels = {})
  metric = dummy_metric
  metric ||= registry.get(name)
  metric || registry.gauge(name, docstring, base_labels)
end
# Resolves a histogram metric. Candidates are tried in order and the first
# truthy one wins: the dummy metric, the metric already registered under
# +name+, and finally a newly registered histogram.
#
# name        - metric name used for the registry lookup and registration.
# docstring   - description handed to the registry on registration.
# base_labels - labels handed to the registry on registration.
# buckets     - bucket boundaries handed to the registry on registration;
#               defaults to the Prometheus client's DEFAULT_BUCKETS.
#
# The default is fully qualified because a bare `Histogram` constant is not
# resolvable from this namespace and raised NameError whenever the caller
# omitted +buckets+.
def self.histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS)
  dummy_metric || registry.get(name) || registry.histogram(name, docstring, base_labels, buckets)
end
# Builds a fresh DummyMetric while Prometheus metrics are disabled; returns
# nil when they are enabled.
def self.dummy_metric
  return if prometheus_metrics_enabled?

  DummyMetric.new
end
def self.submit_metrics(metrics)
prepared = prepare_metrics(metrics)
pool.with do |connection|
if prometheus_metrics_enabled?
metrics.map do |metric|
known = [:series, :tags,:values, :timestamp]
value = metric&.[](:values)&.[](:value)
handled= [:rails_gc_statistics]
if handled.include? metric[:series].to_sym
next
end
if metric.keys.any? {|k| !known.include?(k)} || value.nil?
print metric
print "\n"
{:series=>"rails_gc_statistics", :tags=>{}, :values=>{:count=>0, :heap_allocated_pages=>4245, :heap_sorted_length=>4426, :heap_allocatable_pages=>0, :heap_available_slots=>1730264, :heap_live_slots=>1729935, :heap_free_slots=>329, :heap_final_slots=>0, :heap_marked_slots=>1184216, :heap_swept_slots=>361843, :heap_eden_pages=>4245, :heap_tomb_pages=>0, :total_allocated_pages=>4245, :total_freed_pages=>0, :total_allocated_objects=>15670757, :total_freed_objects=>13940822, :malloc_increase_bytes=>4842256, :malloc_increase_bytes_limit=>29129457, :minor_gc_count=>0, :major_gc_count=>0, :remembered_wb_unprotected_objects=>39905, :remembered_wb_unprotected_objects_limit=>74474, :old_objects=>1078731, :old_objects_limit=>1975860, :oldmalloc_increase_bytes=>4842640, :oldmalloc_increase_bytes_limit=>31509677, :total_time=>0.0}, :timestamp=>1494356175592659968}
next
end
metric_value = gauge(metric[:series].to_sym, metric[:series])
metric_value.set(metric[:tags], value)
end
end
pool&.with do |connection|
prepared.each_slice(settings[:packet_size]) do |slice|
begin
connection.write_points(slice)
......@@ -148,7 +207,7 @@ module Gitlab
# When enabled this should be set before being used as the usual pattern
# "@foo ||= bar" is _not_ thread-safe.
if enabled?
if influx_metrics_enabled?
@pool = ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do
host = settings[:host]
port = settings[:port]
......
module Gitlab
  module Metrics
    # Stand-in for ::Prometheus::Client::Metric and the metric types derived
    # from it. Write operations are accepted and ignored; read operations are
    # not supported and raise.
    class DummyMetric
      # Write side: silently accept any arguments and do nothing.

      # counter
      def increment(*_args); end

      # gauge
      def set(*_args); end

      # histogram / summary
      def observe(*_args); end

      # Read side: nothing is recorded, so reading is not supported.

      def get(*_args)
        raise NotImplementedError
      end

      def values(*_args)
        raise NotImplementedError
      end
    end
  end
end
module Gitlab
  module Metrics
    # Background sampler that periodically calls `sample` from its own thread.
    #
    # This class is used to gather statistics that can't be directly associated
    # with a transaction such as system memory usage, garbage collection
    # statistics, etc.
    #
    # NOTE(review): `sample` is not defined in the visible part of this class;
    # confirm it is provided elsewhere before relying on `start`.
    class PrometheusSamples
      # interval - The sampling interval in seconds.
      def initialize(interval = Metrics.settings[:sample_interval])
        interval_half = interval.to_f / 2

        @interval = interval
        # Candidate adjustments in 0.1s increments, from -interval/2 to +interval/2.
        @interval_steps = (-interval_half..interval_half).step(0.1).to_a
        @last_step = nil
      end

      # Spawns the sampling thread and returns it. An exception raised inside
      # the thread aborts the process (abort_on_exception).
      def start
        Thread.new do
          Thread.current.abort_on_exception = true

          loop do
            sleep(sleep_interval)

            sample
          end
        end
      end

      def sidekiq?
        Sidekiq.server?
      end

      # Returns the sleep interval with a random adjustment.
      #
      # The random adjustment is put in place to ensure we:
      #
      # 1. Don't generate samples at the exact same interval every time (thus
      #    potentially missing anything that happens in between samples).
      # 2. Don't sample data at the same interval two times in a row.
      #
      # When only a single adjustment is available (interval shorter than the
      # 0.1s step) that adjustment is reused rather than retrying forever.
      # Returns nil when no adjustment is available at all.
      def sleep_interval
        candidates = @interval_steps.reject { |step| step == @last_step }
        candidates = @interval_steps if candidates.empty?

        @last_step = candidates.sample
        @interval + @last_step if @last_step
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment