Commit 99148939 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'metrics-tuning' into 'master'

Tuning of metrics data to store

This removes data we don't really need, as well as making sure we don't overload any cache stores or databases.

See merge request !2265
parents 525f8fb4 8de491a6
if Gitlab::Metrics.enabled?
require 'influxdb'
require 'socket'
require 'connection_pool'
require 'method_source'
......
......@@ -6,16 +6,21 @@ module Gitlab
METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s
PATH_REGEX = /^#{RAILS_ROOT}\/?/
def self.pool_size
current_application_settings[:metrics_pool_size] || 16
end
def self.timeout
current_application_settings[:metrics_timeout] || 10
def self.settings
@settings ||= {
enabled: current_application_settings[:metrics_enabled],
pool_size: current_application_settings[:metrics_pool_size],
timeout: current_application_settings[:metrics_timeout],
method_call_threshold: current_application_settings[:metrics_method_call_threshold],
host: current_application_settings[:metrics_host],
username: current_application_settings[:metrics_username],
password: current_application_settings[:metrics_password],
port: current_application_settings[:metrics_port]
}
end
def self.enabled?
current_application_settings[:metrics_enabled] || false
settings[:enabled] || false
end
def self.mri?
......@@ -26,18 +31,13 @@ module Gitlab
# This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows
# things down too much.
@method_call_threshold ||=
(current_application_settings[:metrics_method_call_threshold] || 10)
@method_call_threshold ||= settings[:method_call_threshold]
end
def self.pool
@pool
end
def self.hostname
@hostname
end
# Returns a relative path and line number based on the last application call
# frame.
def self.last_relative_application_frame
......@@ -85,16 +85,14 @@ module Gitlab
value.to_s.gsub('=', '\\=')
end
@hostname = Socket.gethostname
# When enabled this should be set before being used as the usual pattern
# "@foo ||= bar" is _not_ thread-safe.
if enabled?
@pool = ConnectionPool.new(size: pool_size, timeout: timeout) do
host = current_application_settings[:metrics_host]
user = current_application_settings[:metrics_username]
pw = current_application_settings[:metrics_password]
port = current_application_settings[:metrics_port]
@pool = ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do
host = settings[:host]
user = settings[:username]
pw = settings[:password]
port = settings[:port]
InfluxDB::Client.
new(udp: { host: host, port: port }, username: user, password: pw)
......
......@@ -123,6 +123,8 @@ module Gitlab
duration = (Time.now - start) * 1000.0
if duration >= Gitlab::Metrics.method_call_threshold
trans.increment(:method_duration, duration)
trans.add_metric(Gitlab::Metrics::Instrumentation::SERIES,
{ duration: duration },
method: #{label.inspect})
......
......@@ -17,11 +17,8 @@ module Gitlab
# Returns a Hash in a format that can be directly written to InfluxDB.
def to_hash
{
series: @series,
tags: @tags.merge(
hostname: Metrics.hostname,
process_type: Sidekiq.server? ? 'sidekiq' : 'rails'
),
series: @series,
tags: @tags,
values: @values,
timestamp: @created_at.to_i * 1_000_000_000
}
......
module Gitlab
module Metrics
# Class for producing SQL queries with sensitive data stripped out.
class ObfuscatedSQL
REPLACEMENT = /
\d+(\.\d+)? # integers, floats
| '.+?' # single quoted strings
| \/.+?(?<!\\)\/ # regexps (including escaped slashes)
/x
MYSQL_REPLACEMENTS = /
".+?" # double quoted strings
/x
# Regex to replace consecutive placeholders with a single one indicating
# the length. This can be useful when a "IN" statement uses thousands of
# IDs (storing this would just be a waste of space).
CONSECUTIVE = /(\?(\s*,\s*)?){2,}/
# sql - The raw SQL query as a String.
def initialize(sql)
@sql = sql
end
# Returns a new, obfuscated SQL query.
def to_s
regex = REPLACEMENT
if Gitlab::Database.mysql?
regex = Regexp.union(regex, MYSQL_REPLACEMENTS)
end
sql = @sql.gsub(regex, '?').gsub(CONSECUTIVE) do |match|
"#{match.count(',') + 1} values"
end
# InfluxDB escapes double quotes upon output, so lets get rid of them
# whenever we can.
if Gitlab::Database.postgresql?
sql = sql.delete('"')
end
sql.tr("\n", ' ')
end
end
end
end
......@@ -50,12 +50,11 @@ module Gitlab
end
def sample_memory_usage
@metrics << Metric.new('memory_usage', value: System.memory_usage)
add_metric('memory_usage', value: System.memory_usage)
end
def sample_file_descriptors
@metrics << Metric.
new('file_descriptors', value: System.file_descriptor_count)
add_metric('file_descriptors', value: System.file_descriptor_count)
end
if Metrics.mri?
......@@ -69,7 +68,7 @@ module Gitlab
counts['Symbol'] = Symbol.all_symbols.length
counts.each do |name, count|
@metrics << Metric.new('object_counts', { count: count }, type: name)
add_metric('object_counts', { count: count }, type: name)
end
end
else
......@@ -91,7 +90,17 @@ module Gitlab
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
@metrics << Metric.new('gc_statistics', stats)
add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
def sidekiq?
Sidekiq.server?
end
end
end
......
......@@ -19,6 +19,7 @@ module Gitlab
values = values_for(event)
tags = tags_for(event)
current_transaction.increment(:view_duration, event.duration)
current_transaction.add_metric(SERIES, values, tags)
end
......
module Gitlab
module Metrics
module Subscribers
# Class for tracking raw SQL queries.
#
# Queries are obfuscated before being logged to ensure no private data is
# exposed via InfluxDB/Grafana.
# Class for tracking the total query duration of a transaction.
class ActiveRecord < ActiveSupport::Subscriber
attach_to :active_record
SERIES = 'sql_queries'
def sql(event)
return unless current_transaction
values = values_for(event)
tags = tags_for(event)
current_transaction.add_metric(SERIES, values, tags)
current_transaction.increment(:sql_duration, event.duration)
end
private
def values_for(event)
{ duration: event.duration }
end
def tags_for(event)
sql = ObfuscatedSQL.new(event.payload[:sql]).to_s
tags = { sql: sql }
file, line = Metrics.last_relative_application_frame
if file and line
tags[:file] = file
tags[:line] = line
end
tags
end
def current_transaction
Transaction.current
end
......
......@@ -4,15 +4,12 @@ module Gitlab
class Transaction
THREAD_KEY = :_gitlab_metrics_transaction
SERIES = 'transactions'
attr_reader :uuid, :tags
def self.current
Thread.current[THREAD_KEY]
end
# name - The name of this transaction as a String.
def initialize
@metrics = []
@uuid = SecureRandom.uuid
......@@ -20,7 +17,8 @@ module Gitlab
@started_at = nil
@finished_at = nil
@tags = {}
@values = Hash.new(0)
@tags = {}
end
def duration
......@@ -40,9 +38,14 @@ module Gitlab
end
def add_metric(series, values, tags = {})
tags = tags.merge(transaction_id: @uuid)
tags = tags.merge(transaction_id: @uuid)
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
@metrics << Metric.new(series, values, tags)
def increment(name, value)
@values[name] += value
end
def add_tag(key, value)
......@@ -55,12 +58,22 @@ module Gitlab
end
def track_self
add_metric(SERIES, { duration: duration }, @tags)
values = { duration: duration }
@values.each do |name, value|
values[name] = value
end
add_metric('transactions', values, @tags)
end
def submit
Metrics.submit_metrics(@metrics.map(&:to_hash))
end
def sidekiq?
Sidekiq.server?
end
end
end
end
......@@ -48,6 +48,9 @@ describe Gitlab::Metrics::Instrumentation do
allow(described_class).to receive(:transaction).
and_return(transaction)
expect(transaction).to receive(:increment).
with(:method_duration, a_kind_of(Numeric))
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, an_instance_of(Hash),
method: 'Dummy.foo')
......@@ -102,6 +105,9 @@ describe Gitlab::Metrics::Instrumentation do
allow(described_class).to receive(:transaction).
and_return(transaction)
expect(transaction).to receive(:increment).
with(:method_duration, a_kind_of(Numeric))
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, an_instance_of(Hash),
method: 'Dummy#bar')
......
......@@ -37,9 +37,6 @@ describe Gitlab::Metrics::Metric do
it 'includes the tags' do
expect(hash[:tags]).to be_an_instance_of(Hash)
expect(hash[:tags][:hostname]).to be_an_instance_of(String)
expect(hash[:tags][:process_type]).to be_an_instance_of(String)
end
it 'includes the values' do
......
require 'spec_helper'
describe Gitlab::Metrics::ObfuscatedSQL do
describe '#to_s' do
it 'replaces newlines with a space' do
sql = described_class.new("SELECT x\nFROM y")
expect(sql.to_s).to eq('SELECT x FROM y')
end
describe 'using single values' do
it 'replaces a single integer' do
sql = described_class.new('SELECT x FROM y WHERE a = 10')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single float' do
sql = described_class.new('SELECT x FROM y WHERE a = 10.5')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single quoted string' do
sql = described_class.new("SELECT x FROM y WHERE a = 'foo'")
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
if Gitlab::Database.mysql?
it 'replaces a double quoted string' do
sql = described_class.new('SELECT x FROM y WHERE a = "foo"')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
it 'replaces a single regular expression' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces regular expressions using escaped slashes' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo\/bar/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
describe 'using consecutive values' do
it 'replaces multiple integers' do
sql = described_class.new('SELECT x FROM y WHERE z IN (10, 20, 30)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple floats' do
sql = described_class.new('SELECT x FROM y WHERE z IN (1.5, 2.5, 3.5)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple single quoted strings' do
sql = described_class.new("SELECT x FROM y WHERE z IN ('foo', 'bar')")
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
if Gitlab::Database.mysql?
it 'replaces multiple double quoted strings' do
sql = described_class.new('SELECT x FROM y WHERE z IN ("foo", "bar")')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
it 'replaces multiple regular expressions' do
sql = described_class.new('SELECT x FROM y WHERE z IN (/foo/, /bar/)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
if Gitlab::Database.postgresql?
it 'replaces double quotes' do
sql = described_class.new('SELECT "x" FROM "y"')
expect(sql.to_s).to eq('SELECT x FROM y')
end
end
end
end
......@@ -51,8 +51,8 @@ describe Gitlab::Metrics::Sampler do
expect(Gitlab::Metrics::System).to receive(:memory_usage).
and_return(9000)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('memory_usage', value: 9000).
expect(sampler).to receive(:add_metric).
with(/memory_usage/, value: 9000).
and_call_original
sampler.sample_memory_usage
......@@ -64,8 +64,8 @@ describe Gitlab::Metrics::Sampler do
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count).
and_return(4)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('file_descriptors', value: 4).
expect(sampler).to receive(:add_metric).
with(/file_descriptors/, value: 4).
and_call_original
sampler.sample_file_descriptors
......@@ -74,8 +74,8 @@ describe Gitlab::Metrics::Sampler do
describe '#sample_objects' do
it 'adds a metric containing the amount of allocated objects' do
expect(Gitlab::Metrics::Metric).to receive(:new).
with('object_counts', an_instance_of(Hash), an_instance_of(Hash)).
expect(sampler).to receive(:add_metric).
with(/object_counts/, an_instance_of(Hash), an_instance_of(Hash)).
at_least(:once).
and_call_original
......@@ -87,11 +87,33 @@ describe Gitlab::Metrics::Sampler do
it 'adds a metric containing garbage collection statistics' do
expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('gc_statistics', an_instance_of(Hash)).
expect(sampler).to receive(:add_metric).
with(/gc_statistics/, an_instance_of(Hash)).
and_call_original
sampler.sample_gc
end
end
describe '#add_metric' do
it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('rails_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
it 'prefixes the series name for a Sidekiq process' do
expect(sampler).to receive(:sidekiq?).and_return(true)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('sidekiq_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
end
end
......@@ -28,6 +28,9 @@ describe Gitlab::Metrics::Subscribers::ActionView do
line: 4
}
expect(transaction).to receive(:increment).
with(:view_duration, 2.1)
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, values, tags)
......
......@@ -2,31 +2,34 @@ require 'spec_helper'
describe Gitlab::Metrics::Subscribers::ActiveRecord do
let(:transaction) { Gitlab::Metrics::Transaction.new }
let(:subscriber) { described_class.new }
let(:subscriber) { described_class.new }
let(:event) do
double(:event, duration: 0.2,
payload: { sql: 'SELECT * FROM users WHERE id = 10' })
end
before do
allow(subscriber).to receive(:current_transaction).and_return(transaction)
describe '#sql' do
describe 'without a current transaction' do
it 'simply returns' do
expect_any_instance_of(Gitlab::Metrics::Transaction).
to_not receive(:increment)
allow(Gitlab::Metrics).to receive(:last_relative_application_frame).
and_return(['app/models/foo.rb', 4])
end
subscriber.sql(event)
end
end
describe '#sql' do
it 'tracks the execution of a SQL query' do
sql = 'SELECT * FROM users WHERE id = ?'
values = { duration: 0.2 }
tags = { sql: sql, file: 'app/models/foo.rb', line: 4 }
describe 'with a current transaction' do
it 'increments the :sql_duration value' do
expect(subscriber).to receive(:current_transaction).
at_least(:once).
and_return(transaction)
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, values, tags)
expect(transaction).to receive(:increment).
with(:sql_duration, 0.2)
subscriber.sql(event)
subscriber.sql(event)
end
end
end
end
......@@ -32,12 +32,24 @@ describe Gitlab::Metrics::Transaction do
describe '#add_metric' do
it 'adds a metric tagged with the transaction UUID' do
expect(Gitlab::Metrics::Metric).to receive(:new).
with('foo', { number: 10 }, { transaction_id: transaction.uuid })
with('rails_foo', { number: 10 }, { transaction_id: transaction.uuid })
transaction.add_metric('foo', number: 10)
end
end
describe '#increment' do
it 'increments a counter' do
transaction.increment(:time, 1)
transaction.increment(:time, 2)
expect(transaction).to receive(:add_metric).
with('transactions', { duration: 0.0, time: 3 }, {})
transaction.track_self
end
end
describe '#add_tag' do
it 'adds a tag' do
transaction.add_tag(:foo, 'bar')
......@@ -58,7 +70,7 @@ describe Gitlab::Metrics::Transaction do
describe '#track_self' do
it 'adds a metric for the transaction itself' do
expect(transaction).to receive(:add_metric).
with(described_class::SERIES, { duration: transaction.duration }, {})
with('transactions', { duration: transaction.duration }, {})
transaction.track_self
end
......
require 'spec_helper'
describe Gitlab::Metrics do
describe '.pool_size' do
it 'returns a Fixnum' do
expect(described_class.pool_size).to be_an_instance_of(Fixnum)
end
end
describe '.timeout' do
it 'returns a Fixnum' do
expect(described_class.timeout).to be_an_instance_of(Fixnum)
describe '.settings' do
it 'returns a Hash' do
expect(described_class.settings).to be_an_instance_of(Hash)
end
end
......@@ -19,12 +13,6 @@ describe Gitlab::Metrics do
end
end
describe '.hostname' do
it 'returns a String containing the hostname' do
expect(described_class.hostname).to eq(Socket.gethostname)
end
end
describe '.last_relative_application_frame' do
it 'returns an Array containing a file path and line number' do
file, line = described_class.last_relative_application_frame
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment