Commit 99148939 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'metrics-tuning' into 'master'

Tuning of metrics data to store

This removes data we don't really need, as well as making sure we don't overload any cache stores or databases.

See merge request !2265
parents 525f8fb4 8de491a6
if Gitlab::Metrics.enabled? if Gitlab::Metrics.enabled?
require 'influxdb' require 'influxdb'
require 'socket'
require 'connection_pool' require 'connection_pool'
require 'method_source' require 'method_source'
......
...@@ -6,16 +6,21 @@ module Gitlab ...@@ -6,16 +6,21 @@ module Gitlab
METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s
PATH_REGEX = /^#{RAILS_ROOT}\/?/ PATH_REGEX = /^#{RAILS_ROOT}\/?/
def self.pool_size def self.settings
current_application_settings[:metrics_pool_size] || 16 @settings ||= {
end enabled: current_application_settings[:metrics_enabled],
pool_size: current_application_settings[:metrics_pool_size],
def self.timeout timeout: current_application_settings[:metrics_timeout],
current_application_settings[:metrics_timeout] || 10 method_call_threshold: current_application_settings[:metrics_method_call_threshold],
host: current_application_settings[:metrics_host],
username: current_application_settings[:metrics_username],
password: current_application_settings[:metrics_password],
port: current_application_settings[:metrics_port]
}
end end
def self.enabled? def self.enabled?
current_application_settings[:metrics_enabled] || false settings[:enabled] || false
end end
def self.mri? def self.mri?
...@@ -26,18 +31,13 @@ module Gitlab ...@@ -26,18 +31,13 @@ module Gitlab
# This is memoized since this method is called for every instrumented # This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows # method. Loading data from an external cache on every method call slows
# things down too much. # things down too much.
@method_call_threshold ||= @method_call_threshold ||= settings[:method_call_threshold]
(current_application_settings[:metrics_method_call_threshold] || 10)
end end
def self.pool def self.pool
@pool @pool
end end
def self.hostname
@hostname
end
# Returns a relative path and line number based on the last application call # Returns a relative path and line number based on the last application call
# frame. # frame.
def self.last_relative_application_frame def self.last_relative_application_frame
...@@ -85,16 +85,14 @@ module Gitlab ...@@ -85,16 +85,14 @@ module Gitlab
value.to_s.gsub('=', '\\=') value.to_s.gsub('=', '\\=')
end end
@hostname = Socket.gethostname
# When enabled this should be set before being used as the usual pattern # When enabled this should be set before being used as the usual pattern
# "@foo ||= bar" is _not_ thread-safe. # "@foo ||= bar" is _not_ thread-safe.
if enabled? if enabled?
@pool = ConnectionPool.new(size: pool_size, timeout: timeout) do @pool = ConnectionPool.new(size: settings[:pool_size], timeout: settings[:timeout]) do
host = current_application_settings[:metrics_host] host = settings[:host]
user = current_application_settings[:metrics_username] user = settings[:username]
pw = current_application_settings[:metrics_password] pw = settings[:password]
port = current_application_settings[:metrics_port] port = settings[:port]
InfluxDB::Client. InfluxDB::Client.
new(udp: { host: host, port: port }, username: user, password: pw) new(udp: { host: host, port: port }, username: user, password: pw)
......
...@@ -123,6 +123,8 @@ module Gitlab ...@@ -123,6 +123,8 @@ module Gitlab
duration = (Time.now - start) * 1000.0 duration = (Time.now - start) * 1000.0
if duration >= Gitlab::Metrics.method_call_threshold if duration >= Gitlab::Metrics.method_call_threshold
trans.increment(:method_duration, duration)
trans.add_metric(Gitlab::Metrics::Instrumentation::SERIES, trans.add_metric(Gitlab::Metrics::Instrumentation::SERIES,
{ duration: duration }, { duration: duration },
method: #{label.inspect}) method: #{label.inspect})
......
...@@ -18,10 +18,7 @@ module Gitlab ...@@ -18,10 +18,7 @@ module Gitlab
def to_hash def to_hash
{ {
series: @series, series: @series,
tags: @tags.merge( tags: @tags,
hostname: Metrics.hostname,
process_type: Sidekiq.server? ? 'sidekiq' : 'rails'
),
values: @values, values: @values,
timestamp: @created_at.to_i * 1_000_000_000 timestamp: @created_at.to_i * 1_000_000_000
} }
......
module Gitlab
module Metrics
# Class for producing SQL queries with sensitive data stripped out.
class ObfuscatedSQL
REPLACEMENT = /
\d+(\.\d+)? # integers, floats
| '.+?' # single quoted strings
| \/.+?(?<!\\)\/ # regexps (including escaped slashes)
/x
MYSQL_REPLACEMENTS = /
".+?" # double quoted strings
/x
# Regex to replace consecutive placeholders with a single one indicating
# the length. This can be useful when a "IN" statement uses thousands of
# IDs (storing this would just be a waste of space).
CONSECUTIVE = /(\?(\s*,\s*)?){2,}/
# sql - The raw SQL query as a String.
def initialize(sql)
@sql = sql
end
# Returns a new, obfuscated SQL query.
def to_s
regex = REPLACEMENT
if Gitlab::Database.mysql?
regex = Regexp.union(regex, MYSQL_REPLACEMENTS)
end
sql = @sql.gsub(regex, '?').gsub(CONSECUTIVE) do |match|
"#{match.count(',') + 1} values"
end
# InfluxDB escapes double quotes upon output, so lets get rid of them
# whenever we can.
if Gitlab::Database.postgresql?
sql = sql.delete('"')
end
sql.tr("\n", ' ')
end
end
end
end
...@@ -50,12 +50,11 @@ module Gitlab ...@@ -50,12 +50,11 @@ module Gitlab
end end
def sample_memory_usage def sample_memory_usage
@metrics << Metric.new('memory_usage', value: System.memory_usage) add_metric('memory_usage', value: System.memory_usage)
end end
def sample_file_descriptors def sample_file_descriptors
@metrics << Metric. add_metric('file_descriptors', value: System.file_descriptor_count)
new('file_descriptors', value: System.file_descriptor_count)
end end
if Metrics.mri? if Metrics.mri?
...@@ -69,7 +68,7 @@ module Gitlab ...@@ -69,7 +68,7 @@ module Gitlab
counts['Symbol'] = Symbol.all_symbols.length counts['Symbol'] = Symbol.all_symbols.length
counts.each do |name, count| counts.each do |name, count|
@metrics << Metric.new('object_counts', { count: count }, type: name) add_metric('object_counts', { count: count }, type: name)
end end
end end
else else
...@@ -91,7 +90,17 @@ module Gitlab ...@@ -91,7 +90,17 @@ module Gitlab
stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
@metrics << Metric.new('gc_statistics', stats) add_metric('gc_statistics', stats)
end
def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
def sidekiq?
Sidekiq.server?
end end
end end
end end
......
...@@ -19,6 +19,7 @@ module Gitlab ...@@ -19,6 +19,7 @@ module Gitlab
values = values_for(event) values = values_for(event)
tags = tags_for(event) tags = tags_for(event)
current_transaction.increment(:view_duration, event.duration)
current_transaction.add_metric(SERIES, values, tags) current_transaction.add_metric(SERIES, values, tags)
end end
......
module Gitlab module Gitlab
module Metrics module Metrics
module Subscribers module Subscribers
# Class for tracking raw SQL queries. # Class for tracking the total query duration of a transaction.
#
# Queries are obfuscated before being logged to ensure no private data is
# exposed via InfluxDB/Grafana.
class ActiveRecord < ActiveSupport::Subscriber class ActiveRecord < ActiveSupport::Subscriber
attach_to :active_record attach_to :active_record
SERIES = 'sql_queries'
def sql(event) def sql(event)
return unless current_transaction return unless current_transaction
values = values_for(event) current_transaction.increment(:sql_duration, event.duration)
tags = tags_for(event)
current_transaction.add_metric(SERIES, values, tags)
end end
private private
def values_for(event)
{ duration: event.duration }
end
def tags_for(event)
sql = ObfuscatedSQL.new(event.payload[:sql]).to_s
tags = { sql: sql }
file, line = Metrics.last_relative_application_frame
if file and line
tags[:file] = file
tags[:line] = line
end
tags
end
def current_transaction def current_transaction
Transaction.current Transaction.current
end end
......
...@@ -4,15 +4,12 @@ module Gitlab ...@@ -4,15 +4,12 @@ module Gitlab
class Transaction class Transaction
THREAD_KEY = :_gitlab_metrics_transaction THREAD_KEY = :_gitlab_metrics_transaction
SERIES = 'transactions'
attr_reader :uuid, :tags attr_reader :uuid, :tags
def self.current def self.current
Thread.current[THREAD_KEY] Thread.current[THREAD_KEY]
end end
# name - The name of this transaction as a String.
def initialize def initialize
@metrics = [] @metrics = []
@uuid = SecureRandom.uuid @uuid = SecureRandom.uuid
...@@ -20,6 +17,7 @@ module Gitlab ...@@ -20,6 +17,7 @@ module Gitlab
@started_at = nil @started_at = nil
@finished_at = nil @finished_at = nil
@values = Hash.new(0)
@tags = {} @tags = {}
end end
...@@ -41,8 +39,13 @@ module Gitlab ...@@ -41,8 +39,13 @@ module Gitlab
def add_metric(series, values, tags = {}) def add_metric(series, values, tags = {})
tags = tags.merge(transaction_id: @uuid) tags = tags.merge(transaction_id: @uuid)
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
@metrics << Metric.new("#{prefix}#{series}", values, tags)
end
@metrics << Metric.new(series, values, tags) def increment(name, value)
@values[name] += value
end end
def add_tag(key, value) def add_tag(key, value)
...@@ -55,12 +58,22 @@ module Gitlab ...@@ -55,12 +58,22 @@ module Gitlab
end end
def track_self def track_self
add_metric(SERIES, { duration: duration }, @tags) values = { duration: duration }
@values.each do |name, value|
values[name] = value
end
add_metric('transactions', values, @tags)
end end
def submit def submit
Metrics.submit_metrics(@metrics.map(&:to_hash)) Metrics.submit_metrics(@metrics.map(&:to_hash))
end end
def sidekiq?
Sidekiq.server?
end
end end
end end
end end
...@@ -48,6 +48,9 @@ describe Gitlab::Metrics::Instrumentation do ...@@ -48,6 +48,9 @@ describe Gitlab::Metrics::Instrumentation do
allow(described_class).to receive(:transaction). allow(described_class).to receive(:transaction).
and_return(transaction) and_return(transaction)
expect(transaction).to receive(:increment).
with(:method_duration, a_kind_of(Numeric))
expect(transaction).to receive(:add_metric). expect(transaction).to receive(:add_metric).
with(described_class::SERIES, an_instance_of(Hash), with(described_class::SERIES, an_instance_of(Hash),
method: 'Dummy.foo') method: 'Dummy.foo')
...@@ -102,6 +105,9 @@ describe Gitlab::Metrics::Instrumentation do ...@@ -102,6 +105,9 @@ describe Gitlab::Metrics::Instrumentation do
allow(described_class).to receive(:transaction). allow(described_class).to receive(:transaction).
and_return(transaction) and_return(transaction)
expect(transaction).to receive(:increment).
with(:method_duration, a_kind_of(Numeric))
expect(transaction).to receive(:add_metric). expect(transaction).to receive(:add_metric).
with(described_class::SERIES, an_instance_of(Hash), with(described_class::SERIES, an_instance_of(Hash),
method: 'Dummy#bar') method: 'Dummy#bar')
......
...@@ -37,9 +37,6 @@ describe Gitlab::Metrics::Metric do ...@@ -37,9 +37,6 @@ describe Gitlab::Metrics::Metric do
it 'includes the tags' do it 'includes the tags' do
expect(hash[:tags]).to be_an_instance_of(Hash) expect(hash[:tags]).to be_an_instance_of(Hash)
expect(hash[:tags][:hostname]).to be_an_instance_of(String)
expect(hash[:tags][:process_type]).to be_an_instance_of(String)
end end
it 'includes the values' do it 'includes the values' do
......
require 'spec_helper'
describe Gitlab::Metrics::ObfuscatedSQL do
describe '#to_s' do
it 'replaces newlines with a space' do
sql = described_class.new("SELECT x\nFROM y")
expect(sql.to_s).to eq('SELECT x FROM y')
end
describe 'using single values' do
it 'replaces a single integer' do
sql = described_class.new('SELECT x FROM y WHERE a = 10')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single float' do
sql = described_class.new('SELECT x FROM y WHERE a = 10.5')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces a single quoted string' do
sql = described_class.new("SELECT x FROM y WHERE a = 'foo'")
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
if Gitlab::Database.mysql?
it 'replaces a double quoted string' do
sql = described_class.new('SELECT x FROM y WHERE a = "foo"')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
it 'replaces a single regular expression' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
it 'replaces regular expressions using escaped slashes' do
sql = described_class.new('SELECT x FROM y WHERE a = /foo\/bar/')
expect(sql.to_s).to eq('SELECT x FROM y WHERE a = ?')
end
end
describe 'using consecutive values' do
it 'replaces multiple integers' do
sql = described_class.new('SELECT x FROM y WHERE z IN (10, 20, 30)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple floats' do
sql = described_class.new('SELECT x FROM y WHERE z IN (1.5, 2.5, 3.5)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (3 values)')
end
it 'replaces multiple single quoted strings' do
sql = described_class.new("SELECT x FROM y WHERE z IN ('foo', 'bar')")
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
if Gitlab::Database.mysql?
it 'replaces multiple double quoted strings' do
sql = described_class.new('SELECT x FROM y WHERE z IN ("foo", "bar")')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
it 'replaces multiple regular expressions' do
sql = described_class.new('SELECT x FROM y WHERE z IN (/foo/, /bar/)')
expect(sql.to_s).to eq('SELECT x FROM y WHERE z IN (2 values)')
end
end
if Gitlab::Database.postgresql?
it 'replaces double quotes' do
sql = described_class.new('SELECT "x" FROM "y"')
expect(sql.to_s).to eq('SELECT x FROM y')
end
end
end
end
...@@ -51,8 +51,8 @@ describe Gitlab::Metrics::Sampler do ...@@ -51,8 +51,8 @@ describe Gitlab::Metrics::Sampler do
expect(Gitlab::Metrics::System).to receive(:memory_usage). expect(Gitlab::Metrics::System).to receive(:memory_usage).
and_return(9000) and_return(9000)
expect(Gitlab::Metrics::Metric).to receive(:new). expect(sampler).to receive(:add_metric).
with('memory_usage', value: 9000). with(/memory_usage/, value: 9000).
and_call_original and_call_original
sampler.sample_memory_usage sampler.sample_memory_usage
...@@ -64,8 +64,8 @@ describe Gitlab::Metrics::Sampler do ...@@ -64,8 +64,8 @@ describe Gitlab::Metrics::Sampler do
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count). expect(Gitlab::Metrics::System).to receive(:file_descriptor_count).
and_return(4) and_return(4)
expect(Gitlab::Metrics::Metric).to receive(:new). expect(sampler).to receive(:add_metric).
with('file_descriptors', value: 4). with(/file_descriptors/, value: 4).
and_call_original and_call_original
sampler.sample_file_descriptors sampler.sample_file_descriptors
...@@ -74,8 +74,8 @@ describe Gitlab::Metrics::Sampler do ...@@ -74,8 +74,8 @@ describe Gitlab::Metrics::Sampler do
describe '#sample_objects' do describe '#sample_objects' do
it 'adds a metric containing the amount of allocated objects' do it 'adds a metric containing the amount of allocated objects' do
expect(Gitlab::Metrics::Metric).to receive(:new). expect(sampler).to receive(:add_metric).
with('object_counts', an_instance_of(Hash), an_instance_of(Hash)). with(/object_counts/, an_instance_of(Hash), an_instance_of(Hash)).
at_least(:once). at_least(:once).
and_call_original and_call_original
...@@ -87,11 +87,33 @@ describe Gitlab::Metrics::Sampler do ...@@ -87,11 +87,33 @@ describe Gitlab::Metrics::Sampler do
it 'adds a metric containing garbage collection statistics' do it 'adds a metric containing garbage collection statistics' do
expect(GC::Profiler).to receive(:total_time).and_return(0.24) expect(GC::Profiler).to receive(:total_time).and_return(0.24)
expect(Gitlab::Metrics::Metric).to receive(:new). expect(sampler).to receive(:add_metric).
with('gc_statistics', an_instance_of(Hash)). with(/gc_statistics/, an_instance_of(Hash)).
and_call_original and_call_original
sampler.sample_gc sampler.sample_gc
end end
end end
describe '#add_metric' do
it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('rails_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
it 'prefixes the series name for a Sidekiq process' do
expect(sampler).to receive(:sidekiq?).and_return(true)
expect(Gitlab::Metrics::Metric).to receive(:new).
with('sidekiq_cats', { value: 10 }, {}).
and_call_original
sampler.add_metric('cats', value: 10)
end
end
end end
...@@ -28,6 +28,9 @@ describe Gitlab::Metrics::Subscribers::ActionView do ...@@ -28,6 +28,9 @@ describe Gitlab::Metrics::Subscribers::ActionView do
line: 4 line: 4
} }
expect(transaction).to receive(:increment).
with(:view_duration, 2.1)
expect(transaction).to receive(:add_metric). expect(transaction).to receive(:add_metric).
with(described_class::SERIES, values, tags) with(described_class::SERIES, values, tags)
......
...@@ -2,7 +2,6 @@ require 'spec_helper' ...@@ -2,7 +2,6 @@ require 'spec_helper'
describe Gitlab::Metrics::Subscribers::ActiveRecord do describe Gitlab::Metrics::Subscribers::ActiveRecord do
let(:transaction) { Gitlab::Metrics::Transaction.new } let(:transaction) { Gitlab::Metrics::Transaction.new }
let(:subscriber) { described_class.new } let(:subscriber) { described_class.new }
let(:event) do let(:event) do
...@@ -10,23 +9,27 @@ describe Gitlab::Metrics::Subscribers::ActiveRecord do ...@@ -10,23 +9,27 @@ describe Gitlab::Metrics::Subscribers::ActiveRecord do
payload: { sql: 'SELECT * FROM users WHERE id = 10' }) payload: { sql: 'SELECT * FROM users WHERE id = 10' })
end end
before do describe '#sql' do
allow(subscriber).to receive(:current_transaction).and_return(transaction) describe 'without a current transaction' do
it 'simply returns' do
expect_any_instance_of(Gitlab::Metrics::Transaction).
to_not receive(:increment)
allow(Gitlab::Metrics).to receive(:last_relative_application_frame). subscriber.sql(event)
and_return(['app/models/foo.rb', 4]) end
end end
describe '#sql' do describe 'with a current transaction' do
it 'tracks the execution of a SQL query' do it 'increments the :sql_duration value' do
sql = 'SELECT * FROM users WHERE id = ?' expect(subscriber).to receive(:current_transaction).
values = { duration: 0.2 } at_least(:once).
tags = { sql: sql, file: 'app/models/foo.rb', line: 4 } and_return(transaction)
expect(transaction).to receive(:add_metric). expect(transaction).to receive(:increment).
with(described_class::SERIES, values, tags) with(:sql_duration, 0.2)
subscriber.sql(event) subscriber.sql(event)
end end
end end
end
end end
...@@ -32,12 +32,24 @@ describe Gitlab::Metrics::Transaction do ...@@ -32,12 +32,24 @@ describe Gitlab::Metrics::Transaction do
describe '#add_metric' do describe '#add_metric' do
it 'adds a metric tagged with the transaction UUID' do it 'adds a metric tagged with the transaction UUID' do
expect(Gitlab::Metrics::Metric).to receive(:new). expect(Gitlab::Metrics::Metric).to receive(:new).
with('foo', { number: 10 }, { transaction_id: transaction.uuid }) with('rails_foo', { number: 10 }, { transaction_id: transaction.uuid })
transaction.add_metric('foo', number: 10) transaction.add_metric('foo', number: 10)
end end
end end
describe '#increment' do
it 'increments a counter' do
transaction.increment(:time, 1)
transaction.increment(:time, 2)
expect(transaction).to receive(:add_metric).
with('transactions', { duration: 0.0, time: 3 }, {})
transaction.track_self
end
end
describe '#add_tag' do describe '#add_tag' do
it 'adds a tag' do it 'adds a tag' do
transaction.add_tag(:foo, 'bar') transaction.add_tag(:foo, 'bar')
...@@ -58,7 +70,7 @@ describe Gitlab::Metrics::Transaction do ...@@ -58,7 +70,7 @@ describe Gitlab::Metrics::Transaction do
describe '#track_self' do describe '#track_self' do
it 'adds a metric for the transaction itself' do it 'adds a metric for the transaction itself' do
expect(transaction).to receive(:add_metric). expect(transaction).to receive(:add_metric).
with(described_class::SERIES, { duration: transaction.duration }, {}) with('transactions', { duration: transaction.duration }, {})
transaction.track_self transaction.track_self
end end
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::Metrics do describe Gitlab::Metrics do
describe '.pool_size' do describe '.settings' do
it 'returns a Fixnum' do it 'returns a Hash' do
expect(described_class.pool_size).to be_an_instance_of(Fixnum) expect(described_class.settings).to be_an_instance_of(Hash)
end
end
describe '.timeout' do
it 'returns a Fixnum' do
expect(described_class.timeout).to be_an_instance_of(Fixnum)
end end
end end
...@@ -19,12 +13,6 @@ describe Gitlab::Metrics do ...@@ -19,12 +13,6 @@ describe Gitlab::Metrics do
end end
end end
describe '.hostname' do
it 'returns a String containing the hostname' do
expect(described_class.hostname).to eq(Socket.gethostname)
end
end
describe '.last_relative_application_frame' do describe '.last_relative_application_frame' do
it 'returns an Array containing a file path and line number' do it 'returns an Array containing a file path and line number' do
file, line = described_class.last_relative_application_frame file, line = described_class.last_relative_application_frame
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment