Commit 5e319f52 authored by Mikolaj Wawrzyniak's avatar Mikolaj Wawrzyniak

Create Aggregated Metrics Sources

To house all source-related operations for the Aggregated Metrics
feature, a new Sources class has to be added.
parent 7bf372c1
---
# Development feature flag definition. The aggregated metrics code checks this
# flag by name (`Feature.enabled?('database_sourced_aggregated_metrics', ...)`)
# before calculating any aggregation whose `source` is `database`.
# Disabled by default.
name: database_sourced_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300411
milestone: '13.9'
type: development
group: group::product intelligence
default_enabled: false
...@@ -451,10 +451,12 @@ module EE ...@@ -451,10 +451,12 @@ module EE
pipelines_with_secure_jobs[metric_name.to_sym] = pipelines_with_secure_jobs[metric_name.to_sym] =
if start_id && finish_id if start_id && finish_id
estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result| estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result })) ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: result }))
end end
else else
save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new })) ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
.save_aggregated_metrics(**aggregated_metrics_params.merge({ data: ::Gitlab::Database::PostgresHll::Buckets.new }))
0 0
end end
end end
......
...@@ -8,16 +8,26 @@ module Gitlab ...@@ -8,16 +8,26 @@ module Gitlab
INTERSECTION_OF_AGGREGATED_METRICS = 'AND' INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml') AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
UnknownAggregationOperator = Class.new(StandardError) AggregatedMetricError = Class.new(StandardError)
UnknownAggregationOperator = Class.new(AggregatedMetricError)
UnknownAggregationSource = Class.new(AggregatedMetricError)
DATABASE_SOURCE = 'database'
REDIS_SOURCE = 'redis'
SOURCES = {
DATABASE_SOURCE => Sources::PostgresHll,
REDIS_SOURCE => Sources::RedisHll
}.freeze
class Aggregate class Aggregate
delegate :calculate_events_union, delegate :weekly_time_range,
:weekly_time_range,
:monthly_time_range, :monthly_time_range,
to: Gitlab::UsageDataCounters::HLLRedisCounter to: Gitlab::UsageDataCounters::HLLRedisCounter
def initialize def initialize(recorded_at)
@aggregated_metrics = load_events(AGGREGATED_METRICS_PATH) @aggregated_metrics = load_metrics(AGGREGATED_METRICS_PATH)
@recorded_at = recorded_at
end end
def monthly_data def monthly_data
...@@ -30,35 +40,49 @@ module Gitlab ...@@ -30,35 +40,49 @@ module Gitlab
private private
attr_accessor :aggregated_metrics attr_accessor :aggregated_metrics, :recorded_at
def aggregated_metrics_data(start_date:, end_date:) def aggregated_metrics_data(start_date:, end_date:)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data| aggregated_metrics.each_with_object({}) do |aggregation, data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development) next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date) case aggregation[:source]
when REDIS_SOURCE
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
when DATABASE_SOURCE
next unless Feature.enabled?('database_sourced_aggregated_metrics', default_enabled: false, type: :development)
data[aggregation[:name]] = calculate_count_for_aggregation(aggregation: aggregation, start_date: start_date, end_date: end_date)
else
Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationSource.new("Aggregation source: '#{aggregation[:source]}' must be included in #{SOURCES.keys}"))
data[aggregation[:name]] = Gitlab::Utils::UsageData::FALLBACK
end end
end end
end
def calculate_count_for_aggregation(aggregation:, start_date:, end_date:)
source = SOURCES[aggregation[:source]]
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
case aggregation[:operator] case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: aggregation[:events], start_date: start_date, end_date: end_date, recorded_at: recorded_at)
when INTERSECTION_OF_AGGREGATED_METRICS when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date) calculate_metrics_intersections(source: source, metric_names: aggregation[:events], start_date: start_date, end_date: end_date)
else else
Gitlab::ErrorTracking Gitlab::ErrorTracking
.track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")) .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
Gitlab::Utils::UsageData::FALLBACK Gitlab::Utils::UsageData::FALLBACK
end end
rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError, AggregatedMetricError => error
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error) Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
Gitlab::Utils::UsageData::FALLBACK Gitlab::Utils::UsageData::FALLBACK
end end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle # calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391 # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({})) def calculate_metrics_intersections(source:, metric_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle # calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) => # |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C| # |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
...@@ -66,12 +90,12 @@ module Gitlab ...@@ -66,12 +90,12 @@ module Gitlab
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D| # |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... # calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache) subset_powers_data = subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D| # calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin power_of_union_of_all_metrics = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \ subset_powers_cache[metric_names.size][metric_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate, # in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
...@@ -86,7 +110,7 @@ module Gitlab ...@@ -86,7 +110,7 @@ module Gitlab
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even) sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D| # add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events) sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_metrics : -power_of_union_of_all_metrics)
end end
def sum_subset_powers(subset_powers_data, subset_powers_size_even) def sum_subset_powers(subset_powers_data, subset_powers_size_even)
...@@ -97,29 +121,29 @@ module Gitlab ...@@ -97,29 +121,29 @@ module Gitlab
(subset_powers_size_even ? -1 : 1) * sum_without_sign (subset_powers_size_even ? -1 : 1) * sum_without_sign
end end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache) def subsets_intersection_powers(source, metric_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1)) subset_sizes = (1...metric_names.size)
subset_sizes.map do |subset_size| subset_sizes.map do |subset_size|
if subset_size > 1 if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|) # calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset| metric_names.combination(subset_size).sum do |metrics_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \ subset_powers_cache[subset_size][metrics_subset.join('_&_')] ||=
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache) calculate_metrics_intersections(source: source, metric_names: metrics_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end end
else else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ... # calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event| metric_names.sum do |metric|
subset_powers_cache[subset_size][event] ||= \ subset_powers_cache[subset_size][metric] ||= \
calculate_events_union(event_names: event, start_date: start_date, end_date: end_date) source.calculate_metrics_union(metric_names: metric, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
end end
end end
end end
end end
def load_events(wildcard) def load_metrics(wildcard)
Dir[wildcard].each_with_object([]) do |path, events| Dir[wildcard].each_with_object([]) do |path, metrics|
events.push(*load_yaml_from_path(path)) metrics.push(*load_yaml_from_path(path))
end end
end end
......
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
# Aggregated-metrics source backed by PostgreSQL HyperLogLog buckets.
#
# Buckets are serialized to JSON and parked in the shared-state Redis under a
# key built from the metric name, a human readable time period name and the
# report timestamp; unions are later computed by reading those keys back.
class PostgresHll
  # Estimates the number of distinct elements in the union of the given metrics.
  #
  # @param metric_names [String, Array<String>] one or many metric names
  # @param start_date   [#to_date, nil] beginning of the period
  # @param end_date     [#to_date, nil] end of the period
  # @param recorded_at  [#to_i] timestamp that was used when the buckets were saved
  # @return the estimated distinct count reported by the merged buckets
  # @raise [UnionNotAvailable] when any of the metrics has no saved buckets
  def self.calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at:)
    # Without both boundaries the lookup falls back to the "all time" key.
    time_period = (start_date..end_date) if start_date && end_date
    names = Array(metric_names)
    merged_buckets = Gitlab::Database::PostgresHll::Buckets.new

    names.each do |metric_name|
      serialized = read_aggregated_metric(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at)
      raise UnionNotAvailable, "Union data not available for #{metric_names}" unless serialized

      merged_buckets.merge_hash!(Gitlab::Json.parse(serialized))
    end

    merged_buckets.estimated_distinct_count
  end

  # Stores serialized buckets for a single metric in Redis.
  #
  # @param metric_name [String]
  # @param time_period [Hash, nil] hash whose first value is the date range;
  #   nil (or an empty hash) selects the "all time" key
  # @param recorded_at_timestamp [#to_i] suffix that keeps keys of retried jobs apart
  # @param data [Gitlab::Database::PostgresHll::Buckets] buckets to persist;
  #   any other type is reported to error tracking and nothing is written
  def self.save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
    if data.is_a?(::Gitlab::Database::PostgresHll::Buckets)
      # Usage Ping generation for gitlab.com is a very long running process.
      # Keys have to outlive it, hence triple the maximal generation time.
      ttl = ::Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS * 3

      Gitlab::Redis::SharedState.with do |redis|
        key = redis_key(metric_name: metric_name, time_period: time_period&.values&.first, recorded_at: recorded_at_timestamp)

        redis.set(key, data.to_json, ex: ttl)
      end
    else
      Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
      nil
    end
  rescue ::Redis::CommandError => e
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
  end

  # Fetches the raw JSON stored for the metric, or nil when the key is absent.
  def self.read_aggregated_metric(metric_name:, time_period:, recorded_at:)
    key = redis_key(metric_name: metric_name, time_period: time_period, recorded_at: recorded_at)

    Gitlab::Redis::SharedState.with { |redis| redis.get(key) }
  end

  # Builds the Redis key. The timestamp suffix avoids picking up stale keys
  # when the usage ping job is retried.
  def self.redis_key(metric_name:, time_period:, recorded_at:)
    period_name = time_period_to_human_name(time_period)

    "#{metric_name}_#{period_name}-#{recorded_at.to_i}"
  end

  # Maps a date range onto its human name: blank => all time,
  # longer than 7 days => monthly, anything else => weekly.
  def self.time_period_to_human_name(time_period)
    return Gitlab::Utils::UsageData::ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?

    period_start = time_period.first.to_date
    period_end = time_period.last.to_date
    longer_than_week = (period_end - period_start).to_i > 7

    longer_than_week ? Gitlab::Utils::UsageData::MONTHLY_PERIOD_HUMAN_NAME : Gitlab::Utils::UsageData::WEEKLY_PERIOD_HUMAN_NAME
  end

  private_class_method :read_aggregated_metric, :redis_key, :time_period_to_human_name
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Usage
module Metrics
module Aggregates
module Sources
UnionNotAvailable = Class.new(AggregatedMetricError)
# Aggregated-metrics source backed by Redis HyperLogLog counters.
class RedisHll
  class << self
    # Delegates the union calculation to HLLRedisCounter and refuses to pass a
    # negative result on to the caller as if it were a valid count.
    #
    # @param metric_names [Array<String>] forwarded as `event_names`
    # @param start_date beginning of the period, forwarded as is
    # @param end_date end of the period, forwarded as is
    # @param recorded_at accepted but not used by this source
    # @return the non-negative union size reported by HLLRedisCounter
    # @raise [UnionNotAvailable] when the counter returns a negative value
    def calculate_metrics_union(metric_names:, start_date:, end_date:, recorded_at: nil)
      count = Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union(
        event_names: metric_names,
        start_date: start_date,
        end_date: end_date
      )

      raise UnionNotAvailable, "Union data not available for #{metric_names}" unless count >= 0

      count
    end
  end
end
end
end
end
end
end
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
module Gitlab module Gitlab
class UsageData class UsageData
DEPRECATED_VALUE = -1000 DEPRECATED_VALUE = -1000
MAX_GENERATION_TIME_FOR_SAAS = 40.hours
CE_MEMOIZED_VALUES = %i( CE_MEMOIZED_VALUES = %i(
issue_minimum_id issue_minimum_id
...@@ -754,7 +755,7 @@ module Gitlab ...@@ -754,7 +755,7 @@ module Gitlab
private private
def aggregated_metrics def aggregated_metrics
@aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new @aggregated_metrics ||= ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new(recorded_at)
end end
def event_monthly_active_users(date_range) def event_monthly_active_users(date_range)
......
...@@ -4,21 +4,28 @@ ...@@ -4,21 +4,28 @@
# - "AND": counts unique elements that were observed triggering all of following events # - "AND": counts unique elements that were observed triggering all of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes # events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events. # see from lib/gitlab/usage_data_counters/known_events/ for the list of valid events.
# source: defines which datasource will be used to locate events that should be included in aggregated metric. Valid values are:
# - database
# - redis
# feature_flag: name of development feature flag that will be checked before metrics aggregation is performed. # feature_flag: name of development feature flag that will be checked before metrics aggregation is performed.
# Corresponding feature flag should have `default_enabled` attribute set to `false`. # Corresponding feature flag should have `default_enabled` attribute set to `false`.
# This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked. # This attribute is OPTIONAL and can be omitted, when `feature_flag` is missing no feature flag will be checked.
--- ---
- name: compliance_features_track_unique_visits_union - name: compliance_features_track_unique_visits_union
operator: OR operator: OR
source: redis
events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory'] events: ['g_compliance_audit_events', 'g_compliance_dashboard', 'i_compliance_audit_events', 'a_compliance_audit_events_api', 'i_compliance_credential_inventory']
- name: product_analytics_test_metrics_union - name: product_analytics_test_metrics_union
operator: OR operator: OR
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid'] events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: product_analytics_test_metrics_intersection - name: product_analytics_test_metrics_intersection
operator: AND operator: AND
source: redis
events: ['i_search_total', 'i_search_advanced', 'i_search_paid'] events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- name: incident_management_alerts_total_unique_counts - name: incident_management_alerts_total_unique_counts
operator: OR operator: OR
source: redis
events: [ events: [
'incident_management_alert_status_changed', 'incident_management_alert_status_changed',
'incident_management_alert_assigned', 'incident_management_alert_assigned',
...@@ -27,6 +34,7 @@ ...@@ -27,6 +34,7 @@
] ]
- name: incident_management_incidents_total_unique_counts - name: incident_management_incidents_total_unique_counts
operator: OR operator: OR
source: redis
events: [ events: [
'incident_management_incident_created', 'incident_management_incident_created',
'incident_management_incident_reopened', 'incident_management_incident_reopened',
......
...@@ -80,27 +80,6 @@ module Gitlab ...@@ -80,27 +80,6 @@ module Gitlab
DISTRIBUTED_HLL_FALLBACK DISTRIBUTED_HLL_FALLBACK
end end
# Serializes HLL buckets to JSON and stores them in the shared-state Redis.
#
# Data that is not a Gitlab::Database::PostgresHll::Buckets instance is
# reported to error tracking and nothing is written. Redis command errors
# are reported to error tracking as well.
def save_aggregated_metrics(metric_name:, time_period:, recorded_at_timestamp:, data:)
  if data.is_a?(::Gitlab::Database::PostgresHll::Buckets)
    # Original note: the longest recorded usage ping generation time for
    # gitlab.com was below 40 hours, and an error margin is added on top.
    expiry = 80.hours

    # The timestamp suffix avoids stale keys if the usage ping job is retried.
    key = "#{metric_name}_#{time_period_to_human_name(time_period)}-#{recorded_at_timestamp}"

    Gitlab::Redis::SharedState.with { |redis| redis.set(key, data.to_json, ex: expiry) }
  else
    Gitlab::ErrorTracking.track_and_raise_for_dev_exception(StandardError.new("Unsupported data type: #{data.class}"))
    nil
  end
rescue ::Redis::CommandError => e
  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
end
def sum(relation, column, batch_size: nil, start: nil, finish: nil) def sum(relation, column, batch_size: nil, start: nil, finish: nil)
Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish) Gitlab::Database::BatchCount.batch_sum(relation, column, batch_size: batch_size, start: start, finish: finish)
rescue ActiveRecord::StatementInvalid rescue ActiveRecord::StatementInvalid
...@@ -152,20 +131,6 @@ module Gitlab ...@@ -152,20 +131,6 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values) Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end end
# Translates a time period hash (e.g. `{ created_at: range }`) into its human
# name: blank => all time, range longer than 7 days => monthly, else weekly.
# Only the first value of the hash is inspected.
def time_period_to_human_name(time_period)
  return ALL_TIME_PERIOD_HUMAN_NAME if time_period.blank?

  range = time_period.values.first
  period_start = range.first.to_date
  period_end = range.last.to_date
  days_covered = (period_end - period_start).to_i

  days_covered > 7 ? MONTHLY_PERIOD_HUMAN_NAME : WEEKLY_PERIOD_HUMAN_NAME
end
private private
def prometheus_client(verify:) def prometheus_client(verify:)
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the PostgresHll aggregated metrics source: buckets are saved to and
# read back from the shared-state Redis, which is cleaned between examples.
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll, :clean_gitlab_redis_shared_state do
  let_it_be(:start_date) { 7.days.ago }
  let_it_be(:end_date) { Date.current }
  let_it_be(:recorded_at) { Time.current }
  let_it_be(:time_period) { { created_at: (start_date..end_date) } }
  let(:metric_1) { 'metric_1' }
  let(:metric_2) { 'metric_2' }
  let(:metric_names) { [metric_1, metric_2] }

  # The label names the method that is actually exercised by the subject.
  describe '.calculate_metrics_union' do
    subject(:calculate_metrics_union) do
      described_class.calculate_metrics_union(metric_names: metric_names, start_date: start_date, end_date: end_date, recorded_at: recorded_at)
    end

    # Seed Redis with buckets for both metrics; bucket 56 is shared by them.
    before do
      [
        {
          metric_name: metric_1,
          time_period: time_period,
          recorded_at_timestamp: recorded_at,
          data: ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1)
        },
        {
          metric_name: metric_2,
          time_period: time_period,
          recorded_at_timestamp: recorded_at,
          data: ::Gitlab::Database::PostgresHll::Buckets.new(10 => 1, 56 => 1)
        }
      ].each do |params|
        described_class.save_aggregated_metrics(**params)
      end
    end

    it 'returns the number of unique events in the union of all metrics' do
      expect(calculate_metrics_union.round(2)).to eq(3.12)
    end

    context 'when there is no aggregated data saved' do
      let(:metric_names) { [metric_1, 'i do not have any records'] }

      it 'raises error when union data is missing' do
        expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
      end
    end

    context 'when there is only one metric defined as aggregated' do
      let(:metric_names) { [metric_1] }

      it 'returns the number of unique events for that metric' do
        expect(calculate_metrics_union.round(2)).to eq(2.08)
      end
    end
  end

  describe '.save_aggregated_metrics' do
    subject(:save_aggregated_metrics) do
      described_class.save_aggregated_metrics(metric_name: metric_1,
                                              time_period: time_period,
                                              recorded_at_timestamp: recorded_at,
                                              data: data)
    end

    context 'with compatible data argument' do
      let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }

      # 120.hours is triple Gitlab::UsageData::MAX_GENERATION_TIME_FOR_SAAS (40.hours),
      # which is the expiration the implementation computes for saved keys.
      it 'persists serialized data in Redis' do
        Gitlab::Redis::SharedState.with do |redis|
          expect(redis).to receive(:set).with("#{metric_1}_weekly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
        end

        save_aggregated_metrics
      end

      context 'with monthly key' do
        let_it_be(:start_date) { 4.weeks.ago }
        let_it_be(:time_period) { { created_at: (start_date..end_date) } }

        it 'persists serialized data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).to receive(:set).with("#{metric_1}_monthly-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
          end

          save_aggregated_metrics
        end
      end

      context 'with all_time key' do
        let_it_be(:time_period) { nil }

        it 'persists serialized data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).to receive(:set).with("#{metric_1}_all_time-#{recorded_at.to_i}", '{"141":1,"56":1}', ex: 120.hours)
          end

          save_aggregated_metrics
        end
      end

      context 'error handling' do
        before do
          allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
        end

        it 'rescues and reraise ::Redis::CommandError for development and test environments' do
          expect { save_aggregated_metrics }.to raise_error ::Redis::CommandError
        end

        context 'for environment different than development' do
          before do
            stub_rails_env('production')
          end

          it 'rescues ::Redis::CommandError' do
            expect { save_aggregated_metrics }.not_to raise_error
          end
        end
      end
    end

    context 'with incompatible data argument' do
      let(:data) { 1 }

      context 'for environment different than development' do
        before do
          stub_rails_env('production')
        end

        it 'does not persist data in Redis' do
          Gitlab::Redis::SharedState.with do |redis|
            expect(redis).not_to receive(:set)
          end

          save_aggregated_metrics
        end
      end

      it 'raises error for development environment' do
        # Parentheses keep the regexp literal from being parsed ambiguously.
        expect { save_aggregated_metrics }.to raise_error(/Unsupported data type/)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the RedisHll aggregated metrics source.
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Sources::RedisHll do
  # The label names the method under test; HLLRedisCounter.calculate_events_union
  # is only the collaborator it delegates to.
  describe '.calculate_metrics_union' do
    let(:event_names) { %w[event_a event_b] }
    let(:start_date) { 7.days.ago }
    let(:end_date) { Date.current }

    subject(:calculate_metrics_union) do
      described_class.calculate_metrics_union(metric_names: event_names, start_date: start_date, end_date: end_date, recorded_at: nil)
    end

    it 'calls Gitlab::UsageDataCounters::HLLRedisCounter.calculate_events_union' do
      expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
        .with(event_names: event_names, start_date: start_date, end_date: end_date)
        .and_return(5)

      calculate_metrics_union
    end

    # A negative result from the counter must surface as an error, not as a count.
    it 'prevents from using fallback value as valid union result' do
      allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_return(-1)

      expect { calculate_metrics_union }.to raise_error Gitlab::Usage::Metrics::Aggregates::Sources::UnionNotAvailable
    end
  end
end
...@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do ...@@ -13,18 +13,32 @@ RSpec.describe 'aggregated metrics' do
end end
end end
RSpec::Matchers.define :has_known_source do
match do |aggregate|
Gitlab::Usage::Metrics::Aggregates::SOURCES.include?(aggregate[:source])
end
failure_message do |aggregate|
"Aggregate with name: `#{aggregate[:name]}` uses not allowed source `#{aggregate[:source]}`"
end
end
let_it_be(:known_events) do let_it_be(:known_events) do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end end
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics| Gitlab::Usage::Metrics::Aggregates::Aggregate.new(Time.current).send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] } event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq) expect(event_names).to eq(event_names&.uniq)
end end
aggregated_metrics&.each do |aggregate| it 'all aggregated metrics has known source' do
expect(aggregated_metrics).to all has_known_source
end
aggregated_metrics&.select { |agg| agg[:source] == Gitlab::Usage::Metrics::Aggregates::REDIS_SOURCE }&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } } let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
......
...@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do ...@@ -372,97 +372,4 @@ RSpec.describe Gitlab::Utils::UsageData do
end end
end end
end end
# Specs for save_aggregated_metrics: persistence of valid buckets in Redis,
# handling of Redis command errors, and rejection of unsupported data types.
describe '#save_aggregated_metrics', :clean_gitlab_redis_shared_state do
let(:timestamp) { Time.current.to_i }
let(:time_period) { { created_at: 7.days.ago..Date.current } }
let(:metric_name) { 'test_metric' }
# Keyword arguments shared by every call below; `data` is defined per context.
let(:method_params) do
{
metric_name: metric_name,
time_period: time_period,
recorded_at_timestamp: timestamp,
data: data
}
end
context 'with compatible data argument' do
let(:data) { ::Gitlab::Database::PostgresHll::Buckets.new(141 => 1, 56 => 1) }
it 'persists serialized data in Redis' do
time_period_name = 'weekly'
# The period name translation is stubbed so only the key layout is checked here.
expect(described_class).to receive(:time_period_to_human_name).with(time_period).and_return(time_period_name)
Gitlab::Redis::SharedState.with do |redis|
expect(redis).to receive(:set).with("#{metric_name}_#{time_period_name}-#{timestamp}", '{"141":1,"56":1}', ex: 80.hours)
end
described_class.save_aggregated_metrics(**method_params)
end
context 'error handling' do
# Every attempt to obtain a Redis connection fails with a command error.
before do
allow(Gitlab::Redis::SharedState).to receive(:with).and_raise(::Redis::CommandError)
end
it 'rescues and reraise ::Redis::CommandError for development and test environments' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error ::Redis::CommandError
end
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'rescues ::Redis::CommandError' do
expect { described_class.save_aggregated_metrics(**method_params) }.not_to raise_error
end
end
end
end
context 'with incompatible data argument' do
# An Integer is not a Buckets instance, so it must never be written to Redis.
let(:data) { 1 }
context 'for environment different than development' do
before do
stub_rails_env('production')
end
it 'does not persist data in Redis' do
Gitlab::Redis::SharedState.with do |redis|
expect(redis).not_to receive(:set)
end
described_class.save_aggregated_metrics(**method_params)
end
end
it 'raises error for development environment' do
expect { described_class.save_aggregated_metrics(**method_params) }.to raise_error /Unsupported data type/
end
end
end
# Specs for time_period_to_human_name: an empty period maps to 'all_time',
# ranges of up to 7 days map to 'weekly', longer ranges map to 'monthly'.
describe '#time_period_to_human_name' do
it 'translates empty time period as all_time' do
expect(described_class.time_period_to_human_name({})).to eql 'all_time'
end
# 7 days is the inclusive upper boundary of the weekly bucket.
it 'translates time period not longer than 7 days as weekly', :aggregate_failures do
days_6_time_period = 6.days.ago..Date.current
days_7_time_period = 7.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_6_time_period)).to eql 'weekly'
expect(described_class.time_period_to_human_name(column_name: days_7_time_period)).to eql 'weekly'
end
# 8 days is the first range length that is reported as monthly.
it 'translates time period longer than 7 days as monthly', :aggregate_failures do
days_8_time_period = 8.days.ago..Date.current
days_31_time_period = 31.days.ago..Date.current
expect(described_class.time_period_to_human_name(column_name: days_8_time_period)).to eql 'monthly'
expect(described_class.time_period_to_human_name(column_name: days_31_time_period)).to eql 'monthly'
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment