Commit ea0632b5 authored by Gabriel Mazetto's avatar Gabriel Mazetto

Merge branch 'mwaw/calculate_aggregated_xmaus_over_redis_hll_amaus' into 'master'

Calculate xMAUs unions for Redis HLL metrics

See merge request gitlab-org/gitlab!44624
parents 70c5ff50 6eb80521
---
name: product_analytics_aggregated_metrics
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/44624
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/267550
type: development
group: group::product analytics
default_enabled: false
......@@ -689,6 +689,14 @@ module Gitlab
{ redis_hll_counters: ::Gitlab::UsageDataCounters::HLLRedisCounter.unique_events_data }
end
def aggregated_metrics
return {} unless Feature.enabled?(:product_analytics_aggregated_metrics)
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_data
}
end
def analytics_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits.analytics_events.each_with_object({}) do |target, hash|
hash[target] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target) }
......
#- name: unique name of aggregated metric
# operator: aggregation operator. Valid values are:
# - "ANY": counts unique elements that were observed triggering any of following events
# events: list of events names to aggregate into metric. All events in this list must have the same 'redis_slot' and 'aggregation' attributes
# see from lib/gitlab/usage_data_counters/known_events.yml for the list of valid events.
---
- name: product_analytics_test_aggregated_metrics
operator: ANY
events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
......@@ -5,13 +5,20 @@ module Gitlab
module HLLRedisCounter
DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
DEFAULT_REDIS_SLOT = ''.freeze
DEFAULT_REDIS_SLOT = ''
UnknownEvent = Class.new(StandardError)
UnknownAggregation = Class.new(StandardError)
EventError = Class.new(StandardError)
UnknownEvent = Class.new(EventError)
UnknownAggregation = Class.new(EventError)
AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError)
UnknownAggregationOperator = Class.new(EventError)
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
AGGREGATED_METRICS_PATH = 'lib/gitlab/usage_data_counters/aggregated_metrics.yml'
ALLOWED_METRICS_AGGREGATIONS = %w[ANY].freeze
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
......@@ -38,23 +45,17 @@ module Gitlab
event = event_for(event_name)
raise UnknownEvent.new("Unknown event #{event_name}") unless event.present?
raise UnknownEvent, "Unknown event #{event_name}" unless event.present?
Gitlab::Redis::HLL.add(key: redis_key(event, time), value: entity_id, expiry: expiry(event))
end
def unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names).map(&:to_s))
raise 'Events should be in same slot' unless events_in_same_slot?(events)
raise 'Events should be in same category' unless events_in_same_category?(events)
raise 'Events should have same aggregation level' unless events_same_aggregation?(events)
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
redis_usage_data { Gitlab::Redis::HLL.count(keys: keys) }
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise CategoryMismatch, events unless events_in_same_category?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def categories
......@@ -72,8 +73,8 @@ module Gitlab
events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash|
hash["#{event}_weekly"] = unique_events(event_names: event, start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current)
end
if eligible_for_totals?(events_names)
......@@ -89,8 +90,34 @@ module Gitlab
event_for(event_name).present?
end
def aggregated_metrics_data
aggregated_metrics.to_h do |aggregation|
[aggregation[:name], calculate_count_for_aggregation(aggregation)]
end
end
private
def calculate_count_for_aggregation(aggregation)
validate_aggregation_operator!(aggregation[:operator])
count_unique_events(event_names: aggregation[:events], start_date: 4.weeks.ago.to_date, end_date: Date.current) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def count_unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names).map(&:to_s))
yield events if block_given?
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
redis_usage_data { Gitlab::Redis::HLL.count(keys: keys) }
end
# Allow to add totals for events that are in the same redis slot, category and have the same aggregation level
# and if there are more than 1 event
def eligible_for_totals?(events_names)
......@@ -109,7 +136,15 @@ module Gitlab
end
def known_events
@known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access)
@known_events ||= load_yaml_from_path(KNOWN_EVENTS_PATH)
end
def aggregated_metrics
@aggregated_metrics ||= (load_yaml_from_path(AGGREGATED_METRICS_PATH) || [])
end
def load_yaml_from_path(path)
YAML.safe_load(File.read(Rails.root.join(path)))&.map(&:with_indifferent_access)
end
def known_events_names
......@@ -179,6 +214,12 @@ module Gitlab
end.flatten
end
def validate_aggregation_operator!(operator)
return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
end
def weekly_redis_keys(events:, start_date:, end_date:)
weeks = end_date.to_date.cweek - start_date.to_date.cweek
weeks = 1 if weeks == 0
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe 'aggregated metrics' do
RSpec::Matchers.define :be_known_event do
match do |event|
Gitlab::UsageDataCounters::HLLRedisCounter.known_event?(event)
end
failure_message do
"Event with name: `#{event}` can not be found within `#{Gitlab::UsageDataCounters::HLLRedisCounter::KNOWN_EVENTS_PATH}`"
end
end
let_it_be(:known_events) do
YAML.load_file(
Rails.root.join(Gitlab::UsageDataCounters::HLLRedisCounter::KNOWN_EVENTS_PATH)
).map(&:with_indifferent_access)
end
YAML.load_file(Rails.root.join(Gitlab::UsageDataCounters::HLLRedisCounter::AGGREGATED_METRICS_PATH))&.map(&:with_indifferent_access).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
expect(event_names).to eq(event_names&.uniq)
end
aggregated_metrics&.each do |aggregate|
context "for #{aggregate[:name]} aggregate of #{aggregate[:events].join(' ')}" do
let_it_be(:events_records) { known_events.select { |event| aggregate[:events].include?(event[:name]) } }
it "only refers to known events" do
expect(aggregate[:events]).to all be_known_event
end
it "has expected structure" do
expect(aggregate.keys).to match_array(%w[name operator events])
end
it "uses allowed aggregation operators" do
expect(Gitlab::UsageDataCounters::HLLRedisCounter::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
end
it "uses events from the same Redis slot" do
event_slots = events_records.map { |event| event[:redis_slot] }.uniq
expect(event_slots).to contain_exactly(be_present)
end
it "uses events with the same aggregation period" do
event_slots = events_records.map { |event| event[:aggregation] }.uniq
expect(event_slots).to contain_exactly(be_present)
end
end
end
end
end
......@@ -36,8 +36,8 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
let(:custom_daily_event) { 'g_analytics_custom' }
let(:global_category) { 'global' }
let(:compliance_category) {'compliance' }
let(:productivity_category) {'productivity' }
let(:compliance_category) { 'compliance' }
let(:productivity_category) { 'productivity' }
let(:analytics_category) { 'analytics' }
let(:known_events) do
......@@ -84,11 +84,11 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.track_event(entity1, different_aggregation, Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
expect { described_class.track_event(entity1, different_aggregation, Date.current) }.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
end
it 'raise error if metrics of unknown aggregation' do
expect { described_class.track_event(entity1, 'unknown', Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
expect { described_class.track_event(entity1, 'unknown', Date.current) }.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
context 'for weekly events' do
......@@ -184,41 +184,47 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
it 'raise error if metrics are not in the same slot' do
expect { described_class.unique_events(event_names: [compliance_slot_event, analytics_slot_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same slot')
expect do
described_class.unique_events(event_names: [compliance_slot_event, analytics_slot_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::SlotMismatch)
end
it 'raise error if metrics are not in the same category' do
expect { described_class.unique_events(event_names: [category_analytics_event, category_productivity_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same category')
expect do
described_class.unique_events(event_names: [category_analytics_event, category_productivity_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::CategoryMismatch)
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.unique_events(event_names: [daily_event, weekly_event], start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should have same aggregation level')
expect do
described_class.unique_events(event_names: [daily_event, weekly_event], start_date: 4.weeks.ago, end_date: Date.current)
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::AggregationMismatch)
end
context 'when data for the last complete week' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 1.week.ago, end_date: Date.current)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 1.week.ago, end_date: Date.current)).to eq(1) }
end
context 'when data for the last 4 complete weeks' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 4.weeks.ago, end_date: Date.current)).to eq(2) }
end
context 'when data for the week 4 weeks ago' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event], start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
end
context 'when using symbol as parameter' do
it { expect(described_class.unique_events(event_names: weekly_event.to_sym, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [weekly_event.to_sym], start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
end
context 'when using daily aggregation' do
it { expect(described_class.unique_events(event_names: daily_event, start_date: 7.days.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: Date.current)).to eq(3) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 7.days.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 28.days.ago, end_date: Date.current)).to eq(3) }
it { expect(described_class.unique_events(event_names: [daily_event], start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1) }
end
context 'when no slot is set' do
it { expect(described_class.unique_events(event_names: no_slot, start_date: 7.days.ago, end_date: Date.current)).to eq(1) }
it { expect(described_class.unique_events(event_names: [no_slot], start_date: 7.days.ago, end_date: Date.current)).to eq(1) }
end
end
end
......@@ -267,4 +273,54 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
expect(subject.unique_events_data).to eq(results)
end
end
describe '.aggregated_metrics_data' do
context 'no combination is tracked' do
it 'returns empty hash' do
allow(described_class).to receive(:aggregated_metrics).and_return([])
expect(subject.aggregated_metrics_data).to eq({})
end
end
context 'there are some combinations defined' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "ANY" },
{ name: 'gmau_2', events: %w[event3], operator: "ANY" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
described_class.track_event(entity1, 'event1_slot', 2.days.ago)
described_class.track_event(entity1, 'event2_slot', 2.days.ago)
described_class.track_event(entity3, 'event2_slot', 3.days.ago)
# events in different slots
described_class.track_event(entity1, 'event3', 2.days.ago)
described_class.track_event(entity2, 'event3', 2.days.ago)
described_class.track_event(entity4, 'event3', 2.days.ago)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3
}
expect(subject.aggregated_metrics_data).to eq(results)
end
end
end
end
......@@ -1234,6 +1234,32 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
end
end
describe 'aggregated_metrics' do
subject(:aggregated_metrics) { described_class.aggregated_metrics }
context 'with product_analytics_aggregated_metrics feature flag on' do
before do
stub_feature_flags(product_analytics_aggregated_metrics: true)
end
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_data).and_return(global_search_gmau: 123)
expect(aggregated_metrics).to eq(aggregated_metrics: { global_search_gmau: 123 })
end
end
context 'with product_analytics_aggregated_metrics feature flag off' do
before do
stub_feature_flags(product_analytics_aggregated_metrics: false)
end
it 'returns empty hash', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).not_to receive(:aggregated_metrics_data)
expect(aggregated_metrics).to be {}
end
end
end
describe '.service_desk_counts' do
subject { described_class.send(:service_desk_counts) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment