Commit 760ab23c authored by Adam Hegyi's avatar Adam Hegyi

Store hashcode for VSA stage events

Store hashcode for VSA stage events in the newly created
analytics_cycle_analytics_stage_event_hashes database table.

Changelog: added
parent 3c4d5efb
# frozen_string_literal: true
module Analytics
module CycleAnalytics
class StageEventHash < ApplicationRecord
has_many :cycle_analytics_project_stages, class_name: 'Analytics::CycleAnalytics::ProjectStage', inverse_of: :stage_event_hash
validates :hash_sha256, presence: true
# Creates or queries the id of the corresponding stage event hash code
def self.record_id_by_hash_sha256(hash)
casted_hash_code = Arel::Nodes.build_quoted(hash, Analytics::CycleAnalytics::StageEventHash.arel_table[:hash_sha256]).to_sql
# Atomic, safe insert without retrying
query = <<~SQL
WITH insert_cte AS #{Gitlab::Database::AsWithMaterialized.materialized_if_supported} (
INSERT INTO #{quoted_table_name} (hash_sha256) VALUES (#{casted_hash_code}) ON CONFLICT DO NOTHING RETURNING ID
)
SELECT ids.id FROM (
(SELECT id FROM #{quoted_table_name} WHERE hash_sha256=#{casted_hash_code} LIMIT 1)
UNION ALL
(SELECT id FROM insert_cte LIMIT 1)
) AS ids LIMIT 1
SQL
connection.execute(query).first['id']
end
def self.cleanup_if_unused(id)
unused_hashes_for(id)
.where(id: id)
.delete_all
end
def self.unused_hashes_for(id)
exists_query = Analytics::CycleAnalytics::ProjectStage.where(stage_event_hash_id: id).select('1').limit(1)
where.not('EXISTS (?)', exists_query)
end
end
end
end
Analytics::CycleAnalytics::StageEventHash.prepend_mod_with('Analytics::CycleAnalytics::StageEventHash')
......@@ -10,6 +10,7 @@ module Analytics
included do
belongs_to :start_event_label, class_name: 'GroupLabel', optional: true
belongs_to :end_event_label, class_name: 'GroupLabel', optional: true
belongs_to :stage_event_hash, class_name: 'Analytics::CycleAnalytics::StageEventHash', foreign_key: :stage_event_hash_id, optional: true
validates :name, presence: true
validates :name, exclusion: { in: Gitlab::Analytics::CycleAnalytics::DefaultStages.names }, if: :custom?
......@@ -28,6 +29,9 @@ module Analytics
scope :ordered, -> { order(:relative_position, :id) }
scope :for_list, -> { includes(:start_event_label, :end_event_label).ordered }
scope :by_value_stream, -> (value_stream) { where(value_stream_id: value_stream.id) }
before_save :ensure_stage_event_hash_id
after_commit :cleanup_old_stage_event_hash
end
def parent=(_)
......@@ -133,6 +137,20 @@ module Analytics
.id_in(label_id)
.exists?
end
def ensure_stage_event_hash_id
previous_stage_event_hash = stage_event_hash&.hash_sha256
if previous_stage_event_hash.blank? || events_hash_code != previous_stage_event_hash
self.stage_event_hash_id = Analytics::CycleAnalytics::StageEventHash.record_id_by_hash_sha256(events_hash_code)
end
end
def cleanup_old_stage_event_hash
if stage_event_hash_id_previously_changed? && stage_event_hash_id_previously_was
Analytics::CycleAnalytics::StageEventHash.cleanup_if_unused(stage_event_hash_id_previously_was)
end
end
end
end
end
# frozen_string_literal: true
class CreateAnalyticsCycleAnalyticsStageEventHashes < ActiveRecord::Migration[6.1]
def change
create_table :analytics_cycle_analytics_stage_event_hashes do |t|
t.binary :hash_sha256
t.index :hash_sha256, unique: true, name: 'index_cycle_analytics_stage_event_hashes_on_hash_sha_256'
end
end
end
# frozen_string_literal: true
class AddStageHashFkToProjectStages < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
def up
unless column_exists?(:analytics_cycle_analytics_project_stages, :stage_event_hash_id)
add_column :analytics_cycle_analytics_project_stages, :stage_event_hash_id, :bigint
end
add_concurrent_index :analytics_cycle_analytics_project_stages, :stage_event_hash_id, name: 'index_project_stages_on_stage_event_hash_id'
add_concurrent_foreign_key :analytics_cycle_analytics_project_stages, :analytics_cycle_analytics_stage_event_hashes, column: :stage_event_hash_id, on_delete: :cascade
end
def down
remove_column :analytics_cycle_analytics_project_stages, :stage_event_hash_id
end
end
# frozen_string_literal: true
class AddStageHashFkToGroupStages < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
disable_ddl_transaction!
def up
unless column_exists?(:analytics_cycle_analytics_group_stages, :stage_event_hash_id)
add_column :analytics_cycle_analytics_group_stages, :stage_event_hash_id, :bigint
end
add_concurrent_index :analytics_cycle_analytics_group_stages, :stage_event_hash_id, name: 'index_group_stages_on_stage_event_hash_id'
add_concurrent_foreign_key :analytics_cycle_analytics_group_stages, :analytics_cycle_analytics_stage_event_hashes, column: :stage_event_hash_id, on_delete: :cascade
end
def down
remove_column :analytics_cycle_analytics_group_stages, :stage_event_hash_id
end
end
f819eaed7e387f18f066180cbf9d0849b3e38db95bbf3e8487d3bc58d9b489ae
\ No newline at end of file
cb97b869bfb0b76dd0684aca1f40c86e7c1c9c9a0d52684830115288088e8066
\ No newline at end of file
5c104ffdb64943aa4828a9b961c8f9141dfd2ae861cea7116722d2b0d4598957
\ No newline at end of file
......@@ -9085,7 +9085,8 @@ CREATE TABLE analytics_cycle_analytics_group_stages (
hidden boolean DEFAULT false NOT NULL,
custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL,
group_value_stream_id bigint NOT NULL
group_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
);
CREATE SEQUENCE analytics_cycle_analytics_group_stages_id_seq
......@@ -9128,7 +9129,8 @@ CREATE TABLE analytics_cycle_analytics_project_stages (
hidden boolean DEFAULT false NOT NULL,
custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL,
project_value_stream_id bigint NOT NULL
project_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
);
CREATE SEQUENCE analytics_cycle_analytics_project_stages_id_seq
......@@ -9158,6 +9160,20 @@ CREATE SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq
ALTER SEQUENCE analytics_cycle_analytics_project_value_streams_id_seq OWNED BY analytics_cycle_analytics_project_value_streams.id;
CREATE TABLE analytics_cycle_analytics_stage_event_hashes (
id bigint NOT NULL,
hash_sha256 bytea
);
CREATE SEQUENCE analytics_cycle_analytics_stage_event_hashes_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE analytics_cycle_analytics_stage_event_hashes_id_seq OWNED BY analytics_cycle_analytics_stage_event_hashes.id;
CREATE TABLE analytics_devops_adoption_segments (
id bigint NOT NULL,
last_recorded_at timestamp with time zone,
......@@ -19925,6 +19941,8 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages ALTER COLUMN id SET DE
ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams ALTER COLUMN id SET DEFAULT nextval('analytics_cycle_analytics_project_value_streams_id_seq'::regclass);
ALTER TABLE ONLY analytics_cycle_analytics_stage_event_hashes ALTER COLUMN id SET DEFAULT nextval('analytics_cycle_analytics_stage_event_hashes_id_seq'::regclass);
ALTER TABLE ONLY analytics_devops_adoption_segments ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_segments_id_seq'::regclass);
ALTER TABLE ONLY analytics_devops_adoption_snapshots ALTER COLUMN id SET DEFAULT nextval('analytics_devops_adoption_snapshots_id_seq'::regclass);
......@@ -21044,6 +21062,9 @@ ALTER TABLE ONLY analytics_cycle_analytics_project_stages
ALTER TABLE ONLY analytics_cycle_analytics_project_value_streams
ADD CONSTRAINT analytics_cycle_analytics_project_value_streams_pkey PRIMARY KEY (id);
ALTER TABLE ONLY analytics_cycle_analytics_stage_event_hashes
ADD CONSTRAINT analytics_cycle_analytics_stage_event_hashes_pkey PRIMARY KEY (id);
ALTER TABLE ONLY analytics_devops_adoption_segments
ADD CONSTRAINT analytics_devops_adoption_segments_pkey PRIMARY KEY (id);
......@@ -23536,6 +23557,8 @@ CREATE INDEX index_custom_emoji_on_creator_id ON custom_emoji USING btree (creat
CREATE UNIQUE INDEX index_custom_emoji_on_namespace_id_and_name ON custom_emoji USING btree (namespace_id, name);
CREATE UNIQUE INDEX index_cycle_analytics_stage_event_hashes_on_hash_sha_256 ON analytics_cycle_analytics_stage_event_hashes USING btree (hash_sha256);
CREATE UNIQUE INDEX index_daily_build_group_report_results_unique_columns ON ci_daily_build_group_report_results USING btree (project_id, ref_path, date, group_name);
CREATE INDEX index_dast_profile_schedules_active_next_run_at ON dast_profile_schedules USING btree (active, next_run_at);
......@@ -23960,6 +23983,8 @@ CREATE INDEX index_group_repository_storage_moves_on_group_id ON group_repositor
CREATE UNIQUE INDEX index_group_stages_on_group_id_group_value_stream_id_and_name ON analytics_cycle_analytics_group_stages USING btree (group_id, group_value_stream_id, name);
CREATE INDEX index_group_stages_on_stage_event_hash_id ON analytics_cycle_analytics_group_stages USING btree (stage_event_hash_id);
CREATE UNIQUE INDEX index_group_wiki_repositories_on_disk_path ON group_wiki_repositories USING btree (disk_path);
CREATE INDEX index_group_wiki_repositories_on_shard_id ON group_wiki_repositories USING btree (shard_id);
......@@ -24762,6 +24787,8 @@ CREATE INDEX index_project_settings_on_project_id_partially ON project_settings
CREATE UNIQUE INDEX index_project_settings_on_push_rule_id ON project_settings USING btree (push_rule_id);
CREATE INDEX index_project_stages_on_stage_event_hash_id ON analytics_cycle_analytics_project_stages USING btree (stage_event_hash_id);
CREATE INDEX index_project_statistics_on_namespace_id ON project_statistics USING btree (namespace_id);
CREATE INDEX index_project_statistics_on_packages_size_and_project_id ON project_statistics USING btree (packages_size, project_id);
......@@ -26073,6 +26100,9 @@ ALTER TABLE ONLY members
ALTER TABLE ONLY lfs_objects_projects
ADD CONSTRAINT fk_2eb33f7a78 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE NOT VALID;
ALTER TABLE ONLY analytics_cycle_analytics_group_stages
ADD CONSTRAINT fk_3078345d6d FOREIGN KEY (stage_event_hash_id) REFERENCES analytics_cycle_analytics_stage_event_hashes(id) ON DELETE CASCADE;
ALTER TABLE ONLY lists
ADD CONSTRAINT fk_30f2a831f4 FOREIGN KEY (iteration_id) REFERENCES sprints(id) ON DELETE CASCADE;
......@@ -26514,6 +26544,9 @@ ALTER TABLE ONLY packages_packages
ALTER TABLE ONLY geo_event_log
ADD CONSTRAINT fk_c1f241c70d FOREIGN KEY (upload_deleted_event_id) REFERENCES geo_upload_deleted_events(id) ON DELETE CASCADE;
ALTER TABLE ONLY analytics_cycle_analytics_project_stages
ADD CONSTRAINT fk_c3339bdfc9 FOREIGN KEY (stage_event_hash_id) REFERENCES analytics_cycle_analytics_stage_event_hashes(id) ON DELETE CASCADE;
ALTER TABLE ONLY vulnerability_exports
ADD CONSTRAINT fk_c3d3cb5d0f FOREIGN KEY (group_id) REFERENCES namespaces(id) ON DELETE CASCADE;
# frozen_string_literal: true
module EE
module Analytics
module CycleAnalytics
module StageEventHash
extend ActiveSupport::Concern
prepended do
has_many :cycle_analytics_group_stages, class_name: 'Analytics::CycleAnalytics::GroupStage', inverse_of: :stage_event_hash
end
class_methods do
def unused_hashes_for(id)
exists_query = ::Analytics::CycleAnalytics::GroupStage.where(stage_event_hash_id: id).select('1').limit(1)
super.where.not('EXISTS (?)', exists_query)
end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Analytics::CycleAnalytics::StageEventHash, type: :model do
let(:stage_event_hash) { described_class.create!(hash_sha256: hash_sha256) }
let(:hash_sha256) { 'does_not_matter' }
describe 'associations' do
it { is_expected.to have_many(:cycle_analytics_group_stages) }
end
describe '.cleanup_if_unused' do
it 'removes the record if there is no project or group stages with given stage events hash' do
described_class.cleanup_if_unused(stage_event_hash.id)
expect(described_class.find_by_id(stage_event_hash.id)).to be_nil
end
it 'does not remove the record if at least 1 group stage for the given stage events hash exists' do
id = create(:cycle_analytics_group_stage).stage_event_hash_id
described_class.cleanup_if_unused(id)
expect(described_class.find_by_id(id)).not_to be_nil
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Analytics::CycleAnalytics::StageEventHash, type: :model do
let(:stage_event_hash) { described_class.create!(hash_sha256: hash_sha256) }
let(:hash_sha256) { 'does_not_matter' }
describe 'associations' do
it { is_expected.to have_many(:cycle_analytics_project_stages) }
end
describe 'validations' do
it { is_expected.to validate_presence_of(:hash_sha256) }
end
describe '.record_id_by_hash_sha256' do
it 'returns an existing id' do
id = stage_event_hash.id
same_id = described_class.record_id_by_hash_sha256(hash_sha256)
expect(same_id).to eq(id)
end
it 'creates a new record' do
expect do
described_class.record_id_by_hash_sha256(hash_sha256)
end.to change { described_class.count }.from(0).to(1)
end
end
describe '.cleanup_if_unused' do
it 'removes the record' do
described_class.cleanup_if_unused(stage_event_hash.id)
expect(described_class.find_by_id(stage_event_hash.id)).to be_nil
end
it 'does not remove the record' do
id = create(:cycle_analytics_project_stage).stage_event_hash_id
described_class.cleanup_if_unused(id)
expect(described_class.find_by_id(id)).not_to be_nil
end
end
end
......@@ -13,6 +13,7 @@ RSpec.shared_examples 'value stream analytics stage' do
describe 'associations' do
it { is_expected.to belong_to(:end_event_label) }
it { is_expected.to belong_to(:start_event_label) }
it { is_expected.to belong_to(:stage_event_hash) }
end
describe 'validation' do
......@@ -138,6 +139,67 @@ RSpec.shared_examples 'value stream analytics stage' do
expect(stage_1.events_hash_code).not_to eq(stage_2.events_hash_code)
end
end
# rubocop: disable Rails/SaveBang
describe '#event_hash' do
it 'associates the same stage event hash record' do
first = create(factory)
second = create(factory)
expect(first.stage_event_hash_id).to eq(second.stage_event_hash_id)
end
it 'does not introduce duplicated stage event hash records' do
expect do
create(factory)
create(factory)
end.to change { Analytics::CycleAnalytics::StageEventHash.count }.from(0).to(1)
end
it 'creates different hash record for different event configurations' do
expect do
create(factory, start_event_identifier: :issue_created, end_event_identifier: :issue_first_mentioned_in_commit)
create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)
end.to change { Analytics::CycleAnalytics::StageEventHash.count }.from(0).to(2)
end
context 'when the stage event hash changes' do
let(:stage) { create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged) }
it 'deletes the old, unused stage event hash record' do
old_stage_event_hash = stage.stage_event_hash
stage.update!(end_event_identifier: :merge_request_first_deployed_to_production)
expect(stage.stage_event_hash_id).not_to eq(old_stage_event_hash.id)
old_stage_event_hash_from_db = Analytics::CycleAnalytics::StageEventHash.find_by_id(old_stage_event_hash.id)
expect(old_stage_event_hash_from_db).to be_nil
end
it 'does not delete used stage event hash record' do
other_stage = create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)
stage.update!(end_event_identifier: :merge_request_first_deployed_to_production)
expect(stage.stage_event_hash_id).not_to eq(other_stage.stage_event_hash_id)
old_stage_event_hash_from_db = Analytics::CycleAnalytics::StageEventHash.find_by_id(other_stage.stage_event_hash_id)
expect(old_stage_event_hash_from_db).not_to be_nil
end
end
context 'when the stage events hash code does not change' do
it 'does not trigger extra query on save' do
stage = create(factory, start_event_identifier: :merge_request_created, end_event_identifier: :merge_request_merged)
expect(Analytics::CycleAnalytics::StageEventHash).not_to receive(:record_id_by_hash_sha256)
stage.update!(name: 'new title')
end
end
end
# rubocop: enable Rails/SaveBang
end
RSpec.shared_examples 'value stream analytics label based stage' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment