Commit fc6c53e6 authored by Andreas Brandl's avatar Andreas Brandl

Add migration for product analytics table

This table is part of the "product analytics experiment" and as such
marked "experimental". The goal here is to explore the product analytics
as a MVP feature more. We are explicitly not spending time on relational
modeling here.

We expect significant changes to the database part of this once the
feature has been validated. Therefore, we expect to drop the table
when feature validation is complete. All data will be lost.
parent 9baace02
---
title: Add migration for experimental product analytics table
merge_request: 35168
author:
type: added
# frozen_string_literal: true
class AddProductAnalyticsTable < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
# Table is based on https://github.com/snowplow/snowplow/blob/master/4-storage/postgres-storage/sql/atomic-def.sql 6e07b1c, with the following differences:
# * app_id varchar -> project_id integer (+ FK)
# * Add `id bigserial`
# * Hash partitioning based on `project_id`
# * Timestamp columns: Change type to timestamp with time zone
#
# This table is part of the "product analytics experiment" and as such marked "experimental". The goal here is to
# explore the product analytics as a MVP feature more. We are explicitly not spending time on relational modeling
# here.
#
# We expect significant changes to the database part of this once the feature has been validated.
# Therefore, we expect to drop the table when feature validation is complete. All data will be lost.
def up
with_lock_retries do
execute <<~SQL
CREATE TABLE "product_analytics_events_experimental" (
id bigserial NOT NULL,
-- App
"project_id" integer NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
"platform" varchar(255),
-- Date/time
"etl_tstamp" timestamp with time zone,
"collector_tstamp" timestamp with time zone NOT NULL,
"dvce_created_tstamp" timestamp with time zone,
-- Date/time
"event" varchar(128),
"event_id" char(36) NOT NULL,
"txn_id" integer,
-- Versioning
"name_tracker" varchar(128),
"v_tracker" varchar(100),
"v_collector" varchar(100) NOT NULL,
"v_etl" varchar(100) NOT NULL,
-- User and visit
"user_id" varchar(255),
"user_ipaddress" varchar(45),
"user_fingerprint" varchar(50),
"domain_userid" varchar(36),
"domain_sessionidx" smallint,
"network_userid" varchar(38),
-- Location
"geo_country" char(2),
"geo_region" char(3),
"geo_city" varchar(75),
"geo_zipcode" varchar(15),
"geo_latitude" double precision,
"geo_longitude" double precision,
"geo_region_name" varchar(100),
-- IP lookups
"ip_isp" varchar(100),
"ip_organization" varchar(100),
"ip_domain" varchar(100),
"ip_netspeed" varchar(100),
-- Page
"page_url" text,
"page_title" varchar(2000),
"page_referrer" text,
-- Page URL components
"page_urlscheme" varchar(16),
"page_urlhost" varchar(255),
"page_urlport" integer,
"page_urlpath" varchar(3000),
"page_urlquery" varchar(6000),
"page_urlfragment" varchar(3000),
-- Referrer URL components
"refr_urlscheme" varchar(16),
"refr_urlhost" varchar(255),
"refr_urlport" integer,
"refr_urlpath" varchar(6000),
"refr_urlquery" varchar(6000),
"refr_urlfragment" varchar(3000),
-- Referrer details
"refr_medium" varchar(25),
"refr_source" varchar(50),
"refr_term" varchar(255),
-- Marketing
"mkt_medium" varchar(255),
"mkt_source" varchar(255),
"mkt_term" varchar(255),
"mkt_content" varchar(500),
"mkt_campaign" varchar(255),
-- Custom structured event
"se_category" varchar(1000),
"se_action" varchar(1000),
"se_label" varchar(1000),
"se_property" varchar(1000),
"se_value" double precision,
-- Ecommerce
"tr_orderid" varchar(255),
"tr_affiliation" varchar(255),
"tr_total" decimal(18,2),
"tr_tax" decimal(18,2),
"tr_shipping" decimal(18,2),
"tr_city" varchar(255),
"tr_state" varchar(255),
"tr_country" varchar(255),
"ti_orderid" varchar(255),
"ti_sku" varchar(255),
"ti_name" varchar(255),
"ti_category" varchar(255),
"ti_price" decimal(18,2),
"ti_quantity" integer,
-- Page ping
"pp_xoffset_min" integer,
"pp_xoffset_max" integer,
"pp_yoffset_min" integer,
"pp_yoffset_max" integer,
-- User Agent
"useragent" varchar(1000),
-- Browser
"br_name" varchar(50),
"br_family" varchar(50),
"br_version" varchar(50),
"br_type" varchar(50),
"br_renderengine" varchar(50),
"br_lang" varchar(255),
"br_features_pdf" boolean,
"br_features_flash" boolean,
"br_features_java" boolean,
"br_features_director" boolean,
"br_features_quicktime" boolean,
"br_features_realplayer" boolean,
"br_features_windowsmedia" boolean,
"br_features_gears" boolean,
"br_features_silverlight" boolean,
"br_cookies" boolean,
"br_colordepth" varchar(12),
"br_viewwidth" integer,
"br_viewheight" integer,
-- Operating System
"os_name" varchar(50),
"os_family" varchar(50),
"os_manufacturer" varchar(50),
"os_timezone" varchar(50),
-- Device/Hardware
"dvce_type" varchar(50),
"dvce_ismobile" boolean,
"dvce_screenwidth" integer,
"dvce_screenheight" integer,
-- Document
"doc_charset" varchar(128),
"doc_width" integer,
"doc_height" integer,
-- Currency
"tr_currency" char(3),
"tr_total_base" decimal(18, 2),
"tr_tax_base" decimal(18, 2),
"tr_shipping_base" decimal(18, 2),
"ti_currency" char(3),
"ti_price_base" decimal(18, 2),
"base_currency" char(3),
-- Geolocation
"geo_timezone" varchar(64),
-- Click ID
"mkt_clickid" varchar(128),
"mkt_network" varchar(64),
-- ETL tags
"etl_tags" varchar(500),
-- Time event was sent
"dvce_sent_tstamp" timestamp with time zone,
-- Referer
"refr_domain_userid" varchar(36),
"refr_dvce_tstamp" timestamp with time zone,
-- Session ID
"domain_sessionid" char(36),
-- Derived timestamp
"derived_tstamp" timestamp with time zone,
-- Event schema
"event_vendor" varchar(1000),
"event_name" varchar(1000),
"event_format" varchar(128),
"event_version" varchar(128),
-- Event fingerprint
"event_fingerprint" varchar(128),
-- True timestamp
"true_tstamp" timestamp with time zone,
PRIMARY KEY (id, project_id)
) PARTITION BY HASH (project_id)
WITHOUT OIDS;
CREATE INDEX index_product_analytics_events_experimental_project_and_time ON product_analytics_events_experimental (project_id, collector_tstamp);
SQL
create_hash_partitions :product_analytics_events_experimental, 64
end
end
def down
with_lock_retries do
execute 'DROP TABLE product_analytics_events_experimental'
end
end
end
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -63,6 +63,7 @@ RSpec.describe 'Database schema' do ...@@ -63,6 +63,7 @@ RSpec.describe 'Database schema' do
oauth_access_tokens: %w[resource_owner_id application_id], oauth_access_tokens: %w[resource_owner_id application_id],
oauth_applications: %w[owner_id], oauth_applications: %w[owner_id],
open_project_tracker_data: %w[closed_status_id], open_project_tracker_data: %w[closed_status_id],
product_analytics_events_experimental: %w[event_id txn_id user_id],
project_group_links: %w[group_id], project_group_links: %w[group_id],
project_statistics: %w[namespace_id], project_statistics: %w[namespace_id],
projects: %w[creator_id ci_id mirror_user_id], projects: %w[creator_id ci_id mirror_user_id],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment