Batch insert CI rspec_profiling data

Instead of inserting a row after each example to an external database, we save the CI profiling reports into the `rspec_profiling` directory and insert the data in the update-tests-metadata CI stage. This should make each spec run faster and also reduce the number of PostgreSQL connections needed by concurrent CI builds. `scripts/insert-rspec-profiling-data` also inserts one file at a time via the PostgreSQL COPY command for faster inserts. The one side effect is that the `created_at` and `updated_at` timestamps aren't available since they aren't generated in the CSV. Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/10154

Batch insert CI rspec_profiling data
Instead of inserting a row after each example to an external database, we save the CI profiling reports into the `rspec_profiling` directory and insert the data in the update-tests-metadata CI stage. This should make each spec run faster and also reduce the number of PostgreSQL connections needed by concurrent CI builds. `scripts/insert-rspec-profiling-data` also inserts one file at a time via the PostgreSQL COPY command for faster inserts. The one side effect is that the `created_at` and `updated_at` timestamps aren't available since they aren't generated in the CSV. Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/10154
f1103017 · Stan Hu · Rémy Coutable · 32056fc7 · f1103017 · f1103017
Commit f1103017 authored Mar 12, 2019 by Stan Hu Committed by Rémy Coutable Mar 12, 2019
Showing with 87 additions and 16 deletions

.gitlab-ci.yml +5 -1

config/initializers/rspec_profiling.rb +35 -15

scripts/insert-rspec-profiling-data +47 -0

No files found.
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -66,6 +66,7 @@ stages:
    paths:
      - knapsack/
      - rspec_flaky/
+      - rspec_profiling/
 .use-pg: &use-pg
  services:
@@ -159,6 +160,7 @@ stages:
      - coverage/
      - knapsack/
      - rspec_flaky/
+      - rspec_profiling/
      - tmp/capybara/
    reports:
      junit: junit_rspec.xml
@@ -336,6 +338,7 @@ retrieve-tests-metadata:
    - wget -O $KNAPSACK_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$KNAPSACK_RSPEC_SUITE_REPORT_PATH || rm $KNAPSACK_RSPEC_SUITE_REPORT_PATH
    - '[[ -f $KNAPSACK_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${KNAPSACK_RSPEC_SUITE_REPORT_PATH}'
    - mkdir -p rspec_flaky/
+    - mkdir -p rspec_profiling/
    - wget -O $FLAKY_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$FLAKY_RSPEC_SUITE_REPORT_PATH || rm $FLAKY_RSPEC_SUITE_REPORT_PATH
    - '[[ -f $FLAKY_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${FLAKY_RSPEC_SUITE_REPORT_PATH}'
@@ -350,7 +353,7 @@ update-tests-metadata:
      - rspec_flaky/
    policy: push
  script:
-    - retry gem install fog-aws mime-types activesupport --no-document
+    - retry gem install fog-aws mime-types activesupport rspec_profiling postgres-copy --no-document
    - scripts/merge-reports ${KNAPSACK_RSPEC_SUITE_REPORT_PATH} knapsack/${CI_PROJECT_NAME}/rspec-pg_node_*.json
    - scripts/merge-reports ${FLAKY_RSPEC_SUITE_REPORT_PATH} rspec_flaky/all_*_*.json
    - FLAKY_RSPEC_GENERATE_REPORT=1 scripts/prune-old-flaky-specs ${FLAKY_RSPEC_SUITE_REPORT_PATH}
@@ -358,6 +361,7 @@ update-tests-metadata:
    - '[[ -z ${TESTS_METADATA_S3_BUCKET} ]] || scripts/sync-reports put $TESTS_METADATA_S3_BUCKET $FLAKY_RSPEC_SUITE_REPORT_PATH'
    - rm -f knapsack/${CI_PROJECT_NAME}/*_node_*.json
    - rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json
+    - scripts/insert-rspec-profiling-data
 flaky-examples-check:
  <<: *dedicated-runner

--- a/config/initializers/rspec_profiling.rb
+++ b/config/initializers/rspec_profiling.rb
+# frozen_string_literal: true
+return unless Rails.env.test? # top-level return: the rest of this initializer is not executed outside the test environment
 module RspecProfilingExt
-  module PSQL
+  module Collectors
-    def establish_connection
+    class CSVWithTimestamps < ::RspecProfiling::Collectors::CSV # CSV collector whose rows also carry the TIMESTAMP_FIELDS columns
-      ::RspecProfiling::Collectors::PSQL::Result.establish_connection(ENV['RSPEC_PROFILING_POSTGRES_URL'])
+      TIMESTAMP_FIELDS = %w(created_at updated_at).freeze
+      HEADERS = (::RspecProfiling::Collectors::CSV::HEADERS + TIMESTAMP_FIELDS).freeze # parent headers first, timestamp columns appended last
+      def insert(attributes) # appends one CSV row built in HEADERS order
+        output << HEADERS.map do |field|
+          if TIMESTAMP_FIELDS.include?(field)
+            Time.now # both timestamp columns are filled with the current time at insert
+          else
+            attributes.fetch(field.to_sym) # headers are strings, attributes are looked up by symbol; NOTE(review): assumes attributes is a Hash, so a missing key raises instead of writing a blank cell
+          end
+        end
+      end
+      private
+      def output # memoized CSV handle; NOTE(review): path is not defined in this class, presumably inherited from the parent collector
+        @output ||= ::CSV.open(path, "w").tap { |csv| csv << HEADERS } # opened in write mode on first use, header row written immediately
+      end
    end
  end
@@ -10,9 +31,13 @@ module RspecProfilingExt
      if ENV['CI_COMMIT_REF_NAME']
        "#{defined?(Gitlab::License) ? 'ee' : 'ce'}:#{ENV['CI_COMMIT_REF_NAME']}"
      else
-        super
+        super&.chomp
      end
    end
+    def sha # parent implementation's value with a trailing newline removed
+      super&.chomp # safe navigation: a nil result from super stays nil
+    end
  end
  module Run
@@ -30,16 +55,11 @@ module RspecProfilingExt
  end
 end
-if Rails.env.test?
+RspecProfiling.configure do |config|
-  RspecProfiling.configure do |config|
+  if ENV.key?('CI') || ENV.key?('RSPEC_PROFILING') # enabled when either variable is present in ENV, whatever its value
-    if ENV['RSPEC_PROFILING_POSTGRES_URL'].present?
+    RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git)
-      RspecProfiling::Collectors::PSQL.prepend(RspecProfilingExt::PSQL)
+    RspecProfiling::Run.prepend(RspecProfilingExt::Run)
-      config.collector = RspecProfiling::Collectors::PSQL
+    config.collector = RspecProfilingExt::Collectors::CSVWithTimestamps # results go to a CSV file instead of the PSQL collector
-    end
+    config.csv_path = -> { "rspec_profiling/#{Time.now.to_i}-#{SecureRandom.hex(8)}-rspec-data.csv" } # file name: epoch seconds plus 16 random hex characters
-    if ENV.key?('CI')
-      RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git)
-      RspecProfiling::Run.prepend(RspecProfilingExt::Run)
-    end
  end
 end
--- a/scripts/insert-rspec-profiling-data
+++ b/scripts/insert-rspec-profiling-data
+#!/usr/bin/env ruby
+require 'csv'
+require 'rspec_profiling'
+require 'postgres-copy'
+module RspecProfiling
+  module Collectors
+    class PSQL # NOTE(review): presumably reopens the collector loaded by require 'rspec_profiling' so the methods below replace its versions; confirm against the gem
+      def establish_connection
+        # This disables the automatic creation of the database and
+        # table. In the future, we may want a way to specify the host of
+        # the database to connect so that we can call #install.
+        Result.establish_connection(results_url)
+      end
+      def prepared? # asks the connection whether `table` already exists as a data source
+        connection.data_source_exists?(table)
+      end
+      def results_url # connection URL comes from the environment; nil when the variable is unset
+        ENV['RSPEC_PROFILING_POSTGRES_URL']
+      end
+      class Result < ActiveRecord::Base
+        acts_as_copy_target # NOTE(review): presumably the postgres-copy macro that provides Result.copy_from; confirm
+      end
+    end
+  end
+end
+def insert_data(path) # loads each *.csv found in the directory `path` via Result.copy_from, one call per file, logging progress to stdout
+  puts "#{Time.now} Inserting CI stats..."
+  collector = RspecProfiling::Collectors::PSQL.new
+  collector.install # NOTE(review): install is not defined in this script; presumably the gem's table setup, guarded by prepared? — confirm
+  files = Dir[File.join(path, "*.csv")] # matches only files directly inside path; subdirectories are not searched
+  files.each do |filename|
+    puts "#{Time.now} Inserting #{filename}..."
+    result = RspecProfiling::Collectors::PSQL::Result.copy_from(filename)
+    puts "#{Time.now} Inserted #{result.cmd_tuples} lines in #{filename}, DB response: #{result.cmd_status}"
+  end
+end
+insert_data('rspec_profiling') if ENV['RSPEC_PROFILING_POSTGRES_URL'].present? # skipped entirely when the Postgres URL is unset or blank