Commit f1103017 authored by Stan Hu, committed by Rémy Coutable

Batch insert CI rspec_profiling data

Instead of inserting a row after each example to an external database,
we save the CI profiling reports into the `rspec_profiling` directory
and insert the data in the update-tests-metadata CI stage.  This should
make each spec run faster and also reduce the number of PostgreSQL
connections needed by concurrent CI builds.

`scripts/insert-rspec-profiling-data` also inserts one file at a time
via the PostgreSQL COPY command for faster inserts. The one side effect
is that the `created_at` and `updated_at` timestamps aren't available
since they aren't generated in the CSV.

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/10154
parent 32056fc7
...@@ -66,6 +66,7 @@ stages: ...@@ -66,6 +66,7 @@ stages:
paths: paths:
- knapsack/ - knapsack/
- rspec_flaky/ - rspec_flaky/
- rspec_profiling/
.use-pg: &use-pg .use-pg: &use-pg
services: services:
...@@ -159,6 +160,7 @@ stages: ...@@ -159,6 +160,7 @@ stages:
- coverage/ - coverage/
- knapsack/ - knapsack/
- rspec_flaky/ - rspec_flaky/
- rspec_profiling/
- tmp/capybara/ - tmp/capybara/
reports: reports:
junit: junit_rspec.xml junit: junit_rspec.xml
...@@ -336,6 +338,7 @@ retrieve-tests-metadata: ...@@ -336,6 +338,7 @@ retrieve-tests-metadata:
- wget -O $KNAPSACK_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$KNAPSACK_RSPEC_SUITE_REPORT_PATH || rm $KNAPSACK_RSPEC_SUITE_REPORT_PATH - wget -O $KNAPSACK_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$KNAPSACK_RSPEC_SUITE_REPORT_PATH || rm $KNAPSACK_RSPEC_SUITE_REPORT_PATH
- '[[ -f $KNAPSACK_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${KNAPSACK_RSPEC_SUITE_REPORT_PATH}' - '[[ -f $KNAPSACK_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${KNAPSACK_RSPEC_SUITE_REPORT_PATH}'
- mkdir -p rspec_flaky/ - mkdir -p rspec_flaky/
- mkdir -p rspec_profiling/
- wget -O $FLAKY_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$FLAKY_RSPEC_SUITE_REPORT_PATH || rm $FLAKY_RSPEC_SUITE_REPORT_PATH - wget -O $FLAKY_RSPEC_SUITE_REPORT_PATH http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/$FLAKY_RSPEC_SUITE_REPORT_PATH || rm $FLAKY_RSPEC_SUITE_REPORT_PATH
- '[[ -f $FLAKY_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${FLAKY_RSPEC_SUITE_REPORT_PATH}' - '[[ -f $FLAKY_RSPEC_SUITE_REPORT_PATH ]] || echo "{}" > ${FLAKY_RSPEC_SUITE_REPORT_PATH}'
...@@ -350,7 +353,7 @@ update-tests-metadata: ...@@ -350,7 +353,7 @@ update-tests-metadata:
- rspec_flaky/ - rspec_flaky/
policy: push policy: push
script: script:
- retry gem install fog-aws mime-types activesupport --no-document - retry gem install fog-aws mime-types activesupport rspec_profiling postgres-copy --no-document
- scripts/merge-reports ${KNAPSACK_RSPEC_SUITE_REPORT_PATH} knapsack/${CI_PROJECT_NAME}/rspec-pg_node_*.json - scripts/merge-reports ${KNAPSACK_RSPEC_SUITE_REPORT_PATH} knapsack/${CI_PROJECT_NAME}/rspec-pg_node_*.json
- scripts/merge-reports ${FLAKY_RSPEC_SUITE_REPORT_PATH} rspec_flaky/all_*_*.json - scripts/merge-reports ${FLAKY_RSPEC_SUITE_REPORT_PATH} rspec_flaky/all_*_*.json
- FLAKY_RSPEC_GENERATE_REPORT=1 scripts/prune-old-flaky-specs ${FLAKY_RSPEC_SUITE_REPORT_PATH} - FLAKY_RSPEC_GENERATE_REPORT=1 scripts/prune-old-flaky-specs ${FLAKY_RSPEC_SUITE_REPORT_PATH}
...@@ -358,6 +361,7 @@ update-tests-metadata: ...@@ -358,6 +361,7 @@ update-tests-metadata:
- '[[ -z ${TESTS_METADATA_S3_BUCKET} ]] || scripts/sync-reports put $TESTS_METADATA_S3_BUCKET $FLAKY_RSPEC_SUITE_REPORT_PATH' - '[[ -z ${TESTS_METADATA_S3_BUCKET} ]] || scripts/sync-reports put $TESTS_METADATA_S3_BUCKET $FLAKY_RSPEC_SUITE_REPORT_PATH'
- rm -f knapsack/${CI_PROJECT_NAME}/*_node_*.json - rm -f knapsack/${CI_PROJECT_NAME}/*_node_*.json
- rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json - rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json
- scripts/insert-rspec-profiling-data
flaky-examples-check: flaky-examples-check:
<<: *dedicated-runner <<: *dedicated-runner
......
# frozen_string_literal: true
return unless Rails.env.test?
module RspecProfilingExt module RspecProfilingExt
module PSQL module Collectors
def establish_connection class CSVWithTimestamps < ::RspecProfiling::Collectors::CSV
::RspecProfiling::Collectors::PSQL::Result.establish_connection(ENV['RSPEC_PROFILING_POSTGRES_URL']) TIMESTAMP_FIELDS = %w(created_at updated_at).freeze
HEADERS = (::RspecProfiling::Collectors::CSV::HEADERS + TIMESTAMP_FIELDS).freeze
def insert(attributes)
output << HEADERS.map do |field|
if TIMESTAMP_FIELDS.include?(field)
Time.now
else
attributes.fetch(field.to_sym)
end
end
end
private
def output
@output ||= ::CSV.open(path, "w").tap { |csv| csv << HEADERS }
end
end end
end end
...@@ -10,8 +31,12 @@ module RspecProfilingExt ...@@ -10,8 +31,12 @@ module RspecProfilingExt
if ENV['CI_COMMIT_REF_NAME'] if ENV['CI_COMMIT_REF_NAME']
"#{defined?(Gitlab::License) ? 'ee' : 'ce'}:#{ENV['CI_COMMIT_REF_NAME']}" "#{defined?(Gitlab::License) ? 'ee' : 'ce'}:#{ENV['CI_COMMIT_REF_NAME']}"
else else
super super&.chomp
end
end end
def sha
super&.chomp
end end
end end
...@@ -30,16 +55,11 @@ module RspecProfilingExt ...@@ -30,16 +55,11 @@ module RspecProfilingExt
end end
end end
if Rails.env.test? RspecProfiling.configure do |config|
RspecProfiling.configure do |config| if ENV.key?('CI') || ENV.key?('RSPEC_PROFILING')
if ENV['RSPEC_PROFILING_POSTGRES_URL'].present?
RspecProfiling::Collectors::PSQL.prepend(RspecProfilingExt::PSQL)
config.collector = RspecProfiling::Collectors::PSQL
end
if ENV.key?('CI')
RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git) RspecProfiling::VCS::Git.prepend(RspecProfilingExt::Git)
RspecProfiling::Run.prepend(RspecProfilingExt::Run) RspecProfiling::Run.prepend(RspecProfilingExt::Run)
end config.collector = RspecProfilingExt::Collectors::CSVWithTimestamps
config.csv_path = -> { "rspec_profiling/#{Time.now.to_i}-#{SecureRandom.hex(8)}-rspec-data.csv" }
end end
end end
#!/usr/bin/env ruby
require 'csv'
require 'rspec_profiling'
require 'postgres-copy'
# Reopens the PSQL collector from the rspec_profiling gem so that this script
# connects to the database named by RSPEC_PROFILING_POSTGRES_URL and can load
# CSV files through the collector's Result model.
module RspecProfiling
  module Collectors
    class PSQL
      # Result model for the profiling rows. `acts_as_copy_target` is what
      # provides `Result.copy_from` used by this script (presumably supplied
      # by the postgres-copy gem required above -- confirm against the gem).
      class Result < ActiveRecord::Base
        acts_as_copy_target
      end

      # Connection URL of the results database, taken from the environment.
      # Returns nil when RSPEC_PROFILING_POSTGRES_URL is not set.
      def results_url
        ENV['RSPEC_PROFILING_POSTGRES_URL']
      end

      # Connects Result directly to results_url.
      #
      # Overriding this turns off the automatic creation of the database and
      # table. Later on we may want a way to name the database host to connect
      # to, so that #install can be called.
      def establish_connection
        Result.establish_connection(results_url)
      end

      # True when the collector's table already exists on the connection.
      # NOTE(review): `connection` and `table` are not defined in this file;
      # they are assumed to come from the gem's PSQL collector.
      def prepared?
        connection.data_source_exists?(table)
      end
    end
  end
end
# Loads every CSV report found directly under +path+ into the profiling
# database, one file at a time, logging progress to standard output.
#
# The PSQL collector's #install is invoked once before any file is loaded.
# Each file is then handed to Result.copy_from, and the tuple count and
# command status reported by the returned result are printed.
#
# @param path [String] directory that contains the `*.csv` report files
# @return [Array<String>] the CSV file paths that were processed
def insert_data(path)
  puts "#{Time.now} Inserting CI stats..."

  RspecProfiling::Collectors::PSQL.new.install

  Dir.glob(File.join(path, "*.csv")).each do |csv_file|
    puts "#{Time.now} Inserting #{csv_file}..."

    copy_result = RspecProfiling::Collectors::PSQL::Result.copy_from(csv_file)
    inserted = copy_result.cmd_tuples
    status = copy_result.cmd_status

    puts "#{Time.now} Inserted #{inserted} lines in #{csv_file}, DB response: #{status}"
  end
end
# Only load the reports when a results database URL has been configured;
# without RSPEC_PROFILING_POSTGRES_URL the script does nothing.
if ENV['RSPEC_PROFILING_POSTGRES_URL'].present?
  insert_data('rspec_profiling')
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment