Commit 40919c67 authored by Rémy Coutable

Download Knapsack and flaky specs report from latest master artifacts

This dogfoods CI artifacts: it downloads the Knapsack report, flaky specs
report, and Crystalball report from the latest successful master
pipeline triggered by @gitlab-bot (this should be a scheduled pipeline
for which we're sure update-tests-metadata was run).

This also removes the need to upload these reports to S3, since they're
already uploaded as artifacts.
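
A minimal sketch of the idea (JOB_ID is illustrative; the endpoint is the
GitLab v4 job-artifacts API):

    # Download a single artifact file straight from a finished CI job,
    # with no S3 bucket in between. JOB_ID stands for the id of the
    # update-tests-metadata job found in that master pipeline.
    curl --silent --show-error \
      "https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab/jobs/${JOB_ID}/artifacts/knapsack/report-master.json" \
      > knapsack/report-master.json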
Signed-off-by: Rémy Coutable <remy@rymai.me>
parent f3c9c0e1
.tests-metadata-state:
  variables:
    TESTS_METADATA_S3_BUCKET: "gitlab-ce-cache"
  image: ruby:2.7
  before_script:
    - source scripts/utils.sh
  artifacts:
@@ -17,7 +16,8 @@ retrieve-tests-metadata:
    - .test-metadata:rules:retrieve-tests-metadata
  stage: prepare
  script:
    - source scripts/rspec_helpers.sh
    - install_api_client_dependencies_with_apt
    - source ./scripts/rspec_helpers.sh
    - retrieve_tests_metadata
update-tests-metadata:
...
@@ -12,8 +12,8 @@ Our current CI parallelization setup is as follows:
1. The `retrieve-tests-metadata` job in the `prepare` stage ensures we have a
`knapsack/report-master.json` file:
- The `knapsack/report-master.json` file is fetched from S3, if it's not here
we initialize the file with `{}`.
- The `knapsack/report-master.json` file is fetched from the latest `master`
pipeline which ran `update-tests-metadata` (for now, the 2-hourly scheduled
master pipeline); if it's missing, we initialize the file with `{}`.
1. Each `[rspec|rspec-ee] [unit|integration|system|geo] n m` job is run with
`knapsack rspec` and should have an evenly distributed share of tests (see
the sketch after this list):
- It works because the jobs have access to the `knapsack/report-master.json`
@@ -25,7 +25,7 @@ Our current CI parallelization setup is as follows:
1. The `update-tests-metadata` job (which only runs on scheduled pipelines for
[the canonical project](https://gitlab.com/gitlab-org/gitlab)) takes all the
`knapsack/rspec*_pg_*.json` files and merges them into a single
`knapsack/report-master.json` file that is then uploaded to S3.
`knapsack/report-master.json` file that is saved as an artifact.
After that, the next pipeline will use the up-to-date `knapsack/report-master.json` file.
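
For illustration, a minimal sketch of what each parallel node does with the
report (assuming the `knapsack` gem, and GitLab CI's `parallel:` keyword
providing `CI_NODE_INDEX`/`CI_NODE_TOTAL`):

```shell
# Point Knapsack at the shared timing report, then run only this
# node's slice of the suite; specs missing from the report are
# distributed evenly across nodes.
export KNAPSACK_REPORT_PATH="knapsack/report-master.json"
knapsack rspec
```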
...
#!/usr/bin/env bash
function retrieve_tests_metadata() {
mkdir -p knapsack/ rspec_flaky/ rspec_profiling/
mkdir -p crystalball/ knapsack/ rspec_flaky/ rspec_profiling/
local project_path="gitlab-org%2Fgitlab"
local latest_scheduled_master_pipeline_id
local job_id
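# Find the most recent successful master pipeline triggered by @gitlab-bot,
# then the update-tests-metadata job inside it: its artifacts hold the reports.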
latest_scheduled_master_pipeline_id=$(get_pipelines "${project_path}" "status=success&ref=master&username=gitlab-bot" | jq "first | .id")
job_id=$(get_job_id "${project_path}" "${latest_scheduled_master_pipeline_id}" "update-tests-metadata" "scope=success")
if [[ ! -f "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
get_job_artifact "${project_path}" "${job_id}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
fi
if [[ ! -f "${FLAKY_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${FLAKY_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
get_job_artifact "${project_path}" "${job_id}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
fi
# Disabled for now
# if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
# (get_job_artifact "${project_path}" "${job_id}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
# fi
#
# scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
function update_tests_metadata() {
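# Start from an empty report, then merge the per-node knapsack/rspec*.json
# timing files produced by this pipeline into the master report.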
echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
scripts/merge-reports "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" knapsack/rspec*.json
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uplaoding report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f knapsack/rspec*.json
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
export FLAKY_RSPEC_GENERATE_REPORT="true"
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
scripts/flaky_examples/prune-old-flaky-examples "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
if [[ -n ${TESTS_METADATA_S3_BUCKET} ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
@@ -48,16 +43,6 @@ function update_tests_metadata() {
fi
}
function retrieve_tests_mapping() {
mkdir -p crystalball/
if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
(wget -O "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
fi
scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
function update_tests_mapping() {
if ! crystalball_rspec_data_exists; then
echo "No crystalball rspec data found."
@@ -65,19 +50,8 @@ function update_tests_mapping() {
fi
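# Build the test mapping from the Crystalball data, pack it, and compress it
# so it can be saved as an artifact.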
scripts/generate-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" crystalball/rspec*.yml
scripts/pack-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
gzip "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f crystalball/rspec*.yml
}
...
@@ -87,24 +87,38 @@ function echosuccess() {
fi
}
function get_job_id() {
local job_name="${1}"
function get_pipelines() {
local project_id="${1}"
local query_string="${2:+&${2}}"
local url="https://gitlab.com/api/v4/projects/${project_id}/pipelines?per_page=100${query_string}"
echoinfo "GET ${url}"
curl --silent --show-error "${url}"
}
function get_job_id() {
local project_id="${1}"
local pipeline_id="${2}"
local job_name="${3}"
local query_string="${4:+&${4}}"
local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
if [ -z "${api_token}" ]; then
echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
return
local curl_opts=()
if [ -n "${api_token}" ]; then
curl_opts=(--header "PRIVATE-TOKEN: ${api_token}")
else
echoinfo "No API token given with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
fi
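# The pipeline jobs endpoint is paginated; look for the job on up to
# max_page pages before giving up.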
local max_page=3
local page=1
while true; do
local url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/jobs?per_page=100&page=${page}${query_string}"
local url="https://gitlab.com/api/v4/projects/${project_id}/pipelines/${pipeline_id}/jobs?per_page=100&page=${page}${query_string}"
echoinfo "GET ${url}"
local job_id
job_id=$(curl --silent --show-error --header "PRIVATE-TOKEN: ${api_token}" "${url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")
job_id=$(curl --silent --show-error "${curl_opts[@]}" "${url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")
[[ "${job_id}" == "null" && "${page}" -lt "$max_page" ]] || break
let "page++"
@@ -118,10 +132,21 @@ function get_job_id() {
fi
}
function get_job_artifact() {
local project_id="${1}"
local job_id="${2}"
local artifact_path="${3}"
local url="https://gitlab.com/api/v4/projects/${project_id}/jobs/${job_id}/artifacts/${artifact_path}"
echoinfo "GET ${url}"
curl --silent --show-error "${url}"
}
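# Usage sketch: chained together, these helpers reproduce the retrieval flow
# used by retrieve_tests_metadata above (the project path is URL-encoded):
#
#   pipeline_id=$(get_pipelines "gitlab-org%2Fgitlab" "status=success&ref=master&username=gitlab-bot" | jq "first | .id")
#   job_id=$(get_job_id "gitlab-org%2Fgitlab" "${pipeline_id}" "update-tests-metadata" "scope=success")
#   get_job_artifact "gitlab-org%2Fgitlab" "${job_id}" "knapsack/report-master.json" > knapsack/report-master.json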
function play_job() {
local job_name="${1}"
local job_id
job_id=$(get_job_id "${job_name}" "scope=manual");
job_id=$(get_job_id "${CI_PROJECT_ID}" "${CI_PIPELINE_ID}" "${job_name}" "scope=manual");
if [ -z "${job_id}" ]; then return; fi
local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
@@ -140,7 +165,7 @@ function play_job() {
function fail_pipeline_early() {
local dont_interrupt_me_job_id
dont_interrupt_me_job_id=$(get_job_id 'dont-interrupt-me' 'scope=success')
dont_interrupt_me_job_id=$(get_job_id "${CI_PROJECT_ID}" "${CI_PIPELINE_ID}" "dont-interrupt-me" "scope=success")
if [[ -n "${dont_interrupt_me_job_id}" ]]; then
echoinfo "This pipeline cannot be interrupted due to \`dont-interrupt-me\` job ${dont_interrupt_me_job_id}"
...