Commit 40919c67 authored by Rémy Coutable

Download Knapsack and flaky specs report from latest master artifacts

This dogfoods artifacts by downloading the Knapsack report, flaky specs
report, and Crystalball report from the latest successful master
pipeline triggered by @gitlab-bot (this should be a scheduled pipeline
for which we're sure update-tests-metadata was run).

This also removes the need to upload these reports to S3, since they're
already uploaded as artifacts.
Signed-off-by: Rémy Coutable <remy@rymai.me>
parent f3c9c0e1
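
For context, the retrieval flow this commit introduces reduces to three public GitLab v4 API calls. A minimal sketch with plain `curl` and `jq` (pagination omitted; the Knapsack report path is the one from the diff below, and the other reports follow the same pattern):

```shell
# 1. Latest successful master pipeline triggered by @gitlab-bot.
pipeline_id=$(curl --silent --show-error \
  "https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab/pipelines?status=success&ref=master&username=gitlab-bot" \
  | jq "first | .id")

# 2. The update-tests-metadata job within that pipeline.
job_id=$(curl --silent --show-error \
  "https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab/pipelines/${pipeline_id}/jobs?scope=success" \
  | jq 'map(select(.name == "update-tests-metadata")) | map(.id) | last')

# 3. A single artifact file from that job (the project is public, so no token is needed).
curl --silent --show-error \
  "https://gitlab.com/api/v4/projects/gitlab-org%2Fgitlab/jobs/${job_id}/artifacts/knapsack/report-master.json" \
  > knapsack/report-master.json
```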
 .tests-metadata-state:
-  variables:
-    TESTS_METADATA_S3_BUCKET: "gitlab-ce-cache"
+  image: ruby:2.7
   before_script:
     - source scripts/utils.sh
   artifacts:
@@ -17,7 +16,8 @@ retrieve-tests-metadata:
     - .test-metadata:rules:retrieve-tests-metadata
   stage: prepare
   script:
-    - source scripts/rspec_helpers.sh
+    - install_api_client_dependencies_with_apt
+    - source ./scripts/rspec_helpers.sh
     - retrieve_tests_metadata

 update-tests-metadata:
...
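
`install_api_client_dependencies_with_apt` is referenced here but not defined in this diff; presumably it installs the `curl` and `jq` binaries the API helpers rely on. A hypothetical sketch of such a helper:

```shell
function install_api_client_dependencies_with_apt() {
  # Assumed implementation: the API helpers in scripts/utils.sh shell out to curl and jq.
  apt-get update && apt-get install --no-install-recommends -y curl jq
}
```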
@@ -12,8 +12,8 @@ Our current CI parallelization setup is as follows:
 1. The `retrieve-tests-metadata` job in the `prepare` stage ensures we have a
    `knapsack/report-master.json` file:
-   - The `knapsack/report-master.json` file is fetched from S3, if it's not here
-     we initialize the file with `{}`.
+   - The `knapsack/report-master.json` file is fetched from the latest `master` pipeline which runs `update-tests-metadata`
+     (for now it's the 2-hourly scheduled master pipeline), if it's not here we initialize the file with `{}`.
 1. Each `[rspec|rspec-ee] [unit|integration|system|geo] n m` job are run with
    `knapsack rspec` and should have an evenly distributed share of tests:
    - It works because the jobs have access to the `knapsack/report-master.json`
@@ -25,7 +25,7 @@ Our current CI parallelization setup is as follows:
 1. The `update-tests-metadata` job (which only runs on scheduled pipelines for
    [the canonical project](https://gitlab.com/gitlab-org/gitlab) takes all the
    `knapsack/rspec*_pg_*.json` files and merge them all together into a single
-   `knapsack/report-master.json` file that is then uploaded to S3.
+   `knapsack/report-master.json` file that is saved as artifact.

 After that, the next pipeline will use the up-to-date `knapsack/report-master.json` file.
...
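
Conceptually, the merge step is simple: each `knapsack/rspec*_pg_*.json` report is a flat JSON object mapping spec file paths to run times, so combining the per-node reports is a shallow object merge. An illustrative `jq` equivalent of what `scripts/merge-reports` produces (not the actual script):

```shell
# Slurp every per-node report into an array of objects and merge them;
# on duplicate keys, the last report wins.
jq --slurp 'add' knapsack/rspec*_pg_*.json > knapsack/report-master.json
```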
 #!/usr/bin/env bash

 function retrieve_tests_metadata() {
-  mkdir -p knapsack/ rspec_flaky/ rspec_profiling/
+  mkdir -p crystalball/ knapsack/ rspec_flaky/ rspec_profiling/
+
+  local project_path="gitlab-org%2Fgitlab"
+  local latest_scheduled_master_pipeline_id
+  local job_id
+
+  latest_scheduled_master_pipeline_id=$(get_pipelines "${project_path}" "status=success&ref=master&username=gitlab-bot" | jq "first | .id")
+  job_id=$(get_job_id "${project_path}" "${latest_scheduled_master_pipeline_id}" "update-tests-metadata" "scope=success")

   if [[ ! -f "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" ]]; then
-    wget -O "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
+    get_job_artifact "${project_path}" "${job_id}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
   fi

   if [[ ! -f "${FLAKY_RSPEC_SUITE_REPORT_PATH}" ]]; then
-    wget -O "${FLAKY_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
+    get_job_artifact "${project_path}" "${job_id}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
   fi
+
+  # Disabled for now
+  # if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
+  #   (get_job_artifact "${project_path}" "${job_id}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
+  # fi
+  #
+  # scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
 }

 function update_tests_metadata() {
   echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"

   scripts/merge-reports "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" knapsack/rspec*.json
-  if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
-    if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
-      scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
-    else
-      echo "Not uplaoding report to S3 as the pipeline is not a scheduled one."
-    fi
-  fi
   rm -f knapsack/rspec*.json

-  scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
   export FLAKY_RSPEC_GENERATE_REPORT="true"
+  scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
   scripts/flaky_examples/prune-old-flaky-examples "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
-  if [[ -n ${TESTS_METADATA_S3_BUCKET} ]]; then
-    if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
-      scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
-    else
-      echo "Not uploading report to S3 as the pipeline is not a scheduled one."
-    fi
-  fi
   rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json

   if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
@@ -48,16 +43,6 @@ function update_tests_metadata() {
   fi
 }

-function retrieve_tests_mapping() {
-  mkdir -p crystalball/
-
-  if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
-    (wget -O "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
-  fi
-
-  scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
-}
-
 function update_tests_mapping() {
   if ! crystalball_rspec_data_exists; then
     echo "No crystalball rspec data found."
@@ -65,19 +50,8 @@ function update_tests_mapping() {
   fi

   scripts/generate-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" crystalball/rspec*.yml
   scripts/pack-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
   gzip "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
-  if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
-    if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
-      scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz"
-    else
-      echo "Not uploading report to S3 as the pipeline is not a scheduled one."
-    fi
-  fi
   rm -f crystalball/rspec*.yml
 }
...
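
The retrieval path can also be exercised outside CI; a sketch assuming the report paths the pipeline uses (the two `*_PATH` values below are assumptions, adjust to your configuration):

```shell
source scripts/utils.sh
source scripts/rspec_helpers.sh

# Assumed report locations; the CI config defines the canonical values.
export KNAPSACK_RSPEC_SUITE_REPORT_PATH="knapsack/report-master.json"
export FLAKY_RSPEC_SUITE_REPORT_PATH="rspec_flaky/report-suite.json"

# Falls back to "{}" placeholder files whenever no artifact is reachable,
# e.g. on forks or when the API is unavailable.
retrieve_tests_metadata
```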
@@ -87,24 +87,38 @@ function echosuccess() {
   fi
 }

-function get_job_id() {
-  local job_name="${1}"
+function get_pipelines() {
+  local project_id="${1}"
   local query_string="${2:+&${2}}"
+  local url="https://gitlab.com/api/v4/projects/${project_id}/pipelines?per_page=100${query_string}"
+
+  echoinfo "GET ${url}"
+
+  curl --silent --show-error "${url}"
+}
+
+function get_job_id() {
+  local project_id="${1}"
+  local pipeline_id="${2}"
+  local job_name="${3}"
+  local query_string="${4:+&${4}}"
   local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
-  if [ -z "${api_token}" ]; then
-    echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
-    return
+  local curl_opts
+
+  if [ -n "${api_token}" ]; then
+    curl_opts="--header 'PRIVATE-TOKEN: ${api_token}'"
+  else
+    echoinfo "No API token given with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
   fi

   local max_page=3
   local page=1

   while true; do
-    local url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/jobs?per_page=100&page=${page}${query_string}"
+    local url="https://gitlab.com/api/v4/projects/${project_id}/pipelines/${pipeline_id}/jobs?per_page=100&page=${page}${query_string}"
     echoinfo "GET ${url}"

     local job_id
-    job_id=$(curl --silent --show-error --header "PRIVATE-TOKEN: ${api_token}" "${url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")
+    job_id=$(curl --silent --show-error ${curl_opts} "${url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")
     [[ "${job_id}" == "null" && "${page}" -lt "$max_page" ]] || break

     let "page++"
@@ -118,10 +132,21 @@ function get_job_id() {
   fi
 }

+function get_job_artifact() {
+  local project_id="${1}"
+  local job_id="${2}"
+  local artifact_path="${3}"
+  local url="https://gitlab.com/api/v4/projects/${project_id}/jobs/${job_id}/artifacts/${artifact_path}"
+
+  echoinfo "GET ${url}"
+
+  curl --silent --show-error "${url}"
+}
+
 function play_job() {
   local job_name="${1}"

   local job_id
-  job_id=$(get_job_id "${job_name}" "scope=manual");
+  job_id=$(get_job_id "${CI_PROJECT_ID}" "${CI_PIPELINE_ID}" "${job_name}" "scope=manual");
   if [ -z "${job_id}" ]; then return; fi

   local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
@@ -140,7 +165,7 @@ function play_job() {
 function fail_pipeline_early() {
   local dont_interrupt_me_job_id
-  dont_interrupt_me_job_id=$(get_job_id 'dont-interrupt-me' 'scope=success')
+  dont_interrupt_me_job_id=$(get_job_id "${CI_PROJECT_ID}" "${CI_PIPELINE_ID}" "dont-interrupt-me" "scope=success")
   if [[ -n "${dont_interrupt_me_job_id}" ]]; then
     echoinfo "This pipeline cannot be interrupted due to \`dont-interrupt-me\` job ${dont_interrupt_me_job_id}"
...
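
Taken together, the three helpers compose exactly as `retrieve_tests_metadata` uses them; a condensed sketch:

```shell
source scripts/utils.sh

project_path="gitlab-org%2Fgitlab"  # URL-encoded gitlab-org/gitlab

# Pipeline -> job -> artifact, each step feeding the next.
pipeline_id=$(get_pipelines "${project_path}" "status=success&ref=master&username=gitlab-bot" | jq "first | .id")
job_id=$(get_job_id "${project_path}" "${pipeline_id}" "update-tests-metadata" "scope=success")
get_job_artifact "${project_path}" "${job_id}" "knapsack/report-master.json" > knapsack/report-master.json
```

Because gitlab-org/gitlab is public, these calls work unauthenticated; the token handling in `get_job_id` only adds a `PRIVATE-TOKEN` header when one is available.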