Commit 51011d1b authored by Lin Jen-Shin

Merge branch 'improve-review-apps-cleanup-when-previous-deployment-failed' into 'master'

Improve Review Apps cleanup when previous deployment failed by only issuing a `helm delete` command

Closes #63639 and #62161

See merge request gitlab-org/gitlab-ce!28661
parents 546355f7 e2669110
...@@ -77,6 +77,7 @@ schedule:review-build-cng: ...@@ -77,6 +77,7 @@ schedule:review-build-cng:
.review-deploy-base: &review-deploy-base .review-deploy-base: &review-deploy-base
<<: *review-base <<: *review-base
allow_failure: true allow_failure: true
retry: 2
stage: review stage: review
variables: variables:
HOST_SUFFIX: "${CI_ENVIRONMENT_SLUG}" HOST_SUFFIX: "${CI_ENVIRONMENT_SLUG}"
...@@ -95,10 +96,16 @@ schedule:review-build-cng: ...@@ -95,10 +96,16 @@ schedule:review-build-cng:
- install_api_client_dependencies_with_apk - install_api_client_dependencies_with_apk
- source scripts/review_apps/review-apps.sh - source scripts/review_apps/review-apps.sh
script: script:
- perform_review_app_deployment - check_kube_domain
- ensure_namespace
- install_tiller
- install_external_dns
- download_chart
- deploy || display_deployment_debug
- wait_for_review_app_to_be_accessible
- add_license
artifacts: artifacts:
paths: paths: [review_app_url.txt]
- review_app_url.txt
expire_in: 2 days expire_in: 2 days
when: always when: always
...@@ -108,8 +115,6 @@ review-deploy: ...@@ -108,8 +115,6 @@ review-deploy:
schedule:review-deploy: schedule:review-deploy:
<<: *review-deploy-base <<: *review-deploy-base
<<: *review-schedules-only <<: *review-schedules-only
script:
- perform_review_app_deployment
review-stop: review-stop:
<<: *review-base <<: *review-base
...@@ -124,11 +129,11 @@ review-stop: ...@@ -124,11 +129,11 @@ review-stop:
script: script:
- source scripts/review_apps/review-apps.sh - source scripts/review_apps/review-apps.sh
- delete - delete
- cleanup
.review-qa-base: &review-qa-base .review-qa-base: &review-qa-base
<<: *review-docker <<: *review-docker
allow_failure: true allow_failure: true
retry: 2
stage: qa stage: qa
variables: variables:
<<: *review-docker-variables <<: *review-docker-variables
......
[[ "$TRACE" ]] && set -x [[ "$TRACE" ]] && set -x
export TILLER_NAMESPACE="$KUBE_NAMESPACE" export TILLER_NAMESPACE="$KUBE_NAMESPACE"
function deployExists() { function deploy_exists() {
local namespace="${1}" local namespace="${1}"
local deploy="${2}" local deploy="${2}"
echoinfo "Checking if ${deploy} exists in the ${namespace} namespace..." true echoinfo "Checking if ${deploy} exists in the ${namespace} namespace..." true
...@@ -13,8 +13,7 @@ function deployExists() { ...@@ -13,8 +13,7 @@ function deployExists() {
return $deploy_exists return $deploy_exists
} }
function previousDeployFailed() { function previous_deploy_failed() {
set +e
local deploy="${1}" local deploy="${1}"
echoinfo "Checking for previous deployment of ${deploy}" true echoinfo "Checking for previous deployment of ${deploy}" true
...@@ -34,7 +33,6 @@ function previousDeployFailed() { ...@@ -34,7 +33,6 @@ function previousDeployFailed() {
else else
echoerr "Previous deployment NOT found." echoerr "Previous deployment NOT found."
fi fi
set -e
return $status return $status
} }
...@@ -51,49 +49,35 @@ function delete() { ...@@ -51,49 +49,35 @@ function delete() {
helm delete --purge "$name" helm delete --purge "$name"
} }
#######################################
# Deletes the Kubernetes objects labelled with the current release.
# Globals:
#   CI_ENVIRONMENT_SLUG (read) - value matched against the `release` label
#   KUBE_NAMESPACE (read)      - namespace handed to kubectl through -n
# Outputs:
#   Progress and error messages through echoinfo / echoerr.
# Returns:
#   Early (without calling kubectl) when CI_ENVIRONMENT_SLUG is empty;
#   otherwise the exit status of the kubectl invocation.
#######################################
function cleanup() {
  local release="${CI_ENVIRONMENT_SLUG}"

  # Guard: an empty release would produce the selector `release=`.
  if [[ -z "${release}" ]]; then
    echoerr "No release given, aborting the delete!"
    return
  fi

  echoinfo "Cleaning up '${release}'..." true

  # Comma-separated resource kinds passed to `kubectl delete` as one argument.
  local resource_kinds="ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa"

  local kubectl_args=(
    -n "${KUBE_NAMESPACE}"
    delete "${resource_kinds}"
    --now --ignore-not-found --include-uninitialized
    -l "release=${release}"
  )

  kubectl "${kubectl_args[@]}"
}
function get_pod() { function get_pod() {
local app_name="${1}" local app_name="${1}"
local status="${2-Running}" local status="${2-Running}"
get_pod_cmd="kubectl get pods -n ${KUBE_NAMESPACE} --field-selector=status.phase=${status} -lapp=${app_name},release=${CI_ENVIRONMENT_SLUG} --no-headers -o=custom-columns=NAME:.metadata.name" get_pod_cmd="kubectl get pods -n ${KUBE_NAMESPACE} --field-selector=status.phase=${status} -lapp=${app_name},release=${CI_ENVIRONMENT_SLUG} --no-headers -o=custom-columns=NAME:.metadata.name"
echoinfo "Running '${get_pod_cmd}'" true echoinfo "Waiting till '${app_name}' pod is ready" true
echoinfo "Running '${get_pod_cmd}'"
local interval=5
local elapsed_seconds=0
local max_seconds=$((2 * 60))
while true; do while true; do
local pod_name local pod_name
pod_name="$(eval "${get_pod_cmd}")" pod_name="$(eval "${get_pod_cmd}")"
[[ "${pod_name}" == "" ]] || break [[ "${pod_name}" == "" ]] || break
echoinfo "Waiting till '${app_name}' pod is ready"; if [[ "${elapsed_seconds}" -gt "${max_seconds}" ]]; then
sleep 5; echoerr "The pod name couldn't be found after ${elapsed_seconds} seconds, aborting."
break
fi
printf "."
let "elapsed_seconds+=interval"
sleep ${interval}
done done
echoinfo "The pod name is '${pod_name}'." echoinfo "The pod name is '${pod_name}'."
echo "${pod_name}" echo "${pod_name}"
} }
#######################################
# Runs the whole Review App deployment sequence, in order:
# cluster preparation steps, the (timed) deploy, then the post-deploy steps.
# Each step is a function expected to be defined by the sourcing script.
# Returns:
#   The exit status of the last step executed (add_license).
#######################################
function perform_review_app_deployment() {
  local _deployment_step

  # Steps that prepare the cluster before the chart is deployed.
  for _deployment_step in \
    check_kube_domain \
    ensure_namespace \
    install_tiller \
    install_external_dns; do
    "${_deployment_step}"
  done

  # Only the deploy itself is wrapped in `time`.
  time deploy

  # Steps that run once the deploy command has returned.
  for _deployment_step in \
    wait_for_review_app_to_be_accessible \
    add_license; do
    "${_deployment_step}"
  done
}
function check_kube_domain() { function check_kube_domain() {
echoinfo "Checking that Kube domain exists..." true echoinfo "Checking that Kube domain exists..." true
...@@ -119,9 +103,16 @@ function install_tiller() { ...@@ -119,9 +103,16 @@ function install_tiller() {
echoinfo "Initiating the Helm client..." echoinfo "Initiating the Helm client..."
helm init --client-only helm init --client-only
# Set toleration for Tiller to be installed on a specific node pool
helm init \ helm init \
--wait \
--upgrade \ --upgrade \
--replicas 2 --node-selectors "app=helm" \
--replicas 3 \
--override "spec.template.spec.tolerations[0].key"="dedicated" \
--override "spec.template.spec.tolerations[0].operator"="Equal" \
--override "spec.template.spec.tolerations[0].value"="helm" \
--override "spec.template.spec.tolerations[0].effect"="NoSchedule"
kubectl rollout status -n "$TILLER_NAMESPACE" -w "deployment/tiller-deploy" kubectl rollout status -n "$TILLER_NAMESPACE" -w "deployment/tiller-deploy"
...@@ -137,7 +128,7 @@ function install_external_dns() { ...@@ -137,7 +128,7 @@ function install_external_dns() {
domain=$(echo "${REVIEW_APPS_DOMAIN}" | awk -F. '{printf "%s.%s", $(NF-1), $NF}') domain=$(echo "${REVIEW_APPS_DOMAIN}" | awk -F. '{printf "%s.%s", $(NF-1), $NF}')
echoinfo "Installing external DNS for domain ${domain}..." true echoinfo "Installing external DNS for domain ${domain}..." true
if ! deployExists "${KUBE_NAMESPACE}" "${release_name}" || previousDeployFailed "${release_name}" ; then if ! deploy_exists "${KUBE_NAMESPACE}" "${release_name}" || previous_deploy_failed "${release_name}" ; then
echoinfo "Installing external-dns Helm chart" echoinfo "Installing external-dns Helm chart"
helm repo update helm repo update
helm install stable/external-dns \ helm install stable/external-dns \
...@@ -156,7 +147,7 @@ function install_external_dns() { ...@@ -156,7 +147,7 @@ function install_external_dns() {
fi fi
} }
function create_secret() { function create_application_secret() {
echoinfo "Creating the ${CI_ENVIRONMENT_SLUG}-gitlab-initial-root-password secret in the ${KUBE_NAMESPACE} namespace..." true echoinfo "Creating the ${CI_ENVIRONMENT_SLUG}-gitlab-initial-root-password secret in the ${KUBE_NAMESPACE} namespace..." true
kubectl create secret generic -n "$KUBE_NAMESPACE" \ kubectl create secret generic -n "$KUBE_NAMESPACE" \
...@@ -165,7 +156,7 @@ function create_secret() { ...@@ -165,7 +156,7 @@ function create_secret() {
--dry-run -o json | kubectl apply -f - --dry-run -o json | kubectl apply -f -
} }
function download_gitlab_chart() { function download_chart() {
echoinfo "Downloading the GitLab chart..." true echoinfo "Downloading the GitLab chart..." true
curl -o gitlab.tar.bz2 "https://gitlab.com/charts/gitlab/-/archive/${GITLAB_HELM_CHART_REF}/gitlab-${GITLAB_HELM_CHART_REF}.tar.bz2" curl -o gitlab.tar.bz2 "https://gitlab.com/charts/gitlab/-/archive/${GITLAB_HELM_CHART_REF}/gitlab-${GITLAB_HELM_CHART_REF}.tar.bz2"
...@@ -194,14 +185,12 @@ function deploy() { ...@@ -194,14 +185,12 @@ function deploy() {
gitlab_workhorse_image_repository="${IMAGE_REPOSITORY}/gitlab-workhorse-${IMAGE_VERSION}" gitlab_workhorse_image_repository="${IMAGE_REPOSITORY}/gitlab-workhorse-${IMAGE_VERSION}"
# Cleanup and previous installs, as FAILED and PENDING_UPGRADE will cause errors with `upgrade` # Cleanup and previous installs, as FAILED and PENDING_UPGRADE will cause errors with `upgrade`
if [ "$CI_ENVIRONMENT_SLUG" != "production" ] && previousDeployFailed "$CI_ENVIRONMENT_SLUG" ; then if [ "$CI_ENVIRONMENT_SLUG" != "production" ] && previous_deploy_failed "$CI_ENVIRONMENT_SLUG" ; then
echo "Deployment in bad state, cleaning up $CI_ENVIRONMENT_SLUG" echo "Deployment in bad state, cleaning up $CI_ENVIRONMENT_SLUG"
delete delete
cleanup
fi fi
create_secret create_application_secret
download_gitlab_chart
HELM_CMD=$(cat << EOF HELM_CMD=$(cat << EOF
helm upgrade --install \ helm upgrade --install \
...@@ -216,7 +205,7 @@ HELM_CMD=$(cat << EOF ...@@ -216,7 +205,7 @@ HELM_CMD=$(cat << EOF
--set prometheus.install=false \ --set prometheus.install=false \
--set global.ingress.configureCertmanager=false \ --set global.ingress.configureCertmanager=false \
--set global.ingress.tls.secretName=tls-cert \ --set global.ingress.tls.secretName=tls-cert \
--set global.ingress.annotations."external-dns\.alpha\.kubernetes\.io/ttl"="10" --set global.ingress.annotations."external-dns\.alpha\.kubernetes\.io/ttl"="10" \
--set nginx-ingress.controller.service.enableHttp=false \ --set nginx-ingress.controller.service.enableHttp=false \
--set nginx-ingress.defaultBackend.resources.requests.memory=7Mi \ --set nginx-ingress.defaultBackend.resources.requests.memory=7Mi \
--set nginx-ingress.controller.resources.requests.memory=440M \ --set nginx-ingress.controller.resources.requests.memory=440M \
...@@ -252,14 +241,35 @@ EOF ...@@ -252,14 +241,35 @@ EOF
echoinfo "Deploying with:" echoinfo "Deploying with:"
echoinfo "${HELM_CMD}" echoinfo "${HELM_CMD}"
eval $HELM_CMD || true eval "${HELM_CMD}"
}
#######################################
# Prints the logs of the "migrations" and "unicorn" pods to help debug a
# failed deployment. Pod names are looked up with get_pod; every occurrence
# of REVIEW_APPS_ROOT_PASSWORD in the logs is replaced with [REDACTED].
# Globals:
#   KUBE_NAMESPACE (read)            - namespace handed to kubectl through -n
#   REVIEW_APPS_ROOT_PASSWORD (read) - secret to mask in the printed logs
# Outputs:
#   Messages through echoinfo / echoerr and the redacted logs on stdout.
#######################################
function display_deployment_debug() {
  # Declared separately from the assignment so the pod names stay local
  # instead of leaking into the sourcing shell.
  local migrations_pod
  migrations_pod=$(get_pod "migrations")
  if [[ -z "${migrations_pod}" ]]; then
    echoerr "Migrations pod not found."
  else
    echoinfo "Logs tail of the ${migrations_pod} pod..."
    kubectl logs -n "$KUBE_NAMESPACE" "${migrations_pod}" \
      | _redact_review_apps_root_password
  fi

  local unicorn_pod
  unicorn_pod=$(get_pod "unicorn")
  if [[ -z "${unicorn_pod}" ]]; then
    echoerr "Unicorn pod not found."
  else
    echoinfo "Logs tail of the ${unicorn_pod} pod..."
    kubectl logs -n "$KUBE_NAMESPACE" -c unicorn "${unicorn_pod}" \
      | _redact_review_apps_root_password
  fi
}

#######################################
# Filter: copies stdin to stdout, replacing every literal occurrence of
# REVIEW_APPS_ROOT_PASSWORD with [REDACTED].
# The password is matched as a fixed string (quoted pattern), so characters
# such as `/`, `.`, `*` or `&` in it cannot break or widen the match, and the
# secret never appears on another process's command line.
#######################################
function _redact_review_apps_root_password() {
  local secret="${REVIEW_APPS_ROOT_PASSWORD:-}"
  local line

  # Nothing to mask: pass the stream through untouched.
  if [[ -z "${secret}" ]]; then
    cat
    return
  fi

  # `|| [[ -n ... ]]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    printf '%s\n' "${line//"${secret}"/[REDACTED]}"
  done
}
function wait_for_review_app_to_be_accessible() { function wait_for_review_app_to_be_accessible() {
# In case the Review App isn't completely available yet. Keep trying for 5 minutes. echoinfo "Waiting for the Review App at ${CI_ENVIRONMENT_URL} to be accessible..." true
local interval=5 local interval=5
local elapsed_seconds=0 local elapsed_seconds=0
local max_seconds=$((5 * 60)) local max_seconds=$((2 * 60))
while true; do while true; do
local review_app_http_code local review_app_http_code
review_app_http_code=$(curl --silent --output /dev/null --max-time 5 --write-out "%{http_code}" "${CI_ENVIRONMENT_URL}/users/sign_in") review_app_http_code=$(curl --silent --output /dev/null --max-time 5 --write-out "%{http_code}" "${CI_ENVIRONMENT_URL}/users/sign_in")
...@@ -272,10 +282,10 @@ function wait_for_review_app_to_be_accessible() { ...@@ -272,10 +282,10 @@ function wait_for_review_app_to_be_accessible() {
sleep ${interval} sleep ${interval}
done done
if [[ "${review_app_http_code}" == "200" ]]; then if [[ "${review_app_http_code}" -eq "200" ]]; then
echoinfo "The Review App at ${CI_ENVIRONMENT_URL} is ready!" echoinfo "The Review App at ${CI_ENVIRONMENT_URL} is ready after ${elapsed_seconds} seconds!"
else else
echoerr "The Review App at ${CI_ENVIRONMENT_URL} isn't ready after 5 minutes of polling..." echoerr "The Review App at ${CI_ENVIRONMENT_URL} isn't ready after ${max_seconds} seconds of polling..."
exit 1 exit 1
fi fi
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment