From 8c67f885710d9b09a4674d98014866a1e6304c5e Mon Sep 17 00:00:00 2001 From: Alain Takoudjou <alain.takoudjou@nexedi.com> Date: Fri, 6 May 2022 20:33:42 +0200 Subject: [PATCH] repman: waitdatabases api is not stable, try to bootstrap more often waitdatabases can fail even if databases are ready, reduce the number of database checks and try to bootstrap. --- software/repman/buildout.hash.cfg | 2 +- .../repman/templates/repman-manager.sh.in | 27 ++++++++++++++++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/software/repman/buildout.hash.cfg b/software/repman/buildout.hash.cfg index 74b7dcd99..01a124107 100644 --- a/software/repman/buildout.hash.cfg +++ b/software/repman/buildout.hash.cfg @@ -58,7 +58,7 @@ md5sum = c203f40a58386310a433b58fd345a341 [repman-manager-sh.in] _update_hash_filename_ = templates/repman-manager.sh.in -md5sum = 852dfab6d798aa1382eec4de2fd624f9 +md5sum = 70ddec7450ae8be728ec107b805fa9a6 [dbjobs-in] _update_hash_filename_ = templates/dbjobs.in diff --git a/software/repman/templates/repman-manager.sh.in b/software/repman/templates/repman-manager.sh.in index 79c6fdcf6..21a185d20 100644 --- a/software/repman/templates/repman-manager.sh.in +++ b/software/repman/templates/repman-manager.sh.in @@ -1,6 +1,6 @@ #!{{ bash_bin }} -#set -e +set -e curl () { {{ curl_bin }} -k --silent -H "Accept: application/json" "$@" @@ -12,7 +12,7 @@ get_token () { wait_database () { NAME=$1 - for retry in {1..50}; do + for retry in {1..5}; do echo ">> Wait until $NAME databases are ready..."; CODE=$(curl -H "Authorization: Bearer ${TOKEN}" -o /dev/null -w "%{http_code}" {{ secure_url }}/api/clusters/$NAME/actions/waitdatabases); if [ $CODE -eq 504 ]; then @@ -29,8 +29,25 @@ wait_database () { fi echo ">> [$retry] waitdatabases returned code $CODE..."; fi - sleep 30 + sleep 15 + echo "Reloading cluster settings..." 
+ curl -H "Authorization: Bearer ${TOKEN}" \ + {{ secure_url }}/api/clusters/$NAME/settings/actions/reload done + echo $CODE +} + +check_cluster () { + # Check if cluster is boostrapped + NAME=$1 + TOKEN=$(get_token | {{ jq_bin }} -r '.token') + ERRORS=$(curl -H "Authorization: Bearer ${TOKEN}" {{ secure_url }}/api/clusters/$NAME/topology/alerts | {{ jq_bin }} -r '.errors') + if [ "$ERRORS" != "null" ] && [ ! -z "$ERRORS" ]; then + echo "ERROR: Bootstrap replication of cluster $NAME failed!"; + echo $ERRORS; + return 1; + fi + return 0 } activate_proxy () { @@ -66,7 +83,6 @@ if [ ! -f "{{ parameter_dict['bootstrap'] }}/{{ name }}_bootstrapped" ]; then curl -H "Authorization: Bearer ${TOKEN}" \ {{ secure_url }}/api/clusters/{{ name }}/actions/replication/cleanup CODE=$(curl -H "Authorization: Bearer ${TOKEN}" -o /dev/null -w "%{http_code}" {{ secure_url }}/api/clusters/{{ name }}/actions/replication/bootstrap/master-slave) - SUCCESS=0 if [ $CODE -eq 200 ]; then activate_proxy {{ name }} if [ $? -eq 0 ]; then @@ -77,6 +93,9 @@ if [ ! -f "{{ parameter_dict['bootstrap'] }}/{{ name }}_bootstrapped" ]; then else echo "ERROR: Failed to bootstrap cluster {{ name }}... http_code $CODE" fi +else + # Check cluster health + check_cluster {{ name }} fi {% endfor %} -- 2.30.9