From 8c67f885710d9b09a4674d98014866a1e6304c5e Mon Sep 17 00:00:00 2001
From: Alain Takoudjou <alain.takoudjou@nexedi.com>
Date: Fri, 6 May 2022 20:33:42 +0200
Subject: [PATCH] repman: waitdatabases api is not stable, try to bootstrap
 more often

waitdatabases can fail even if the databases are ready, so reduce the number of
database readiness checks and try to bootstrap.
---
 software/repman/buildout.hash.cfg             |  2 +-
 .../repman/templates/repman-manager.sh.in     | 27 ++++++++++++++++---
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/software/repman/buildout.hash.cfg b/software/repman/buildout.hash.cfg
index 74b7dcd99..01a124107 100644
--- a/software/repman/buildout.hash.cfg
+++ b/software/repman/buildout.hash.cfg
@@ -58,7 +58,7 @@ md5sum = c203f40a58386310a433b58fd345a341
 
 [repman-manager-sh.in]
 _update_hash_filename_ = templates/repman-manager.sh.in
-md5sum = 852dfab6d798aa1382eec4de2fd624f9
+md5sum = 70ddec7450ae8be728ec107b805fa9a6
 
 [dbjobs-in]
 _update_hash_filename_ = templates/dbjobs.in
diff --git a/software/repman/templates/repman-manager.sh.in b/software/repman/templates/repman-manager.sh.in
index 79c6fdcf6..21a185d20 100644
--- a/software/repman/templates/repman-manager.sh.in
+++ b/software/repman/templates/repman-manager.sh.in
@@ -1,6 +1,6 @@
 #!{{ bash_bin }}
 
-#set -e
+set -e
 
 curl () {
   {{ curl_bin }} -k --silent -H "Accept: application/json" "$@"
@@ -12,7 +12,7 @@ get_token () {
 
 wait_database () {
   NAME=$1
-  for retry in {1..50}; do
+  for retry in {1..5}; do
     echo ">> Wait until $NAME databases are ready...";
     CODE=$(curl -H "Authorization: Bearer ${TOKEN}" -o /dev/null -w "%{http_code}" {{ secure_url }}/api/clusters/$NAME/actions/waitdatabases);
     if [ $CODE -eq 504 ]; then
@@ -29,8 +29,25 @@ wait_database () {
       fi
       echo ">> [$retry] waitdatabases returned code $CODE...";
     fi
-    sleep 30
+    sleep 15
+    echo "Reloading cluster settings..."
+    curl -H "Authorization: Bearer ${TOKEN}" \
+      {{ secure_url }}/api/clusters/$NAME/settings/actions/reload
   done
+  echo $CODE
+}
+
+check_cluster () {
+  # Check if cluster is boostrapped
+  NAME=$1
+  TOKEN=$(get_token | {{ jq_bin }} -r '.token')
+  ERRORS=$(curl -H "Authorization: Bearer ${TOKEN}" {{ secure_url }}/api/clusters/$NAME/topology/alerts | {{ jq_bin }} -r '.errors')
+  if [ "$ERRORS" != "null" ] && [ ! -z "$ERRORS" ]; then
+    echo "ERROR: Bootstrap replication of cluster $NAME failed!";
+    echo $ERRORS;
+    return 1;
+  fi
+  return 0
 }
 
 activate_proxy () {
@@ -66,7 +83,6 @@ if [ ! -f "{{ parameter_dict['bootstrap'] }}/{{ name }}_bootstrapped" ]; then
   curl -H "Authorization: Bearer ${TOKEN}" \
     {{ secure_url }}/api/clusters/{{ name }}/actions/replication/cleanup
   CODE=$(curl -H "Authorization: Bearer ${TOKEN}" -o /dev/null -w "%{http_code}" {{ secure_url }}/api/clusters/{{ name }}/actions/replication/bootstrap/master-slave)
-  SUCCESS=0
   if [ $CODE -eq 200 ]; then
     activate_proxy {{ name }}
     if [ $? -eq 0 ]; then
@@ -77,6 +93,9 @@ if [ ! -f "{{ parameter_dict['bootstrap'] }}/{{ name }}_bootstrapped" ]; then
   else
     echo "ERROR: Failed to bootstrap cluster {{ name }}... http_code $CODE"
   fi
+else
+  # Check cluster health
+  check_cluster {{ name }}
 fi
 
 {% endfor %}
-- 
2.30.9