Commit 2d89a14b authored by Nicolas Wavrant's avatar Nicolas Wavrant

runner: improves feedback on importer script failure.

If slapgrid fails, print the end of instance/software log in resilient log.
Now an the exit-code-file is always written, even if runner-importer fails.
The promise on this file gives the URL where the resilient log can
be accessed, to simplify the debugging
parent 58a19aea
......@@ -68,7 +68,7 @@ recipe = hexagonit.recipe.download
ignore-existing = true
url = ${:_profile_base_location_}/template/runner-import.sh.jinja2
download-only = true
md5sum = 3cebc5d793ff1b5c786392723babc510
md5sum = 52ae874aad06acd2a9cc0eb2b2bd29b1
filename = runner-import.sh.jinja2
mode = 0644
......@@ -76,7 +76,7 @@ mode = 0644
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-runner-import.cfg.in
output = ${buildout:directory}/instance-runner-import.cfg
md5sum = 91c34a55b7a45b14b0fac8b7faa202fe
md5sum = 0950285e9a579a47e501d98d7e79e0fa
mode = 0644
[template-runner-export-script]
......
......@@ -79,34 +79,38 @@ rendered = $${directory:bin}/$${slap-parameter:namebase}-importer
# backward compatibility for resilient stack
wrapper = $${:rendered}
mode = 700
restore-exit-code-file=$${directory:srv}/importer-exit-code-file
restore-exit-code-file = $${directory:srv}/$${:restore-exit-code-file-basename}
restore-exit-code-file-basename = importer-exit-code-file
resilient-log-basename = resilient.log
context =
key backend_url slaprunner:access-url
key ipv4 slaprunner:ipv4
key ipv6 slaprunner:ipv6
key proxy_port slaprunner:proxy_port
section directory directory
raw output_log_file $${directory:log}/resilient.log
section supervisord supervisord
raw output_log_file $${directory:log}/$${:resilient-log-basename}
raw shell_binary ${bash:location}/bin/bash
raw rsync_binary ${rsync:location}/bin/rsync
raw restore_exit_code_file $${:restore-exit-code-file}
[importer-consistency-promise]
# Test that the importer script and "after-import" subscripts
# are not older than 1 day (24h), and have succeeded
# are not older than 2 days (1 day + some slack), and have succeeded
recipe = collective.recipe.template
input = inline: #!/bin/sh
EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}")
RECENT_EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}" -mtime -1)
if [ -z "$EXIT_CODE_FILE" ]; then
EXIT_CODE_FILE="$${importer:restore-exit-code-file}"
RECENT_EXIT_CODE_FILE=$(find $${directory:srv} -maxdepth 1 -name "$${importer:restore-exit-code-file-basename}" -mtime -2)
RESILIENT_LOG_URL=$${publish:monitor-base-url}/log/$${importer:resilient-log-basename}
if [ ! -f "$EXIT_CODE_FILE" ]; then
exit 0;
else
if [ -z "$RECENT_EXIT_CODE_FILE" ]; then
echo "Consistency check is too old.";
exit 1;
else
EXIT_CODE=$(cat $EXIT_CODE_FILE)
exit $EXIT_CODE
echo "Error during import. Please check here : $RESILIENT_LOG_URL";
exit $(cat $EXIT_CODE_FILE);
fi
fi
exit 1; # Something else went wrong
......
......@@ -8,7 +8,13 @@ umask 077
exec > >(tee -ai {{ output_log_file }})
exec 2>&1
echo -e "\n\nrunner-import run at : $(date)"
RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
fail_with_exit_code () {
echo 1 > $RESTORE_EXIT_CODE_FILE
}
trap fail_with_exit_code ERR
srv_directory={{ directory['srv'] }}
restore_element () {
......@@ -27,6 +33,8 @@ restore_element () {
done
}
echo -e "\n\nrunner-import run at : $(date)"
restore_element {{ directory['backup'] }}/runner/ $srv_directory/runner instance project proxy.db
restore_element {{ directory['backup'] }}/etc/ {{ directory['etc'] }} config.json
cp -r {{ directory['backup'] }}/etc/.??* {{ directory['etc'] }};
......@@ -70,16 +78,18 @@ $SQLITE3 $DATABASE "update partition_network11 set address='$IPV6' where netmask
MASTERURL="http://{{ ipv4 }}:{{ proxy_port }}"
echo "Building newest software..."
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-sr-log'] }} && false)
# Remove defined scripts to force buildout to recreate them to have updated paths
rm $srv_directory/runner/instance/slappart*/srv/runner-import-restore || true
echo "Running slapos node instance..."
# XXX hardcoded
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-cp-log'] }} && false)
# Invoke defined scripts for each partition inside of slaprunner
echo "Invoke custom import scripts defined by each instances..."
......@@ -98,6 +108,5 @@ $SQLITE3 $DATABASE "update partition11 set requested_state='started';"
# Write exit code to an arbitrary file that will be checked by promise/monitor
echo "Write status file... End"
RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
echo $RESTORE_EXIT_CODE > $RESTORE_EXIT_CODE_FILE
exit $RESTORE_EXIT_CODE
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment