Commit 7e9699d0 authored by Nicolas Wavrant's avatar Nicolas Wavrant

runner: improves feedback on importer script failure.

If slapgrid fails, print the end of instance/software log in resilient log.
Now an the exit-code-file is always written, even if runner-importer fails.
The promise on this file gives the URL where the resilient log can
be accessed, to simplify the debugging
parent 1df94205
...@@ -68,7 +68,7 @@ recipe = hexagonit.recipe.download ...@@ -68,7 +68,7 @@ recipe = hexagonit.recipe.download
ignore-existing = true ignore-existing = true
url = ${:_profile_base_location_}/template/runner-import.sh.jinja2 url = ${:_profile_base_location_}/template/runner-import.sh.jinja2
download-only = true download-only = true
md5sum = 3cebc5d793ff1b5c786392723babc510 md5sum = 52ae874aad06acd2a9cc0eb2b2bd29b1
filename = runner-import.sh.jinja2 filename = runner-import.sh.jinja2
mode = 0644 mode = 0644
...@@ -76,7 +76,7 @@ mode = 0644 ...@@ -76,7 +76,7 @@ mode = 0644
recipe = slapos.recipe.template recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-runner-import.cfg.in url = ${:_profile_base_location_}/instance-runner-import.cfg.in
output = ${buildout:directory}/instance-runner-import.cfg output = ${buildout:directory}/instance-runner-import.cfg
md5sum = 91c34a55b7a45b14b0fac8b7faa202fe md5sum = 0950285e9a579a47e501d98d7e79e0fa
mode = 0644 mode = 0644
[template-runner-export-script] [template-runner-export-script]
......
...@@ -79,34 +79,38 @@ rendered = $${directory:bin}/$${slap-parameter:namebase}-importer ...@@ -79,34 +79,38 @@ rendered = $${directory:bin}/$${slap-parameter:namebase}-importer
# backward compatibility for resilient stack # backward compatibility for resilient stack
wrapper = $${:rendered} wrapper = $${:rendered}
mode = 700 mode = 700
restore-exit-code-file=$${directory:srv}/importer-exit-code-file restore-exit-code-file = $${directory:srv}/$${:restore-exit-code-file-basename}
restore-exit-code-file-basename = importer-exit-code-file
resilient-log-basename = resilient.log
context = context =
key backend_url slaprunner:access-url key backend_url slaprunner:access-url
key ipv4 slaprunner:ipv4 key ipv4 slaprunner:ipv4
key ipv6 slaprunner:ipv6 key ipv6 slaprunner:ipv6
key proxy_port slaprunner:proxy_port key proxy_port slaprunner:proxy_port
section directory directory section directory directory
raw output_log_file $${directory:log}/resilient.log section supervisord supervisord
raw output_log_file $${directory:log}/$${:resilient-log-basename}
raw shell_binary ${bash:location}/bin/bash raw shell_binary ${bash:location}/bin/bash
raw rsync_binary ${rsync:location}/bin/rsync raw rsync_binary ${rsync:location}/bin/rsync
raw restore_exit_code_file $${:restore-exit-code-file} raw restore_exit_code_file $${:restore-exit-code-file}
[importer-consistency-promise] [importer-consistency-promise]
# Test that the importer script and "after-import" subscripts # Test that the importer script and "after-import" subscripts
# are not older than 1 day (24h), and have succeeded # are not older than 2 days (1 day + some slack), and have succeeded
recipe = collective.recipe.template recipe = collective.recipe.template
input = inline: #!/bin/sh input = inline: #!/bin/sh
EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}") EXIT_CODE_FILE="$${importer:restore-exit-code-file}"
RECENT_EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}" -mtime -1) RECENT_EXIT_CODE_FILE=$(find $${directory:srv} -maxdepth 1 -name "$${importer:restore-exit-code-file-basename}" -mtime -2)
if [ -z "$EXIT_CODE_FILE" ]; then RESILIENT_LOG_URL=$${publish:monitor-base-url}/log/$${importer:resilient-log-basename}
if [ ! -f "$EXIT_CODE_FILE" ]; then
exit 0; exit 0;
else else
if [ -z "$RECENT_EXIT_CODE_FILE" ]; then if [ -z "$RECENT_EXIT_CODE_FILE" ]; then
echo "Consistency check is too old."; echo "Consistency check is too old.";
exit 1; exit 1;
else else
EXIT_CODE=$(cat $EXIT_CODE_FILE) echo "Error during import. Please check here : $RESILIENT_LOG_URL";
exit $EXIT_CODE exit $(cat $EXIT_CODE_FILE);
fi fi
fi fi
exit 1; # Something else went wrong exit 1; # Something else went wrong
......
...@@ -8,7 +8,13 @@ umask 077 ...@@ -8,7 +8,13 @@ umask 077
exec > >(tee -ai {{ output_log_file }}) exec > >(tee -ai {{ output_log_file }})
exec 2>&1 exec 2>&1
echo -e "\n\nrunner-import run at : $(date)" RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
fail_with_exit_code () {
echo 1 > $RESTORE_EXIT_CODE_FILE
}
trap fail_with_exit_code ERR
srv_directory={{ directory['srv'] }} srv_directory={{ directory['srv'] }}
restore_element () { restore_element () {
...@@ -27,6 +33,8 @@ restore_element () { ...@@ -27,6 +33,8 @@ restore_element () {
done done
} }
echo -e "\n\nrunner-import run at : $(date)"
restore_element {{ directory['backup'] }}/runner/ $srv_directory/runner instance project proxy.db restore_element {{ directory['backup'] }}/runner/ $srv_directory/runner instance project proxy.db
restore_element {{ directory['backup'] }}/etc/ {{ directory['etc'] }} config.json restore_element {{ directory['backup'] }}/etc/ {{ directory['etc'] }} config.json
cp -r {{ directory['backup'] }}/etc/.??* {{ directory['etc'] }}; cp -r {{ directory['backup'] }}/etc/.??* {{ directory['etc'] }};
...@@ -70,16 +78,18 @@ $SQLITE3 $DATABASE "update partition_network11 set address='$IPV6' where netmask ...@@ -70,16 +78,18 @@ $SQLITE3 $DATABASE "update partition_network11 set address='$IPV6' where netmask
MASTERURL="http://{{ ipv4 }}:{{ proxy_port }}" MASTERURL="http://{{ ipv4 }}:{{ proxy_port }}"
echo "Building newest software..." echo "Building newest software..."
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 || $SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 || $SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 $SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-sr-log'] }} && false)
# Remove defined scripts to force buildout to recreate them to have updated paths # Remove defined scripts to force buildout to recreate them to have updated paths
rm $srv_directory/runner/instance/slappart*/srv/runner-import-restore || true rm $srv_directory/runner/instance/slappart*/srv/runner-import-restore || true
echo "Running slapos node instance..." echo "Running slapos node instance..."
# XXX hardcoded # XXX hardcoded
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 || $SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 || $SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 $SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-cp-log'] }} && false)
# Invoke defined scripts for each partition inside of slaprunner # Invoke defined scripts for each partition inside of slaprunner
echo "Invoke custom import scripts defined by each instances..." echo "Invoke custom import scripts defined by each instances..."
...@@ -98,6 +108,5 @@ $SQLITE3 $DATABASE "update partition11 set requested_state='started';" ...@@ -98,6 +108,5 @@ $SQLITE3 $DATABASE "update partition11 set requested_state='started';"
# Write exit code to an arbitrary file that will be checked by promise/monitor # Write exit code to an arbitrary file that will be checked by promise/monitor
echo "Write status file... End" echo "Write status file... End"
RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
echo $RESTORE_EXIT_CODE > $RESTORE_EXIT_CODE_FILE echo $RESTORE_EXIT_CODE > $RESTORE_EXIT_CODE_FILE
exit $RESTORE_EXIT_CODE exit $RESTORE_EXIT_CODE
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment