Commit 02973bbb authored by Nicolas Wavrant's avatar Nicolas Wavrant

pbs: remove infinite loops from pbs scripts.

Instead, of that, the notifier wrappers will run the scripts several more times in case of failures
This is better than silent failures.
parent c08a6567
...@@ -82,7 +82,7 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -82,7 +82,7 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
LC_ALL=C LC_ALL=C
export LC_ALL export LC_ALL
RDIFF_BACKUP=%(rdiffbackup_binary)s RDIFF_BACKUP=%(rdiffbackup_binary)s
until $RDIFF_BACKUP \\ $RDIFF_BACKUP \\
--remote-schema %(remote_schema)s \\ --remote-schema %(remote_schema)s \\
--restore-as-of now \\ --restore-as-of now \\
--force \\ --force \\
...@@ -90,7 +90,6 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -90,7 +90,6 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
%(remote_dir)s; do %(remote_dir)s; do
echo "repeating rdiff-backup..." echo "repeating rdiff-backup..."
sleep 10 sleep 10
done
""") """)
template_dict = { template_dict = {
...@@ -137,76 +136,75 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -137,76 +136,75 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
CANTFIND_FILE=$TMPDIR/$$.rdiff_cantfind CANTFIND_FILE=$TMPDIR/$$.rdiff_cantfind
SUCCEEDED=false SUCCEEDED=false
while ! $SUCCEEDED; do
# not using --fix-corrupted can lead to an infinite loop # not using --fix-corrupted can lead to an infinite loop
# in case of manual changes to the backup repository. # in case of manual changes to the backup repository.
CORRUPTED_ARGS="" CORRUPTED_ARGS=""
if [ "$1" = "--fix-corrupted" ]; then if [ "$1" = "--fix-corrupted" ]; then
VERIFY=$($RDIFF_BACKUP --verify $BACKUP_DIR 2>&1 >/dev/null) VERIFY=$($RDIFF_BACKUP --verify $BACKUP_DIR 2>&1 >/dev/null)
echo "$VERIFY" | egrep "$CORRUPTED_MSG" | sed "s/$CORRUPTED_MSG//g" > $CORRUPTED_FILE echo "$VERIFY" | egrep "$CORRUPTED_MSG" | sed "s/$CORRUPTED_MSG//g" > $CORRUPTED_FILE
# Sometimes --verify reports this spurious warning: # Sometimes --verify reports this spurious warning:
echo "$VERIFY" | egrep "$CANTFIND_MSG" | sed "s/$CANTFIND_MSG\(.*\),/--always-snapshot\ '\\1'/g" > $CANTFIND_FILE echo "$VERIFY" | egrep "$CANTFIND_MSG" | sed "s/$CANTFIND_MSG\(.*\),/--always-snapshot\ '\\1'/g" > $CANTFIND_FILE
# There can be too many files, better not to provide them through separate command line parameters # There can be too many files, better not to provide them through separate command line parameters
CORRUPTED_ARGS="--always-snapshot-fromfile $CORRUPTED_FILE --always-snapshot-fromfile $CANTFIND_FILE" CORRUPTED_ARGS="--always-snapshot-fromfile $CORRUPTED_FILE --always-snapshot-fromfile $CANTFIND_FILE"
if [ -s "$CORRUPTED_FILE" -o -s "$CANTFIND_FILE" ]; then if [ -s "$CORRUPTED_FILE" -o -s "$CANTFIND_FILE" ]; then
echo Retransmitting $(cat "$CORRUPTED_FILE" "$CANTFIND_FILE" | wc -l) corrupted/missing files echo Retransmitting $(cat "$CORRUPTED_FILE" "$CANTFIND_FILE" | wc -l) corrupted/missing files
else else
echo "No corrupted or missing files to retransmit" echo "No corrupted or missing files to retransmit"
fi
fi fi
fi
$RDIFF_BACKUP \\ $RDIFF_BACKUP \\
$CORRUPTED_ARGS \\ $CORRUPTED_ARGS \\
--remote-schema %(remote_schema)s \\ --remote-schema %(remote_schema)s \\
%(remote_dir)s \\ %(remote_dir)s \\
$BACKUP_DIR $BACKUP_DIR
RDIFF_BACKUP_STATUS=$? RDIFF_BACKUP_STATUS=$?
[ "$CORRUPTED_ARGS" ] && rm -f "$CORRUPTED_FILE" "$CANTFIND_FILE" [ "$CORRUPTED_ARGS" ] && rm -f "$CORRUPTED_FILE" "$CANTFIND_FILE"
if [ ! $RDIFF_BACKUP_STATUS -eq 0 ]; then if [ ! $RDIFF_BACKUP_STATUS -eq 0 ]; then
# Check the backup, go to the last consistent backup, so that next # Check the backup, go to the last consistent backup, so that next
# run will be okay. # run will be okay.
echo "Checking backup directory..." echo "Checking backup directory..."
$RDIFF_BACKUP --check-destination-dir $BACKUP_DIR $RDIFF_BACKUP --check-destination-dir $BACKUP_DIR
if [ ! $? -eq 0 ]; then if [ ! $? -eq 0 ]; then
# Here, two possiblities: # Here, two possiblities:
if [ is_first_backup ]; then if [ is_first_backup ]; then
continue continue
# The first backup failed, and check-destination as well. # The first backup failed, and check-destination as well.
# we may want to remove the backup. # we may want to remove the backup.
else else
continue continue
# The backup command has failed, while transferring an increment, and check-destination as well. # The backup command has failed, while transferring an increment, and check-destination as well.
# XXX We may need to publish the failure and ask the the equeue, re-run this script again, # XXX We may need to publish the failure and ask the the equeue, re-run this script again,
# instead do a push to the clone. # instead do a push to the clone.
fi
fi fi
else
# Everything's okay, cleaning up...
$RDIFF_BACKUP --remove-older-than %(remove_backup_older_than)s --force $BACKUP_DIR
fi fi
else
# Everything's okay, cleaning up...
$RDIFF_BACKUP --remove-older-than %(remove_backup_older_than)s --force $BACKUP_DIR
fi
SUCCEEDED=true SUCCEEDED=true
if [ -e %(backup_signature)s ]; then if [ -e %(backup_signature)s ]; then
cd $BACKUP_DIR cd $BACKUP_DIR
find -type f ! -name backup.signature ! -wholename "./rdiff-backup-data/*" -print0 | xargs -P4 -0 sha256sum | LC_ALL=C sort -k 66 > ../proof.signature find -type f ! -name backup.signature ! -wholename "./rdiff-backup-data/*" -print0 | xargs -P4 -0 sha256sum | LC_ALL=C sort -k 66 > ../proof.signature
cmp backup.signature ../proof.signature || SUCCEEDED=false cmp backup.signature ../proof.signature || SUCCEEDED=false
diff -ruw backup.signature ../proof.signature > ../backup.diff diff -ruw backup.signature ../proof.signature > ../backup.diff
# XXX If there is a difference on the backup, we should publish the # XXX If there is a difference on the backup, we should publish the
# failure and ask the equeue, re-run this script again, # failure and ask the equeue, re-run this script again,
# instead do a push it to the clone. # instead do a push it to the clone.
fi fi
$SUCCEEDED || find $BACKUP_DIR -name rdiff-backup.tmp.* -exec rm -rf {} \;
$SUCCEEDED || find $BACKUP_DIR -name rdiff-backup.tmp.* -exec rm -rf {} \;
done
""") """)
template_dict = { template_dict = {
...@@ -305,6 +303,7 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -305,6 +303,7 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
title=entry.get('title', slave_id), title=entry.get('title', slave_id),
notification_url=entry['notify'] or '', notification_url=entry['notify'] or '',
feed_url='%s/get/%s' % (self.options['notifier-url'], entry['notification-id']), feed_url='%s/get/%s' % (self.options['notifier-url'], entry['notification-id']),
max_run=self.options.get('pull-push-maximum-run', 1),
pidfile=os.path.join(self.options['run-directory'], '%s.pid' % slave_id), pidfile=os.path.join(self.options['run-directory'], '%s.pid' % slave_id),
instance_root_name=self.options.get('instance-root-name', None), instance_root_name=self.options.get('instance-root-name', None),
log_url=self.options.get('log-url', None), log_url=self.options.get('log-url', None),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment