Resilient pbs recipe: recover from rdiff-backup failures.

parent be7f2fc2
...@@ -30,6 +30,7 @@ import os ...@@ -30,6 +30,7 @@ import os
import signal import signal
import subprocess import subprocess
import sys import sys
import textwrap
import urlparse import urlparse
from slapos.recipe.librecipe import GenericSlapRecipe from slapos.recipe.librecipe import GenericSlapRecipe
...@@ -88,11 +89,14 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -88,11 +89,14 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
raise ValueError('Missing URL parameter for PBS recipe') raise ValueError('Missing URL parameter for PBS recipe')
parsed_url = urlparse.urlparse(url) parsed_url = urlparse.urlparse(url)
slave_id = entry['notification-id']
slave_type = entry['type'] slave_type = entry['type']
if not slave_type in ['pull', 'push']: if not slave_type in ['pull', 'push']:
raise ValueError('type parameter must be either pull or push.') raise ValueError('type parameter must be either pull or push.')
slave_id = entry['notification-id']
print 'Processing PBS slave %s with type %s' % (slave_id, slave_type)
promise_path = os.path.join(self.options['promises-directory'], slave_id) promise_path = os.path.join(self.options['promises-directory'], slave_id)
promise_dict = self.promise_base_dict.copy() promise_dict = self.promise_base_dict.copy()
promise_dict.update(user=parsed_url.username, promise_dict.update(user=parsed_url.username,
...@@ -106,42 +110,84 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -106,42 +110,84 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
host = parsed_url.hostname host = parsed_url.hostname
known_hosts_file[host] = entry['server-key'] known_hosts_file[host] = entry['server-key']
notifier_path = os.path.join(self.options['wrappers-directory'], slave_id) notifier_wrapper_path = os.path.join(self.options['wrappers-directory'], slave_id)
rdiff_path = notifier_path + '_raw' rdiff_wrapper_path = notifier_wrapper_path + '_raw'
# Create the rdiff-backup wrapper # Create the rdiff-backup wrapper
# It is useful to separate it from the notifier so that we can run it # It is useful to separate it from the notifier so that we can run it
# Manually. # Manually.
rdiff_parameter_list = [] rdiffbackup_parameter_list = []
# XXX use -y because the host might not yet be in the # XXX use -y because the host might not yet be in the
# trusted hosts file until the next time slapgrid is run. # trusted hosts file until the next time slapgrid is run.
rdiff_parameter_list.extend([ rdiffbackup_remote_schema = '%(ssh)s -y -p %%s %(user)s@%(host)s' % {
'--remote-schema', '%(ssh)s -y -p %%s %(user)s@%(host)s' % {
'ssh': self.options['sshclient-binary'], 'ssh': self.options['sshclient-binary'],
'user': parsed_url.username, 'user': parsed_url.username,
'host': parsed_url.hostname, 'host': parsed_url.hostname,
}]) }
if slave_type == 'push':
rdiff_parameter_list.extend(['--restore-as-of', 'now', '--force'])
comments = ['','Push data to a PBS *-import instance.', '']
elif slave_type == 'pull':
comments = ['','Pull data from a PBS *-export instance.', '']
remote_directory = '%(port)s::%(path)s' % {'port': parsed_url.port, remote_directory = '%(port)s::%(path)s' % {'port': parsed_url.port,
'path': parsed_url.path} 'path': parsed_url.path}
local_directory = self.createDirectory(self.options['directory'], entry['name']) local_directory = self.createDirectory(self.options['directory'], entry['name'])
rdiff_parameter_list.extend([local_directory, remote_directory])
rdiff_wrapper = self.createWrapper( if slave_type == 'push':
name=rdiff_path, # Create a simple rdiff-backup wrapper that will push
command=self.options['rdiffbackup-binary'], rdiffbackup_parameter_list.extend(['--remote-schema', rdiffbackup_remote_schema])
parameters=rdiff_parameter_list, rdiffbackup_parameter_list.extend(['--restore-as-of', 'now'])
comments=comments, rdiffbackup_parameter_list.append('--force')
) rdiffbackup_parameter_list.append(local_directory)
rdiffbackup_parameter_list.append(remote_directory)
comments = ['', 'Push data to a PBS *-import instance.', '']
rdiff_wrapper = self.createWrapper(
name=rdiff_wrapper_path,
command=self.options['rdiffbackup-binary'],
parameters=rdiffbackup_parameter_list,
comments=comments,
)
elif slave_type == 'pull':
# Wrap rdiff-backup call into a script that checks consistency of backup
# We need to manually escape the remote schema
rdiffbackup_parameter_list.extend(['--remote-schema', '"%s"' % rdiffbackup_remote_schema])
rdiffbackup_parameter_list.append(remote_directory)
rdiffbackup_parameter_list.append(local_directory)
comments = ['', 'Pull data from a PBS *-export instance.', '']
rdiff_wrapper_template = textwrap.dedent("""\
#!/bin/sh
# %(comment)s
RDIFF_BACKUP="%(rdiffbackup_binary)s"
$RDIFF_BACKUP %(rdiffbackup_parameter)s
if [ ! $? -eq 0 ]; then
# Check the backup, go to the last consistent backup, so that next
# run will be okay.
echo "Checking backup directory..."
$RDIFF_BACKUP --check-destination-dir %(local_directory)s
if [ ! $? -eq 0 ]; then
# Here, two possiblities:
# * The first backup failed. It is safe to remove it since there is nothing valuable there.
# * The backup has been complete, but is now in a really weird state. Not safe to remove it.
echo "Impossible to check backup: we move it to a safe place."
# XXX: bang
mv %(local_directory)s %(local_directory)s.$(date +%%s)
fi
fi
""")
rdiff_wrapper_content = rdiff_wrapper_template % {
'comment': comments,
'rdiffbackup_binary': self.options['rdiffbackup-binary'],
'local_directory': local_directory,
'rdiffbackup_parameter': ' \\\n '.join(rdiffbackup_parameter_list),
}
rdiff_wrapper = self.createFile(
name=rdiff_wrapper_path,
content=rdiff_wrapper_content,
mode=0700
)
path_list.append(rdiff_wrapper) path_list.append(rdiff_wrapper)
# Create notifier wrapper # Create notifier wrapper
notifier_wrapper = self.createNotifier( notifier_wrapper = self.createNotifier(
notifier_binary=self.options['notifier-binary'], notifier_binary=self.options['notifier-binary'],
wrapper=notifier_path, wrapper=notifier_wrapper_path,
executable=rdiff_wrapper, executable=rdiff_wrapper,
log=os.path.join(self.options['feeds'], entry['notification-id']), log=os.path.join(self.options['feeds'], entry['notification-id']),
title=entry.get('title', slave_id), title=entry.get('title', slave_id),
...@@ -195,4 +241,3 @@ class Recipe(GenericSlapRecipe, Notify, Callback): ...@@ -195,4 +241,3 @@ class Recipe(GenericSlapRecipe, Notify, Callback):
path_list.append(wrapper) path_list.append(wrapper)
return path_list return path_list
* Report, from pbs and from clone, when a backup failed * Report, from pbs and from clone, when a backup failed
* When an rdiff-backup command fails, automatically recover from it (by removing rdiff-backup-data?). How can we avoid losing all history?
* PBSs and mirrors should monitor/replace themselves * PBSs and mirrors should monitor/replace themselves
* Report errors from backup * Report errors from backup
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment