Commit 4fadb21e authored by Nicolas Wavrant

WIP: adds resiliency on RootInstance

parent 829b154e
......@@ -73,6 +73,7 @@ setup(name=name,
entry_points={
'zc.buildout': [
'addresiliency = slapos.recipe.addresiliency:Recipe',
'addresiliency-root = slapos.recipe.addresiliency:RootTakeover',
'accords = slapos.recipe.accords:Recipe',
'apache.zope.backend = slapos.recipe.apache_zope_backend:Recipe',
'apacheperl = slapos.recipe.apacheperl:Recipe',
......
......@@ -37,27 +37,45 @@ class Recipe(GenericSlapRecipe):
bin/bully will monitor, run elections and perform renames when needed.
"""
# NOTE(review): this span looks like a diff whose +/- markers and indentation
# were stripped, so the previous `_install` body appears interleaved with the
# new `_createTakeoverWrapper` / `_install` methods -- confirm against the
# repository before relying on the statement order shown here.
def _install(self):
path_list = []
# Builds the takeover wrapper script via createPythonScript.
# `method_extra_kw` entries override the arguments handed to the target
# function; `script_extra_kw` entries override the createPythonScript
# keyword arguments (name, absolute_function, arguments).
def _createTakeoverWrapper(self, script_extra_kw=None, method_extra_kw=None):
slap_connection = self.buildout['slap-connection']
method_parameter_dict = {
'server_url': slap_connection['server-url'],
'key_file': slap_connection.get('key-file'),
'cert_file': slap_connection.get('cert-file'),
'computer_id': slap_connection['computer-id'],
'partition_id': slap_connection['partition-id'],
'software': slap_connection['software-release-url'],
'namebase': self.parameter_dict['namebase'],
'takeover_triggered_file_path': self.options['takeover-triggered-file-path'],
}
# Only dict overrides are merged; any other value (including the default
# None) is ignored.
if isinstance(method_extra_kw, dict):
method_parameter_dict.update(method_extra_kw)
slap_connection = self.buildout['slap-connection']
script_argument_dict = {
'name': self.options['wrapper-takeover'],
'absolute_function': 'slapos.recipe.addresiliency.takeover.run',
'arguments': method_parameter_dict,
}
# Same rule as above: non-dict overrides are ignored.
if isinstance(script_extra_kw, dict):
script_argument_dict.update(script_extra_kw)
takeover_wrapper = self.createPythonScript(
name=self.options['wrapper-takeover'],
absolute_function='slapos.recipe.addresiliency.takeover.run',
arguments={
'server_url': slap_connection['server-url'],
'key_file': slap_connection.get('key-file'),
'cert_file': slap_connection.get('cert-file'),
'computer_id': slap_connection['computer-id'],
'partition_id': slap_connection['partition-id'],
'software': slap_connection['software-release-url'],
'namebase': self.parameter_dict['namebase'],
'takeover_triggered_file_path': self.options['takeover-triggered-file-path'],
})
return self.createPythonScript(**script_argument_dict)
path_list.append(takeover_wrapper)
# Creates the wrapper with no overrides and returns the createPythonScript
# result in a one-element list.
def _install(self):
takeover_wrapper = self._createTakeoverWrapper()
return [takeover_wrapper,]
return path_list
class RootTakeover(Recipe):
    """Install the takeover wrapper script for a root instance.

    Reuses the inherited ``_createTakeoverWrapper`` and overrides only the
    ``absolute_function`` script argument, so that the generated wrapper
    (written to the path given by the ``wrapper-takeover`` option) calls
    ``slapos.recipe.addresiliency.takeover_root.run``.
    """

    def _install(self):
        # Returns a one-element list holding whatever
        # _createTakeoverWrapper produced for the wrapper script.
        wrapper = self._createTakeoverWrapper(
            script_extra_kw={
                'absolute_function': 'slapos.recipe.addresiliency.takeover_root.run',
            },
        )
        return [wrapper]
# -*- coding: utf-8 -*-
import logging
import time
import traceback
import slapos
from slapos.slap.slap import NotFoundError
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
def takeover(server_url, key_file, cert_file, computer_guid,
             partition_id, software_release, namebase,
             winner_instance_suffix=None,
             takeover_triggered_file_path=None):
    """
    Make the winner partition take the place of the broken one.

    Steps performed:
     - connect to the SlapOS master at ``server_url`` and register the
       current computer partition (``computer_guid`` / ``partition_id``);
     - pick the winner: the partition requested as
       ``namebase + winner_instance_suffix`` (software type 'resilient')
       when a suffix is given, otherwise the current partition itself;
     - request the partition to deactivate, whose reference is ``namebase``
       truncated at its first '-clone', with software type 'frozen' and
       state 'stopped', then rename it to 'broken-<UTC timestamp>';
     - rename the winner to that reference, retrying every 10 seconds while
       the master answers NotFoundError, then bang it;
     - create the empty file ``takeover_triggered_file_path`` (when given),
       used as a "lock" preventing equeue from running import scripts.

    Per the original notes, slapgrid later changes the winner's software
    type, and after a few slapgrid-cp runs a new partition is created to
    replace it as an importer; none of that happens in this function.

    Raises:
      ValueError: if ``namebase`` does not contain '-clone'.
    """
    slap = slapos.slap.slap()
    slap.initializeConnection(server_url, key_file, cert_file)
    current_partition = slap.registerComputerPartition(
        computer_guid=computer_guid,
        partition_id=partition_id)

    # Partition that will take over.
    if winner_instance_suffix:
        winner_instance_name = namebase + winner_instance_suffix
        # XXX: a lot of values are hardcoded here, because request() both
        # sets and gets everything in a single call.
        cp_winner = current_partition.request(
            software_release=software_release,
            software_type='resilient',
            partition_reference=winner_instance_name)
    else:
        # This script is run in the winning partition: use it as the winner.
        cp_winner = current_partition

    # XXX although cp_winner can already be renamed, changing its software
    # type requires getting hold of the root partition as well.
    cp_exporter_ref = namebase[:namebase.index('-clone')]

    # Partition to be deactivated.
    cp_broken = cp_winner.request(software_release=software_release,
                                  software_type='frozen',
                                  state='stopped',
                                  partition_reference=cp_exporter_ref)
    # NOTE(review): the result was never used; the call itself is kept in
    # case it is relied upon to fail early when the partition is not
    # available yet -- confirm before removing.
    cp_broken.getInstanceParameterDict()

    broken_new_ref = 'broken-{}'.format(
        time.strftime("%d-%b_%H:%M:%S", time.gmtime()))

    log.debug("Renaming {}: {}".format(cp_broken.getId(), broken_new_ref))
    cp_broken.rename(new_name=broken_new_ref)

    log.debug("Renaming {}: {}".format(cp_winner.getId(), cp_exporter_ref))
    # Update the name (and later, the software type) of the partition that
    # takes over. The 10 second pause also precedes the first attempt.
    while True:
        time.sleep(10)
        try:
            cp_winner.rename(new_name=cp_exporter_ref)
            break
        except NotFoundError:
            traceback.print_exc()
            log.warning('Impossible to rename. Retrying in a few seconds...')
    log.debug('Renamed.')
    cp_winner.bang(message='partitions have been renamed!')
    # Note: the root instance will itself reconfigure the winning instance
    # (software_type and parameters).

    # Create the "lock" file preventing equeue from running import scripts.
    if takeover_triggered_file_path is None:
        # The parameter defaults to None; open(None, 'w') would raise after
        # the renames have already been carried out.
        log.warning('No takeover_triggered_file_path given: '
                    'lock file not created.')
        return
    # The context manager closes the handle deterministically.
    with open(takeover_triggered_file_path, 'w') as lock_file:
        lock_file.write('')
def run(args):
    """Unpack ``args`` and call ``takeover`` with the matching keywords.

    ``args`` must support ``pop(key)``; every consumed key is removed from
    it. A missing key raises ``KeyError``.
    """
    # Resolve the callable first, before any key is popped.
    takeover_function = slapos.recipe.addresiliency.takeover_root.takeover
    # (takeover keyword, key in args) pairs, consumed in this order.
    keyword_to_key = (
        ('server_url', 'server_url'),
        ('key_file', 'key_file'),
        ('cert_file', 'cert_file'),
        ('computer_guid', 'computer_id'),
        ('partition_id', 'partition_id'),
        ('software_release', 'software'),
        ('namebase', 'namebase'),
        ('takeover_triggered_file_path', 'takeover_triggered_file_path'),
    )
    keyword_arguments = {}
    for keyword, key in keyword_to_key:
        keyword_arguments[keyword] = args.pop(key)
    takeover_function(**keyword_arguments)
......@@ -53,14 +53,14 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg
output = ${buildout:directory}/template.cfg
md5sum = bb7e0bf9959c4437ff1e23e645315ccf
md5sum = 9c83c74fb3ef25b9d15bebd4d1854f38
mode = 0644
[template-runner]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-runner.cfg
output = ${buildout:directory}/template-runner.cfg.in
md5sum = 0b3561ee4ef8d687fa95f2915fe9923b
md5sum = ac3877176b64686c5946f4848861bb2c
mode = 0644
[template-runner-import-script]
......
......@@ -16,6 +16,7 @@ runner-export = ${instance-runner-export:output}
frozen = ${instance-frozen:output}
pull-backup = ${template-pull-backup:output}
root-instance-clone = ${root-instance-clone:output}
[instance-base-runner]
recipe = slapos.recipe.template:jinja2
......
......@@ -4,6 +4,12 @@
# ones from Pypi, to ensure stability;
[buildout]
develop =
/srv/slapgrid/slappart16/srv//runner/project/slapos-dev
/srv/slapgrid/slappart16/srv//runner/project/slapos.toolbox
/srv/slapgrid/slappart16/srv//runner/project/slapos.core
extends = common.cfg
[versions]
......@@ -16,7 +22,7 @@ ecdsa = 0.13
erp5.util = 0.4.45
futures = 3.0.5
gitdb = 0.6.4
gunicorn = 19.5.0
gunicorn = 19.4.5
prettytable = 0.7.2
pycrypto = 2.6.1
slapos.recipe.template = 2.9
......
......@@ -93,7 +93,7 @@ recipe = slapos.recipe.template:jinja2
filename = template-monitor.cfg
template = ${:_profile_base_location_}/instance-monitor.cfg.jinja2.in
rendered = ${buildout:directory}/template-monitor.cfg
md5sum = 84998b1ca3c29445dca70b495515c35b
md5sum = 7ae1625581b00b55ee8aac7132d647b5
context =
key apache_location apache:location
key gzip_location gzip:location
......
......@@ -331,8 +331,8 @@ monitor-title = ${slap-configuration:instance-title}
monitor-httpd-ipv6 = ${slap-configuration:ipv6-random}
monitor-httpd-port = 8196
# XXX - Set monitor-base-url = ${monitor-httpd-conf-parameter:url} => https://[ipv6]:port
monitor-base-url = ${monitor-frontend-promise:url}
#monitor-base-url = ${monitor-httpd-conf-parameter:url}
#monitor-base-url = ${monitor-frontend-promise:url}
monitor-base-url = ${monitor-httpd-conf-parameter:url}
root-instance-title = ${slap-configuration:root-instance-title}
monitor-url-list =
cors-domains = monitor.app.officejs.com
......
......@@ -17,6 +17,7 @@ parts =
template-replicated
template-parts
instance-frozen
root-instance-clone
# needed tools for resiliency
gzip
......@@ -72,14 +73,14 @@ mode = 0644
[template-replicated]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/template-replicated.cfg.in
md5sum = 7a6234465ae845cb262d4f94c158764e
md5sum = 434cfe5b55549fe12532b393d5edd632
mode = 0644
destination = ${buildout:directory}/template-replicated.cfg.in
[template-parts]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/template-parts.cfg.in
md5sum = 071b1034ee8f5cc14f79b16fdeba2813
md5sum = 41e571360ca9c4e3300ec2b6356a521e
mode = 0644
destination = ${buildout:directory}/template-parts.cfg.in
......@@ -99,6 +100,12 @@ md5sum = c46c8e3e4ce4376c98ad2fc0e2ff0fe4
mode = 0644
destination = ${buildout:directory}/resilient-web-takeover-cgi-script.py.in
[root-instance-clone]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/root-instance-clone.cfg.in
md5sum = c5b75c0ddfe13b31c121a484cdd454ce
output = ${buildout:directory}/root-instance-clone.cfg
# Provide an empty wrapper
[template-wrapper]
recipe = slapos.recipe.template
......
[buildout]
parts =
publish-connection-information
slap-configuration
resiliency-takeover-script
resilient-web-takeover-cgi-script
resilient-web-takeover-httpd-wrapper
resilient-web-takeover-httpd-promise
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
offline = true
[directory]
recipe = slapos.cookbook:mkdirectory
bin = $${buildout:directory}/bin
etc = $${buildout:directory}/etc
srv = $${buildout:directory}/srv
var = $${buildout:directory}/var
services = $${:etc}/service
promises = $${:etc}/promise
log = $${:var}/log
run = $${:var}/run
cgi-bin = $${:srv}/cgi-bin
[publish-connection-information]
recipe = slapos.cookbook:publish
takeover-url = http://[$${resilient-web-takeover-httpd-configuration-file:listening-ip}]:$${resilient-web-takeover-httpd-configuration-file:listening-port}
takeover-password = $${resilient-web-takeover-password:passwd}
###########
# Generate the takeover script
###########
[resiliency-takeover-script]
recipe = slapos.cookbook:addresiliency-root
wrapper-takeover = $${directory:bin}/takeover
takeover-triggered-file-path = $${directory:srv}/takeover_triggered
# Path of the file that the takeover script creates once a takeover has been
# triggered. The equeue process watches for the existence of this file.
[equeue]
recipe = slapos.cookbook:equeue
socket = $${directory:run}/equeue.sock
lockfile = $${directory:run}/equeue.lock
log = $${directory:log}/equeue.log
database = $${directory:srv}/equeue.db
wrapper = $${directory:services}/equeue
equeue-binary = ${buildout:bin-directory}/equeue
takeover-triggered-file-path = $${resiliency-takeover-script:takeover-triggered-file-path}
###########
# Deploy a webserver allowing to do takeover from a web browser.
###########
[resilient-web-takeover-password]
recipe = slapos.cookbook:generate.password
storage-path = $${directory:srv}/passwd
bytes = 8
[resilient-web-takeover-cgi-script]
recipe = collective.recipe.template
input = ${resilient-web-takeover-cgi-script-download:destination}
output = $${directory:cgi-bin}/web-takeover.cgi
password = $${resilient-web-takeover-password:passwd}
mode = 700
proof-signature-url =
[resilient-web-takeover-httpd-port]
recipe = slapos.cookbook:free_port
ip = $${resilient-web-takeover-httpd-configuration-file:listening-ip}
minimum = 9281
maximum = 9292
# XXX could it be something lighter?
# XXX Add SSL
[resilient-web-takeover-httpd-configuration-file]
recipe = collective.recipe.template
input = inline:
PidFile "$${:pid-file}"
Listen [$${:listening-ip}]:$${:listening-port}
ServerAdmin someone@email
DocumentRoot "$${:document-root}"
ErrorLog "$${:error-log}"
LoadModule unixd_module modules/mod_unixd.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule mime_module modules/mod_mime.so
LoadModule cgid_module modules/mod_cgid.so
LoadModule dir_module modules/mod_dir.so
ScriptSock $${:cgid-pid-file}
<Directory $${:document-root}>
# XXX: security????
Options +ExecCGI
AddHandler cgi-script .cgi
DirectoryIndex web-takeover.cgi
</Directory>
output = $${directory:etc}/resilient-web-takeover-httpd.conf
# md5sum =
listening-ip = $${slap-network-information:global-ipv6}
# XXX: randomize-me
listening-port = $${resilient-web-takeover-httpd-port:port}
htdocs = $${directory:cgi-bin}
pid-file = $${directory:run}/resilient-web-takeover-httpd.pid
cgid-pid-file = $${directory:run}/resilient-web-takeover-httpd-cgid.pid
document-root = $${directory:cgi-bin}
error-log = $${directory:log}/resilient-web-takeover-httpd-error-log
[resilient-web-takeover-httpd-wrapper]
recipe = slapos.cookbook:wrapper
apache-executable = ${apache:location}/bin/httpd
command-line = $${:apache-executable} -f $${resilient-web-takeover-httpd-configuration-file:output} -DFOREGROUND
wrapper-path = $${directory:services}/resilient-web-takeover-httpd
[resilient-web-takeover-httpd-promise]
recipe = slapos.cookbook:check_url_available
path = $${directory:promises}/resilient-web-takeover-httpd
url = http://[$${resilient-web-takeover-httpd-configuration-file:listening-ip}]:$${resilient-web-takeover-httpd-configuration-file:listening-port}/
dash_path = ${dash:location}/bin/dash
curl_path = ${curl:location}/bin/curl
[slap-configuration]
recipe = slapos.cookbook:slapconfiguration
computer = $${slap-connection:computer-id}
partition = $${slap-connection:partition-id}
url = $${slap-connection:server-url}
key = $${slap-connection:key-file}
cert = $${slap-connection:cert-file}
\ No newline at end of file
......@@ -17,6 +17,7 @@
request-pull-backup-server-{{namebase}}-backup-{{id}}
{% endfor %}
request-root-instance-clone
{% endmacro %}
......@@ -20,6 +20,23 @@ home = ${buildout:directory}
etc = ${:home}/etc
promise = ${:etc}/promise
### Request a clone of the root instance
[slap-configuration]
recipe = slapos.cookbook:slapconfiguration.serialised
computer = ${slap-connection:computer-id}
partition = ${slap-connection:partition-id}
url = ${slap-connection:server-url}
key = ${slap-connection:key-file}
cert = ${slap-connection:cert-file}
[request-root-instance-clone]
<= slap-connection
recipe = slapos.cookbook:request
software-url = ${slap-connection:software-release-url}
software-type = root-instance-clone
name = ${slap-configuration:root-instance-title}-clone
config-namebase = ${:name}
return = takeover-url
## Tells the Backupable recipe that we want a backup
[resilient]
......@@ -234,7 +251,7 @@ sla-instance_guid = ${request-pbs-{{namebase}}-{{id}}:instance_guid}
[publish-connection-information]
feed-url-{{namebase}}-{{id}}-pull = ${request-pbs-{{namebase}}-{{id}}:connection-feeds-url}${request-pull-backup-server-{{namebase}}-{{id}}:config-notification-id}
clone-takeover-url = ${request-root-instance-clone:connection-takeover-url}
{% if 'monitor-base-url' in monitor_return -%}
{% do monitor_url_list.append('${request-pbs-' ~ namebase ~ '-' ~ id ~ ':connection-monitor-base-url}') -%}
{% endif -%}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment