Commit 75f83ced authored by Marco Mariani's avatar Marco Mariani

bin/takeover script

parent f8124124
......@@ -31,7 +31,10 @@ import os
class Recipe(GenericSlapRecipe):
""" This class provides the installation of the resilience
script on the partition.
scripts on the partition.
bin/takeover will perform a rename (must be run manually).
bin/bully will monitor, run elections and perform renames when needed.
"""
def _install(self):
......@@ -55,12 +58,12 @@ class Recipe(GenericSlapRecipe):
slap_connection = self.buildout['slap-connection']
if self.optionIsTrue('enable-bully-service', default=False):
wrapper_dir = self.options['services']
bully_dir = self.options['services']
else:
wrapper_dir = self.options['bin']
bully_dir = self.options['bin']
wrapper = self.createPythonScript(
name=os.path.join(wrapper_dir, self.parameter_dict['wrapper']),
bully_wrapper = self.createPythonScript(
name=os.path.join(bully_dir, self.options['wrapper-bully']),
absolute_function='slapos.recipe.addresiliency.bully.run',
arguments={
'confpath': confpath,
......@@ -73,7 +76,22 @@ class Recipe(GenericSlapRecipe):
'namebase': self.parameter_dict['namebase'],
})
path_list.append(wrapper)
path_list.append(bully_wrapper)
takeover_wrapper = self.createPythonScript(
name=os.path.join(self.options['bin'], self.options['wrapper-takeover']),
absolute_function='slapos.recipe.addresiliency.takeover.run',
arguments={
'server_url': slap_connection['server-url'],
'key_file': slap_connection.get('key-file'),
'cert_file': slap_connection.get('cert-file'),
'computer_id': slap_connection['computer-id'],
'partition_id': slap_connection['partition-id'],
'software': slap_connection['software-release-url'],
'namebase': self.parameter_dict['namebase'],
})
path_list.append(takeover_wrapper)
return path_list
......
......@@ -6,7 +6,7 @@ import socket
import thread
import time
from slapos import slap as slapmodule
import slapos.recipe.addresiliency.renamer
import slapos
log = logging.getLogger(__name__)
......@@ -29,86 +29,6 @@ STATE_REORGANIZATION = 'reorganization'
class Renamer(object):
def __init__(self, server_url, key_file, cert_file, computer_guid,
partition_id, software_release, namebase):
self.server_url = server_url
self.key_file = key_file
self.cert_file = cert_file
self.computer_guid = computer_guid
self.partition_id = partition_id
self.software_release = software_release
self.namebase = namebase
def _failover(self):
"""\
This method does
- retrieve the broken computer partition
- change its reference to 'broken-...' and its software type to 'frozen'
- retrieve the winner computer partition (attached to this process)
- change its reference and software type to replace the broken one
Then, after running slapgrid-cp a few times, the winner takes over and
a new cp is created to replace it as an importer.
"""
# TODO: replace hardcoded strings with values from the API
slap = slapmodule.slap()
slap.initializeConnection(self.server_url, self.key_file, self.cert_file)
# partition that will take over.
cp_winner = slap.registerComputerPartition(computer_guid=self.computer_guid,
partition_id=self.partition_id)
# XXX although we can already rename cp_winner, to change its software type we need to
# get hold of the root cp as well
root_partition_id = 'slappart0' # XXX hardcoded. what's the API for this?
cp_root = slap.registerComputerPartition(computer_guid=self.computer_guid,
partition_id=root_partition_id)
cp_exporter_ref = self.namebase + '0' # this is ok. the boss is always number zero.
# partition to be deactivated
cp_broken = cp_root.request(software_release=self.software_release,
software_type='frozen',
state='stopped',
partition_reference=cp_exporter_ref)
broken_new_ref = 'broken-{}'.format(time.strftime("%d-%b_%H:%M:%S", time.gmtime()))
# XXX can we retrieve and log the old reference name?
log.debug("Renaming {}: {}".format(cp_broken.getId(), broken_new_ref))
cp_broken.rename(new_name=broken_new_ref)
cp_broken.stopped()
log.debug("Renaming {}: {}".format(cp_broken.getId(), cp_exporter_ref))
# update software type and name for the partition that will take over
cp_winner_ref = self.namebase + '2' # XXX hardcoded. what's the API for this?
cp_root.request(software_release=self.software_release,
software_type=self.namebase+'-export',
partition_reference=cp_winner_ref).rename(new_name=cp_exporter_ref)
def failover(self):
try:
self._failover()
log.info('Renaming done')
except slapos.slap.slap.ServerError:
log.info('Internal server error')
## Leader is always number 0
class ResilientInstance(object):
......@@ -291,13 +211,13 @@ class Wrapper(object):
def run(args):
confpath = args.pop('confpath')
renamer = Renamer(server_url = args.pop('server_url'),
key_file = args.pop('key_file'),
cert_file = args.pop('cert_file'),
computer_guid = args.pop('computer_id'),
partition_id = args.pop('partition_id'),
software_release = args.pop('software'),
namebase = args.pop('namebase'))
renamer = slapos.recipe.addresiliency.renamer.Renamer(server_url = args.pop('server_url'),
key_file = args.pop('key_file'),
cert_file = args.pop('cert_file'),
computer_guid = args.pop('computer_id'),
partition_id = args.pop('partition_id'),
software_release = args.pop('software'),
namebase = args.pop('namebase'))
if args:
raise ValueError('Unknown arguments: %s' % ', '.join(args))
......
# -*- coding: utf-8 -*-
import logging
import time
from slapos import slap as slapmodule
import slapos
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
class Renamer(object):
def __init__(self, server_url, key_file, cert_file, computer_guid,
partition_id, software_release, namebase):
self.server_url = server_url
self.key_file = key_file
self.cert_file = cert_file
self.computer_guid = computer_guid
self.partition_id = partition_id
self.software_release = software_release
self.namebase = namebase
def _failover(self):
"""\
This method does
- retrieve the broken computer partition
- change its reference to 'broken-...' and its software type to 'frozen'
- retrieve the winner computer partition (attached to this process)
- change its reference and software type to replace the broken one
Then, after running slapgrid-cp a few times, the winner takes over and
a new cp is created to replace it as an importer.
"""
# TODO: replace hardcoded strings with values from the API
slap = slapmodule.slap()
slap.initializeConnection(self.server_url, self.key_file, self.cert_file)
# partition that will take over.
cp_winner = slap.registerComputerPartition(computer_guid=self.computer_guid,
partition_id=self.partition_id)
# XXX although we can already rename cp_winner, to change its software type we need to
# get hold of the root cp as well
root_partition_id = 'slappart0' # XXX hardcoded. what's the API for this?
cp_root = slap.registerComputerPartition(computer_guid=self.computer_guid,
partition_id=root_partition_id)
cp_exporter_ref = self.namebase + '0' # this is ok. the boss is always number zero.
# partition to be deactivated
cp_broken = cp_root.request(software_release=self.software_release,
software_type='frozen',
state='stopped',
partition_reference=cp_exporter_ref)
broken_new_ref = 'broken-{}'.format(time.strftime("%d-%b_%H:%M:%S", time.gmtime()))
# XXX can we retrieve and log the old reference name?
log.debug("Renaming {}: {}".format(cp_broken.getId(), broken_new_ref))
cp_broken.rename(new_name=broken_new_ref)
cp_broken.stopped()
log.debug("Renaming {}: {}".format(cp_broken.getId(), cp_exporter_ref))
# update software type and name for the partition that will take over
cp_winner_ref = self.namebase + '2' # XXX hardcoded. what's the API for this?
cp_root.request(software_release=self.software_release,
software_type=self.namebase+'-export',
partition_reference=cp_winner_ref).rename(new_name=cp_exporter_ref)
def failover(self):
try:
self._failover()
log.info('Renaming done')
except slapos.slap.slap.ServerError:
log.info('Internal server error')
# -*- coding: utf-8 -*-
import slapos.recipe.addresiliency.renamer
def run(args):
renamer = slapos.recipe.addresiliency.renamer.Renamer(server_url = args.pop('server_url'),
key_file = args.pop('key_file'),
cert_file = args.pop('cert_file'),
computer_guid = args.pop('computer_id'),
partition_id = args.pop('partition_id'),
software_release = args.pop('software'),
namebase = args.pop('namebase'))
renamer.failover()
......@@ -22,7 +22,7 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/pbsready.cfg.in
output = ${buildout:directory}/pbsready.cfg
md5sum = b6102416d000cae81dd2b06268946ea9
md5sum = af02afc439530a6cef9f5a21a25f3363
mode = 0644
[pbsready-import]
......@@ -53,7 +53,7 @@ mode = 0644
[template-replicated]
recipe = slapos.recipe.download
url = ${:_profile_base_location_}/template-replicated.cfg.in
md5sum = 1017d919dbf41904f04f5c17dcb574fa
md5sum = 63b5649f3cf1c9a77315382793d9593f
mode = 0644
destination = ${buildout:directory}/template-replicated.cfg.in
......
......@@ -45,6 +45,8 @@ notifier-callbacks = $${basedirectory:notifier}/callbacks
# If false, they can be run with bin/bullly for all the PBSReady instances.
enable-bully-service = False
recipe = slapos.cookbook:addresiliency
wrapper-bully = bully
wrapper-takeover = takeover
services = $${basedirectory:services}
bin = $${rootdirectory:bin}
etc = $${rootdirectory:etc}
......
......@@ -2,8 +2,6 @@
## Tells the Backupable recipe that we want a backup
[resilient]
config-script = bully.py
config-wrapper = bully
config-namebase = {{namebase}}
## Every request is double to provide the 3 IPs.
......@@ -15,7 +13,7 @@ software-type = {{typeexport}}
name = {{namebase}}0
return = url ssh-public-key ssh-url notification-id ip
config = number script wrapper authorized-key notify ip-list namebase
config = number authorized-key notify ip-list namebase
config-number = 0
config-authorized-key = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}{% endfor %}
config-notify = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-notification-url}{% endfor %}
......@@ -36,7 +34,7 @@ return = url ssh-public-key ssh-url notification-url ip
pbs-notification-id = ${slap-connection:computer-id}-${slap-connection:partition-id}-{{namebase}}-push
config = number script wrapper authorized-key on-notification ip-list namebase
config = number authorized-key on-notification ip-list namebase
config-number = {{id}}
config-authorized-key = ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}
config-on-notification = ${request-pbs-{{namebase}}-{{id}}:connection-feeds-url}${:pbs-notification-id}
......@@ -60,7 +58,7 @@ software-url = ${slap-connection:software-release-url}
software-type = {{typeexport}}
return = url ssh-public-key ssh-url notification-id ip
config = number script wrapper authorized-key notify ip-list namebase
config = number authorized-key notify ip-list namebase
config-number = 0
config-authorized-key = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}{% endfor %}
config-notify = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-notification-url}{% endfor %}
......@@ -81,7 +79,7 @@ return = url ssh-public-key ssh-url notification-url
pbs-notification-id = ${slap-connection:computer-id}-${slap-connection:partition-id}-{{namebase}}-push
config = number script wrapper authorized-key on-notification ip-list namebase
config = number authorized-key on-notification ip-list namebase
config-number = {{id}}
config-authorized-key = ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}
config-on-notification = ${request-pbs-{{namebase}}-{{id}}:connection-feeds-url}${:pbs-notification-id}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment