Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
slapos
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Eric Zheng
slapos
Commits
75f83ced
Commit
75f83ced
authored
12 years ago
by
Marco Mariani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bin/takeover script
parent
f8124124
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
146 additions
and
102 deletions
+146
-102
slapos/recipe/addresiliency/__init__.py
slapos/recipe/addresiliency/__init__.py
+24
-6
slapos/recipe/addresiliency/bully.py
slapos/recipe/addresiliency/bully.py
+8
-88
slapos/recipe/addresiliency/renamer.py
slapos/recipe/addresiliency/renamer.py
+90
-0
slapos/recipe/addresiliency/takeover.py
slapos/recipe/addresiliency/takeover.py
+16
-0
stack/resilient/buildout.cfg
stack/resilient/buildout.cfg
+2
-2
stack/resilient/pbsready.cfg.in
stack/resilient/pbsready.cfg.in
+2
-0
stack/resilient/template-replicated.cfg.in
stack/resilient/template-replicated.cfg.in
+4
-6
No files found.
slapos/recipe/addresiliency/__init__.py
View file @
75f83ced
...
...
@@ -31,7 +31,10 @@ import os
class
Recipe
(
GenericSlapRecipe
):
""" This class provides the installation of the resilience
script on the partition.
scripts on the partition.
bin/takeover will perform a rename (must be run manually).
bin/bully will monitor, run elections and perform renames when needed.
"""
def
_install
(
self
):
...
...
@@ -55,12 +58,12 @@ class Recipe(GenericSlapRecipe):
slap_connection
=
self
.
buildout
[
'slap-connection'
]
if
self
.
optionIsTrue
(
'enable-bully-service'
,
default
=
False
):
wrapper
_dir
=
self
.
options
[
'services'
]
bully
_dir
=
self
.
options
[
'services'
]
else
:
wrapper
_dir
=
self
.
options
[
'bin'
]
bully
_dir
=
self
.
options
[
'bin'
]
wrapper
=
self
.
createPythonScript
(
name
=
os
.
path
.
join
(
wrapper_dir
,
self
.
parameter_dict
[
'wrapper
'
]),
bully_
wrapper
=
self
.
createPythonScript
(
name
=
os
.
path
.
join
(
bully_dir
,
self
.
options
[
'wrapper-bully
'
]),
absolute_function
=
'slapos.recipe.addresiliency.bully.run'
,
arguments
=
{
'confpath'
:
confpath
,
...
...
@@ -73,7 +76,22 @@ class Recipe(GenericSlapRecipe):
'namebase'
:
self
.
parameter_dict
[
'namebase'
],
})
path_list
.
append
(
wrapper
)
path_list
.
append
(
bully_wrapper
)
takeover_wrapper
=
self
.
createPythonScript
(
name
=
os
.
path
.
join
(
self
.
options
[
'bin'
],
self
.
options
[
'wrapper-takeover'
]),
absolute_function
=
'slapos.recipe.addresiliency.takeover.run'
,
arguments
=
{
'server_url'
:
slap_connection
[
'server-url'
],
'key_file'
:
slap_connection
.
get
(
'key-file'
),
'cert_file'
:
slap_connection
.
get
(
'cert-file'
),
'computer_id'
:
slap_connection
[
'computer-id'
],
'partition_id'
:
slap_connection
[
'partition-id'
],
'software'
:
slap_connection
[
'software-release-url'
],
'namebase'
:
self
.
parameter_dict
[
'namebase'
],
})
path_list
.
append
(
takeover_wrapper
)
return
path_list
...
...
This diff is collapsed.
Click to expand it.
slapos/recipe/addresiliency/bully.py
View file @
75f83ced
...
...
@@ -6,7 +6,7 @@ import socket
import
thread
import
time
from
slapos
import
slap
as
slapmodule
import
slapos.recipe.addresiliency.renamer
import
slapos
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -29,86 +29,6 @@ STATE_REORGANIZATION = 'reorganization'
class Renamer(object):
    """Swap partition references on the SlapOS master to perform a takeover.

    The instance only stores connection settings and the naming scheme;
    all the work happens in ``failover()``.
    """

    def __init__(self, server_url, key_file, cert_file, computer_guid,
                 partition_id, software_release, namebase):
        """Remember the settings used later by ``_failover()``.

        server_url, key_file, cert_file -- passed to
            ``slap.initializeConnection()``.
        computer_guid, partition_id -- identify the partition running this
            code when registering it with the master.
        software_release -- software release used in the instance requests.
        namebase -- prefix of the partition references (``namebase + '0'``
            is the exporter) and of the ``'-export'`` software type.
        """
        self.server_url = server_url
        self.key_file = key_file
        self.cert_file = cert_file
        self.computer_guid = computer_guid
        self.partition_id = partition_id
        self.software_release = software_release
        self.namebase = namebase

    def _failover(self):
        """Rename the broken exporter away and promote the winner.

        Steps:
          - retrieve the broken computer partition
          - change its reference to 'broken-...' and its software type
            to 'frozen'
          - retrieve the winner computer partition
          - change its reference and software type to replace the broken one

        Then, after running slapgrid-cp a few times, the winner takes over
        and a new cp is created to replace it as an importer.
        """
        # TODO: replace hardcoded strings with values from the API
        slap = slapmodule.slap()
        slap.initializeConnection(self.server_url, self.key_file,
                                  self.cert_file)

        # Register the partition that will take over. The returned handle is
        # not needed below, so it is not bound to a name.
        # NOTE(review): confirm whether this call can be dropped entirely.
        slap.registerComputerPartition(computer_guid=self.computer_guid,
                                       partition_id=self.partition_id)

        # XXX although we can already rename the winner, to change its
        # software type we need to get hold of the root cp as well
        root_partition_id = 'slappart0'  # XXX hardcoded. what's the API for this?
        cp_root = slap.registerComputerPartition(
            computer_guid=self.computer_guid,
            partition_id=root_partition_id)

        # this is ok. the boss is always number zero.
        cp_exporter_ref = self.namebase + '0'

        # partition to be deactivated
        cp_broken = cp_root.request(software_release=self.software_release,
                                    software_type='frozen',
                                    state='stopped',
                                    partition_reference=cp_exporter_ref)

        # UTC timestamp (day-month_time, no year) keeps the new name distinct.
        broken_new_ref = 'broken-{}'.format(
            time.strftime("%d-%b_%H:%M:%S", time.gmtime()))

        # XXX can we retrieve and log the old reference name?
        log.debug("Renaming %s: %s", cp_broken.getId(), broken_new_ref)
        cp_broken.rename(new_name=broken_new_ref)
        cp_broken.stopped()

        # update software type and name for the partition that will take over
        cp_winner_ref = self.namebase + '2'  # XXX hardcoded. what's the API for this?

        # Log the winner's reference: the partition renamed here is the
        # winner, not the broken one that was renamed above.
        log.debug("Renaming %s: %s", cp_winner_ref, cp_exporter_ref)
        cp_root.request(software_release=self.software_release,
                        software_type=self.namebase + '-export',
                        partition_reference=cp_winner_ref
                        ).rename(new_name=cp_exporter_ref)

    def failover(self):
        """Run the takeover and log the outcome.

        A ``ServerError`` raised by the slap library is caught and logged
        with its traceback; any other exception propagates to the caller.
        """
        try:
            self._failover()
            log.info('Renaming done')
        except slapos.slap.slap.ServerError:
            # ERROR level plus traceback, so a failed takeover is visible.
            log.exception('Internal server error')
## Leader is always number 0
class
ResilientInstance
(
object
):
...
...
@@ -291,13 +211,13 @@ class Wrapper(object):
def
run
(
args
):
confpath
=
args
.
pop
(
'confpath'
)
renamer
=
Renamer
(
server_url
=
args
.
pop
(
'server_url'
),
key_file
=
args
.
pop
(
'key_file'
),
cert_file
=
args
.
pop
(
'cert_file'
),
computer_guid
=
args
.
pop
(
'computer_id'
),
partition_id
=
args
.
pop
(
'partition_id'
),
software_release
=
args
.
pop
(
'software'
),
namebase
=
args
.
pop
(
'namebase'
))
renamer
=
slapos
.
recipe
.
addresiliency
.
renamer
.
Renamer
(
server_url
=
args
.
pop
(
'server_url'
),
key_file
=
args
.
pop
(
'key_file'
),
cert_file
=
args
.
pop
(
'cert_file'
),
computer_guid
=
args
.
pop
(
'computer_id'
),
partition_id
=
args
.
pop
(
'partition_id'
),
software_release
=
args
.
pop
(
'software'
),
namebase
=
args
.
pop
(
'namebase'
))
if
args
:
raise
ValueError
(
'Unknown arguments: %s'
%
', '
.
join
(
args
))
...
...
This diff is collapsed.
Click to expand it.
slapos/recipe/addresiliency/renamer.py
0 → 100644
View file @
75f83ced
# -*- coding: utf-8 -*-
import
logging
import
time
from
slapos
import
slap
as
slapmodule
import
slapos
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Configure the root logger at import time with the DEBUG threshold, so the
# log.debug() calls in this module are not filtered out by level.
logging.basicConfig(level=logging.DEBUG)
class Renamer(object):
    """Swap partition references on the SlapOS master to perform a takeover.

    The instance only stores connection settings and the naming scheme;
    all the work happens in ``failover()``.
    """

    def __init__(self, server_url, key_file, cert_file, computer_guid,
                 partition_id, software_release, namebase):
        """Remember the settings used later by ``_failover()``.

        server_url, key_file, cert_file -- passed to
            ``slap.initializeConnection()``.
        computer_guid, partition_id -- identify the partition running this
            code when registering it with the master.
        software_release -- software release used in the instance requests.
        namebase -- prefix of the partition references (``namebase + '0'``
            is the exporter) and of the ``'-export'`` software type.
        """
        self.server_url = server_url
        self.key_file = key_file
        self.cert_file = cert_file
        self.computer_guid = computer_guid
        self.partition_id = partition_id
        self.software_release = software_release
        self.namebase = namebase

    def _failover(self):
        """Rename the broken exporter away and promote the winner.

        Steps:
          - retrieve the broken computer partition
          - change its reference to 'broken-...' and its software type
            to 'frozen'
          - retrieve the winner computer partition
          - change its reference and software type to replace the broken one

        Then, after running slapgrid-cp a few times, the winner takes over
        and a new cp is created to replace it as an importer.
        """
        # TODO: replace hardcoded strings with values from the API
        slap = slapmodule.slap()
        slap.initializeConnection(self.server_url, self.key_file,
                                  self.cert_file)

        # Register the partition that will take over. The returned handle is
        # not needed below, so it is not bound to a name.
        # NOTE(review): confirm whether this call can be dropped entirely.
        slap.registerComputerPartition(computer_guid=self.computer_guid,
                                       partition_id=self.partition_id)

        # XXX although we can already rename the winner, to change its
        # software type we need to get hold of the root cp as well
        root_partition_id = 'slappart0'  # XXX hardcoded. what's the API for this?
        cp_root = slap.registerComputerPartition(
            computer_guid=self.computer_guid,
            partition_id=root_partition_id)

        # this is ok. the boss is always number zero.
        cp_exporter_ref = self.namebase + '0'

        # partition to be deactivated
        cp_broken = cp_root.request(software_release=self.software_release,
                                    software_type='frozen',
                                    state='stopped',
                                    partition_reference=cp_exporter_ref)

        # UTC timestamp (day-month_time, no year) keeps the new name distinct.
        broken_new_ref = 'broken-{}'.format(
            time.strftime("%d-%b_%H:%M:%S", time.gmtime()))

        # XXX can we retrieve and log the old reference name?
        log.debug("Renaming %s: %s", cp_broken.getId(), broken_new_ref)
        cp_broken.rename(new_name=broken_new_ref)
        cp_broken.stopped()

        # update software type and name for the partition that will take over
        cp_winner_ref = self.namebase + '2'  # XXX hardcoded. what's the API for this?

        # Log the winner's reference: the partition renamed here is the
        # winner, not the broken one that was renamed above.
        log.debug("Renaming %s: %s", cp_winner_ref, cp_exporter_ref)
        cp_root.request(software_release=self.software_release,
                        software_type=self.namebase + '-export',
                        partition_reference=cp_winner_ref
                        ).rename(new_name=cp_exporter_ref)

    def failover(self):
        """Run the takeover and log the outcome.

        A ``ServerError`` raised by the slap library is caught and logged
        with its traceback; any other exception propagates to the caller.
        """
        try:
            self._failover()
            log.info('Renaming done')
        except slapos.slap.slap.ServerError:
            # ERROR level plus traceback, so a failed takeover is visible.
            log.exception('Internal server error')
This diff is collapsed.
Click to expand it.
slapos/recipe/addresiliency/takeover.py
0 → 100644
View file @
75f83ced
# -*- coding: utf-8 -*-
import
slapos.recipe.addresiliency.renamer
def run(args):
    """Entry point of the takeover script: rename partitions once.

    ``args`` is a dict holding the keys 'server_url', 'key_file',
    'cert_file', 'computer_id', 'partition_id', 'software' and 'namebase'.
    Each key is popped from the dict (a missing one raises KeyError) and
    handed to ``Renamer`` under the matching constructor parameter, then
    ``failover()`` is called on the resulting object.
    """
    # Resolve the class first, as the original call expression did.
    renamer_class = slapos.recipe.addresiliency.renamer.Renamer

    # (key in args, Renamer keyword) pairs, in the original pop order.
    option_to_keyword = (
        ('server_url', 'server_url'),
        ('key_file', 'key_file'),
        ('cert_file', 'cert_file'),
        ('computer_id', 'computer_guid'),
        ('partition_id', 'partition_id'),
        ('software', 'software_release'),
        ('namebase', 'namebase'),
    )

    renamer_kwargs = {}
    for option, keyword in option_to_keyword:
        renamer_kwargs[keyword] = args.pop(option)

    takeover = renamer_class(**renamer_kwargs)
    takeover.failover()
This diff is collapsed.
Click to expand it.
stack/resilient/buildout.cfg
View file @
75f83ced
...
...
@@ -22,7 +22,7 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/pbsready.cfg.in
output = ${buildout:directory}/pbsready.cfg
md5sum =
b6102416d000cae81dd2b06268946ea9
md5sum =
af02afc439530a6cef9f5a21a25f3363
mode = 0644
[pbsready-import]
...
...
@@ -53,7 +53,7 @@ mode = 0644
[template-replicated]
recipe = slapos.recipe.download
url = ${:_profile_base_location_}/template-replicated.cfg.in
md5sum =
1017d919dbf41904f04f5c17dcb574fa
md5sum =
63b5649f3cf1c9a77315382793d9593f
mode = 0644
destination = ${buildout:directory}/template-replicated.cfg.in
...
...
This diff is collapsed.
Click to expand it.
stack/resilient/pbsready.cfg.in
View file @
75f83ced
...
...
@@ -45,6 +45,8 @@ notifier-callbacks = $${basedirectory:notifier}/callbacks
# If false, they can be run with bin/bully for all the PBSReady instances.
enable-bully-service = False
recipe = slapos.cookbook:addresiliency
wrapper-bully = bully
wrapper-takeover = takeover
services = $${basedirectory:services}
bin = $${rootdirectory:bin}
etc = $${rootdirectory:etc}
...
...
This diff is collapsed.
Click to expand it.
stack/resilient/template-replicated.cfg.in
View file @
75f83ced
...
...
@@ -2,8 +2,6 @@
## Tells the Backupable recipe that we want a backup
[resilient]
config-script = bully.py
config-wrapper = bully
config-namebase = {{namebase}}
## Every request is double to provide the 3 IPs.
...
...
@@ -15,7 +13,7 @@ software-type = {{typeexport}}
name = {{namebase}}0
return = url ssh-public-key ssh-url notification-id ip
config = number
script wrapper
authorized-key notify ip-list namebase
config = number authorized-key notify ip-list namebase
config-number = 0
config-authorized-key = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}{% endfor %}
config-notify = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-notification-url}{% endfor %}
...
...
@@ -36,7 +34,7 @@ return = url ssh-public-key ssh-url notification-url ip
pbs-notification-id = ${slap-connection:computer-id}-${slap-connection:partition-id}-{{namebase}}-push
config = number
script wrapper
authorized-key on-notification ip-list namebase
config = number authorized-key on-notification ip-list namebase
config-number = {{id}}
config-authorized-key = ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}
config-on-notification = ${request-pbs-{{namebase}}-{{id}}:connection-feeds-url}${:pbs-notification-id}
...
...
@@ -60,7 +58,7 @@ software-url = ${slap-connection:software-release-url}
software-type = {{typeexport}}
return = url ssh-public-key ssh-url notification-id ip
config = number
script wrapper
authorized-key notify ip-list namebase
config = number authorized-key notify ip-list namebase
config-number = 0
config-authorized-key = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}{% endfor %}
config-notify = {% for id in range(1,nbbackup|int) %} ${request-pbs-{{namebase}}-{{id}}:connection-notification-url}{% endfor %}
...
...
@@ -81,7 +79,7 @@ return = url ssh-public-key ssh-url notification-url
pbs-notification-id = ${slap-connection:computer-id}-${slap-connection:partition-id}-{{namebase}}-push
config = number
script wrapper
authorized-key on-notification ip-list namebase
config = number authorized-key on-notification ip-list namebase
config-number = {{id}}
config-authorized-key = ${request-pbs-{{namebase}}-{{id}}:connection-ssh-key}
config-on-notification = ${request-pbs-{{namebase}}-{{id}}:connection-feeds-url}${:pbs-notification-id}
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment