Commit e6bd1e78 authored by Julien Muchembled's avatar Julien Muchembled

ERP5: new 'private-dev-shm' zope partition option for wendelin.core

Current version of wendelin.core is limited in that there's no cooperation
in memory allocation between several processes sharing the same /dev/shm.
Over time, the distribution of memory among processes becomes more and more
unequal until some of them get killed due to allocation failure.

This is a temporary solution that relies on user namespaces.
parent 411212fb
...@@ -28,8 +28,25 @@ def _wait_files_creation(file_list): ...@@ -28,8 +28,25 @@ def _wait_files_creation(file_list):
if event.name in directory: if event.name in directory:
directory[event.name] = event.mask & (flags.CREATE | flags.MOVED_TO) directory[event.name] = event.mask & (flags.CREATE | flags.MOVED_TO)
def _libc():
from ctypes import CDLL, get_errno, c_char_p, c_int, c_ulong, util
libc = CDLL(util.find_library('c'), use_errno=True)
libc_mount = libc.mount
libc_mount.argtypes = c_char_p, c_char_p, c_char_p, c_ulong, c_char_p
def mount(source, target, filesystemtype, mountflags, data):
if libc_mount(source, target, filesystemtype, mountflags, data):
e = get_errno()
raise OSError(e, os.strerror(e))
libc_unshare = libc.unshare
libc_unshare.argtypes = c_int,
def unshare(flags):
if libc_unshare(flags):
e = get_errno()
raise OSError(e, os.strerror(e))
return mount, unshare
def generic_exec(args, extra_environ=None, wait_list=None, def generic_exec(args, extra_environ=None, wait_list=None,
pidfile=None, reserve_cpu=False, pidfile=None, reserve_cpu=False, private_dev_shm=None,
#shebang_workaround=False, # XXX: still needed ? #shebang_workaround=False, # XXX: still needed ?
): ):
args = list(args) args = list(args)
...@@ -62,6 +79,18 @@ def generic_exec(args, extra_environ=None, wait_list=None, ...@@ -62,6 +79,18 @@ def generic_exec(args, extra_environ=None, wait_list=None,
if wait_list: if wait_list:
_wait_files_creation(wait_list) _wait_files_creation(wait_list)
if private_dev_shm:
mount, unshare = _libc()
CLONE_NEWNS = 0x00020000
CLONE_NEWUSER = 0x10000000
uid = os.getuid()
gid = os.getgid()
unshare(CLONE_NEWUSER |CLONE_NEWNS)
with open('/proc/self/setgroups', 'wb') as f: f.write('deny')
with open('/proc/self/uid_map', 'wb') as f: f.write('%s %s 1' % (uid, uid))
with open('/proc/self/gid_map', 'wb') as f: f.write('%s %s 1' % (gid, gid))
mount('tmpfs', '/dev/shm', 'tmpfs', 0, 'size=' + private_dev_shm)
if extra_environ: if extra_environ:
env = os.environ.copy() env = os.environ.copy()
env.update(extra_environ) env.update(extra_environ)
......
...@@ -37,6 +37,7 @@ class Recipe(GenericBaseRecipe): ...@@ -37,6 +37,7 @@ class Recipe(GenericBaseRecipe):
:param lines wait-for-files: list of files to wait for :param lines wait-for-files: list of files to wait for
:param str pidfile: path to pidfile ensure exclusivity for the process :param str pidfile: path to pidfile ensure exclusivity for the process
:param str private-dev-shm: size of private /dev/shm, using user namespaces
:param bool reserve-cpu: command will ask for an exclusive CPU core :param bool reserve-cpu: command will ask for an exclusive CPU core
""" """
def install(self): def install(self):
...@@ -44,6 +45,7 @@ class Recipe(GenericBaseRecipe): ...@@ -44,6 +45,7 @@ class Recipe(GenericBaseRecipe):
wrapper_path = self.options['wrapper-path'] wrapper_path = self.options['wrapper-path']
wait_files = self.options.get('wait-for-files') wait_files = self.options.get('wait-for-files')
pidfile = self.options.get('pidfile') pidfile = self.options.get('pidfile')
private_dev_shm = self.options.get('private-dev-shm')
environment = {} environment = {}
for line in (self.options.get('environment') or '').splitlines(): for line in (self.options.get('environment') or '').splitlines():
...@@ -57,6 +59,8 @@ class Recipe(GenericBaseRecipe): ...@@ -57,6 +59,8 @@ class Recipe(GenericBaseRecipe):
kw['wait_list'] = wait_files.split() kw['wait_list'] = wait_files.split()
if pidfile: if pidfile:
kw['pidfile'] = pidfile kw['pidfile'] = pidfile
if private_dev_shm:
kw['private_dev_shm'] = private_dev_shm
if self.isTrueValue(self.options.get('reserve-cpu')): if self.isTrueValue(self.options.get('reserve-cpu')):
kw['reserve_cpu'] = True kw['reserve_cpu'] = True
......
...@@ -119,6 +119,10 @@ ...@@ -119,6 +119,10 @@
"default": 5, "default": 5,
"type": "number" "type": "number"
}, },
"private-dev-shm": {
"description": "Size of private /dev/shm for wendelin.core. If sysctl kernel.unprivileged_userns_clone exists, it must be set to 1.",
"type": "string"
},
"ssl-authentication": { "ssl-authentication": {
"title": "Enable SSL Client authentication on this zope instance.", "title": "Enable SSL Client authentication on this zope instance.",
"description": "If set to true, will set SSL Client verification to required on apache VirtualHost which allow to access this zope instance.", "description": "If set to true, will set SSL Client verification to required on apache VirtualHost which allow to access this zope instance.",
......
...@@ -79,7 +79,7 @@ md5sum = d41d8cd98f00b204e9800998ecf8427e ...@@ -79,7 +79,7 @@ md5sum = d41d8cd98f00b204e9800998ecf8427e
[template-erp5] [template-erp5]
filename = instance-erp5.cfg.in filename = instance-erp5.cfg.in
md5sum = 02ed5d9b74c70789004d01dd2ecde7b1 md5sum = 1d6735a803c9d28930bf2ad00706c06b
[template-zeo] [template-zeo]
filename = instance-zeo.cfg.in filename = instance-zeo.cfg.in
...@@ -87,7 +87,7 @@ md5sum = d1f33d406d528ae27d973e2dd0efb1ba ...@@ -87,7 +87,7 @@ md5sum = d1f33d406d528ae27d973e2dd0efb1ba
[template-zope] [template-zope]
filename = instance-zope.cfg.in filename = instance-zope.cfg.in
md5sum = fd7e8c507cef1950e6c0347ce2a01021 md5sum = e08c00d5973916d796bf08aa78dba34a
[template-balancer] [template-balancer]
filename = instance-balancer.cfg.in filename = instance-balancer.cfg.in
......
...@@ -205,6 +205,7 @@ name = {{ partition_name }} ...@@ -205,6 +205,7 @@ name = {{ partition_name }}
{{ root_common.sla(partition_name) }} {{ root_common.sla(partition_name) }}
config-name = {{ dumps(custom_name) }} config-name = {{ dumps(custom_name) }}
config-instance-count = {{ dumps(zope_parameter_dict.get('instance-count', 1)) }} config-instance-count = {{ dumps(zope_parameter_dict.get('instance-count', 1)) }}
config-private-dev-shm = {{ zope_parameter_dict.get('private-dev-shm', '') }}
config-thread-amount = {{ dumps(zope_parameter_dict.get('thread-amount', 4)) }} config-thread-amount = {{ dumps(zope_parameter_dict.get('thread-amount', 4)) }}
config-timerserver-interval = {{ dumps(zope_parameter_dict.get('timerserver-interval', 5)) }} config-timerserver-interval = {{ dumps(zope_parameter_dict.get('timerserver-interval', 5)) }}
config-longrequest-logger-interval = {{ dumps(zope_parameter_dict.get('longrequest-logger-interval', -1)) }} config-longrequest-logger-interval = {{ dumps(zope_parameter_dict.get('longrequest-logger-interval', -1)) }}
......
...@@ -206,6 +206,7 @@ environment = ...@@ -206,6 +206,7 @@ environment =
{% endif %} {% endif %}
parameters-extra = true parameters-extra = true
command-line = '{{ parameter_dict['userhosts'] }}' '{{ bin_directory }}/runzope' -C '${:configuration-file}' command-line = '{{ parameter_dict['userhosts'] }}' '{{ bin_directory }}/runzope' -C '${:configuration-file}'
private-dev-shm = {{ slapparameter_dict['private-dev-shm'] }}
[{{ section('zcml') }}] [{{ section('zcml') }}]
recipe = slapos.cookbook:copyfilelist recipe = slapos.cookbook:copyfilelist
......
  • After this change, when I requested an ERP5 instance without the new private-dev-shm parameter, the instantiation failed here: https://lab.nexedi.com/nexedi/slapos/blob/master/stack/erp5/instance-zope.cfg.in#L225, while processing "Installing dynamic-template-zope." for the zope partition, due to: UndefinedError: 'dict object' has no attribute 'private-dev-shm'. As a workaround, I requested the instance with the new parameter set to "".

  • Your workaround makes no sense to me. It looks like a coincidence.

  • An explanation might be that node instance considered that there was no change (despite the upgrade of SR) and skipped the root partition. And when you changed parameters (no matter how), it forced reinstantiation of the root partition and the new parameter was propagated to the zope partition.

    If you remove the parameter you added, it would still work.

  • Yes, probably something like that occurred. I re-requested the instance and it is working fine. Sorry for the inconvenience.

Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment