Commit 973a0724 authored by Jérome Perrin's avatar Jérome Perrin

Bug fixes and important missing features for standalone / testcase

The most important are:
 * sometimes instance was not properly destroyed, apparently we need to retry `slapos node report`
 * if instance is not properly destroy, remove the files ourself and mark test failed
 * we need to see log files and config files from the instance

there's also a few small commits not directly related.

/reviewed-on nexedi/slapos.core!145
parents 8307f425 88a73f75
......@@ -50,6 +50,7 @@ import psutil
from .interface.slap import IException
from .interface.slap import ISupply
from .interface.slap import IRequester
from ..grid.slapgrid import SLAPGRID_PROMISE_FAIL
from .slap import slap
from ..grid.svcbackend import getSupervisorRPC
......@@ -60,10 +61,12 @@ class SlapOSNodeCommandError(Exception):
"""Exception raised when running a SlapOS Node command failed.
"""
def __str__(self):
# This is a false positive in pylint https://github.com/PyCQA/pylint/issues/1498
called_process_error = self.args[0] # pylint: disable=unsubscriptable-object
return "{} exitstatus: {} output:\n{}".format(
self.__class__.__name__,
self.args[0]['exitstatus'],
self.args[0]['output'],
called_process_error['exitstatus'],
called_process_error['output'],
)
......@@ -471,7 +474,19 @@ class StandaloneSlapOS(object):
for part in unknown_partition_set:
self._logger.debug(
"removing partition no longer part of format spec %s", part)
# remove partition directory
shutil.rmtree(part)
# remove partition supervisor config, if it was not removed cleanly
supervisor_conf = os.path.join(
self._instance_root,
'etc',
'supervisord.conf.d',
'%s.conf' % os.path.basename(part))
if os.path.exists(supervisor_conf):
self._logger.info(
"removing leftover supervisor config from destroyed partition at %s",
supervisor_conf)
os.unlink(supervisor_conf)
def supply(self, software_url, computer_guid=None, state="available"):
"""Supply a software, see ISupply.supply
......@@ -630,9 +645,14 @@ class StandaloneSlapOS(object):
prog = self._slapos_commands[command]
# used in format(**locals()) below
debug_args = prog.get('debug_args', '') # pylint: disable=unused-variable
return subprocess.check_call(
prog['command'].format(**locals()), shell=True)
command = prog['command'].format(**locals())
try:
return subprocess.check_call(command, shell=True)
except subprocess.CalledProcessError as e:
if e.returncode == SLAPGRID_PROMISE_FAIL:
self._logger.exception('Promise error when running %s', command)
import pdb; pdb.post_mortem()
raise
with self.system_supervisor_rpc as supervisor:
retry = 0
while True:
......
......@@ -30,6 +30,8 @@ import unittest
import os
import glob
import logging
import shutil
from six.moves.urllib.parse import urlparse
try:
import subprocess32 as subprocess
......@@ -43,6 +45,7 @@ from ..slap.standalone import StandaloneSlapOS
from ..slap.standalone import SlapOSNodeCommandError
from ..slap.standalone import PathTooDeepError
from ..grid.utils import md5digest
from ..util import mkdir_p
try:
from typing import Iterable, Tuple, Callable, Type
......@@ -59,6 +62,7 @@ def makeModuleSetUpAndTestCaseClass(
verbose=bool(int(os.environ.get('SLAPOS_TEST_VERBOSE', 0))),
shared_part_list=os.environ.get('SLAPOS_TEST_SHARED_PART_LIST',
'').split(os.pathsep),
snapshot_directory=os.environ.get('SLAPOS_TEST_LOG_DIRECTORY')
):
# type: (str, str, str, str, bool, bool, List[str]) -> Tuple[Callable[[], None], Type[SlapOSInstanceTestCase]]
"""Create a setup module function and a testcase for testing `software_url`.
......@@ -116,16 +120,22 @@ def makeModuleSetUpAndTestCaseClass(
'base directory ( {} ) is too deep, try setting '
'SLAPOS_TEST_WORKING_DIR to a shallow enough directory'.format(
base_directory))
if not snapshot_directory:
snapshot_directory = os.path.join(base_directory, "snapshots")
cls = type(
'SlapOSInstanceTestCase for {}'.format(software_url),
(SlapOSInstanceTestCase,), {
'slap': slap,
'getSoftwareURL': classmethod(lambda _cls: software_url),
'software_id': urlparse(software_url).path.split('/')[-2],
'_debug': debug,
'_verbose': verbose,
'_ipv4_address': ipv4_address,
'_ipv6_address': ipv6_address
'_ipv6_address': ipv6_address,
'_base_directory': base_directory,
'_test_file_snapshot_directory': snapshot_directory
})
class SlapOSInstanceTestCase_(cls, SlapOSInstanceTestCase):
......@@ -136,8 +146,8 @@ def makeModuleSetUpAndTestCaseClass(
# type: () -> None
if debug:
unittest.installHandler()
if verbose or debug:
logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(
level=logging.DEBUG if (verbose or debug) else logging.WARNING)
installSoftwareUrlList(cls, [software_url], debug=debug)
return setUpModule, SlapOSInstanceTestCase_
......@@ -260,7 +270,7 @@ class SlapOSInstanceTestCase(unittest.TestCase):
# maximum retries for `slapos node instance`
instance_max_retry = 10
# maximum retries for `slapos node report`
report_max_retry = 0
report_max_retry = 2
# number of partitions needed for this instance
partition_count = 10
# reference of the default requested partition
......@@ -274,6 +284,19 @@ class SlapOSInstanceTestCase(unittest.TestCase):
_ipv4_address = ""
_ipv6_address = ""
# a short name of that software URL.
# eg. helloworld instead of
# https://lab.nexedi.com/nexedi/slapos/raw/software/helloworld/software.cfg
software_id = ""
_base_directory = "" # base directory for standalone
_test_file_snapshot_directory = "" # directory to save snapshot files for inspections
# patterns of files to save for inspection, relative to instance directory
_save_instance_file_pattern_list = (
'*/etc/*',
'*/var/log/*',
'*/.*log',
)
# Methods to be defined by subclasses.
@classmethod
def getSoftwareURL(cls):
......@@ -361,6 +384,42 @@ class SlapOSInstanceTestCase(unittest.TestCase):
"""
cls._cleanup()
def tearDown(self):
# copy log files from standalone
for standalone_log in glob.glob(os.path.join(
self._base_directory, 'var', 'log', '*')):
self._snapshot_instance_file(standalone_log)
# copy config and log files from partitions
for pattern in self._save_instance_file_pattern_list:
for f in glob.glob(os.path.join(self.slap.instance_directory, pattern)):
self._snapshot_instance_file(f)
def _snapshot_instance_file(self, source_file_name):
"""Save a file for later inspection.
The path are made relative to slapos root directory and
we keep the same directory structure.
"""
# we cannot use os.path.commonpath on python2, so implement something similar
common_path = os.path.commonprefix((source_file_name, self._base_directory))
if not os.path.isdir(common_path):
common_path = os.path.dirname(common_path)
relative_path = source_file_name[len(common_path):]
if relative_path[0] == os.sep:
relative_path = relative_path[1:]
destination = os.path.join(
self._test_file_snapshot_directory,
self.software_id,
self.id(),
relative_path)
destination_dirname = os.path.dirname(destination)
mkdir_p(destination_dirname)
if os.path.isfile(source_file_name):
self.logger.debug("copy %s as %s", source_file_name, destination)
shutil.copy(source_file_name, destination)
# implementation methods
@classmethod
def _cleanup(cls):
......@@ -383,25 +442,30 @@ class SlapOSInstanceTestCase(unittest.TestCase):
cls.logger.critical(
"The following partitions were not cleaned up: %s",
[cp.getId() for cp in leaked_partitions])
for cp in leaked_partitions:
for cp in leaked_partitions:
try:
cls.slap.request(
software_release=cp.getSoftwareRelease().getURI(),
# software_type=cp.getType(), # TODO
# XXX is this really the reference ?
partition_reference=cp.getInstanceParameterDict()['instance_title'],
state="destroyed")
except:
cls.logger.exception(
"Error during request destruction of leaked partition")
try:
cls.slap.request(
software_release=cp.getSoftwareRelease().getURI(),
# software_type=cp.getType(), # TODO
# XXX is this really the reference ?
partition_reference=cp.getInstanceParameterDict()['instance_title'],
state="destroyed")
cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
except:
cls.logger.exception(
"Error during request destruction of leaked partition")
try:
cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
except:
cls.logger.exception("Error during leaked partitions actual destruction")
cls.logger.exception("Error during leaked partitions actual destruction")
try:
cls.slap.stop()
except:
cls.logger.exception("Error during stop")
leaked_supervisor_configs = glob.glob(
os.path.join(cls.slap.instance_directory, 'etc', 'supervisord.conf.d', '*.conf'))
if leaked_supervisor_configs:
[os.unlink(config) for config in leaked_supervisor_configs]
raise AssertionError("Test leaked supervisor configurations: %s" % leaked_supervisor_configs)
@classmethod
def requestDefaultInstance(cls, state='started'):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment