testcase.py 21.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

import unittest
import os
31
import fnmatch
32 33
import glob
import logging
34 35
import shutil
from six.moves.urllib.parse import urlparse
36 37 38 39 40 41 42 43 44 45 46 47 48

try:
  import subprocess32 as subprocess
except ImportError:
  import subprocess
  subprocess  # pyflakes

from .utils import getPortFromPath

from ..slap.standalone import StandaloneSlapOS
from ..slap.standalone import SlapOSNodeCommandError
from ..slap.standalone import PathTooDeepError
from ..grid.utils import md5digest
49
from ..util import mkdir_p
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65

try:
  from typing import Iterable, Tuple, Callable, Type
except ImportError:
  pass


def makeModuleSetUpAndTestCaseClass(
    software_url,
    base_directory=None,
    ipv4_address=os.environ['SLAPOS_TEST_IPV4'],
    ipv6_address=os.environ['SLAPOS_TEST_IPV6'],
    debug=bool(int(os.environ.get('SLAPOS_TEST_DEBUG', 0))),
    verbose=bool(int(os.environ.get('SLAPOS_TEST_VERBOSE', 0))),
    shared_part_list=os.environ.get('SLAPOS_TEST_SHARED_PART_LIST',
                                    '').split(os.pathsep),
    snapshot_directory=os.environ.get('SLAPOS_TEST_LOG_DIRECTORY')
):
  # type: (str, str, str, str, bool, bool, Iterable[str], str) -> Tuple[Callable[[], None], Type[SlapOSInstanceTestCase]]
  """Create a setup module function and a testcase for testing `software_url`.

  This function returns a tuple of two elements:
   * a function to install the software, to be used as `unittest`'s
     `setUpModule`
   * a base class for test cases.

  The SlapOS instance will be using ip addresses defined by
  environment variables `SLAPOS_TEST_IPV4` and `SLAPOS_TEST_IPV6`, or by the
  explicit `ipv4_address` and `ipv6_address` arguments.

  To ease development and troubleshooting, two switches are available:
   * `verbose` (also controlled by `SLAPOS_TEST_VERBOSE` environment variable)
     to tell the test framework to log information describing the actions taken.
   * `debug` (also controlled by `SLAPOS_TEST_DEBUG` environment variable) to
     enable debugging mode which will drop in a debugger session when errors
     occur.

  The base_directory directory is by default .slapos in current directory,
  or a path from `SLAPOS_TEST_WORKING_DIR` environment variable.

  This test class will use its own directory for shared parts and can also
  use paths from `shared_part_list` argument to lookup existing parts.
  This is controlled by SLAPOS_TEST_SHARED_PART_LIST environment variable,
  which should be a `os.pathsep` (`:` on unix) separated list of paths.
  Empty entries of this list are ignored.

  Files saved for later inspection are stored in `snapshot_directory`
  (also controlled by `SLAPOS_TEST_LOG_DIRECTORY` environment variable),
  which defaults to the `snapshots` directory inside `base_directory`.

  A note about paths:
    SlapOS itself and some services running in SlapOS uses unix sockets and
    (sometimes very) deep paths, which does not play very well together.
    To workaround this, users can set `SLAPOS_TEST_WORKING_DIR` environment
    variable to the path of a short enough directory and local slapos will
    use this directory.
    The partitions references will be named after the unittest class name,
    which can also lead to long paths. For this, unit test classes can define
    a `__partition_reference__` attribute which will be used as partition
    reference. If the class names are long, the trick is then to use a shorter
    `__partition_reference__`.
    See https://lab.nexedi.com/kirr/slapns for a solution to this problem.

  Raises RuntimeError if `base_directory` is too deep to be used by
  standalone slapos.
  """
  if base_directory is None:
    base_directory = os.path.realpath(
        os.environ.get(
            'SLAPOS_TEST_WORKING_DIR', os.path.join(os.getcwd(), '.slapos')))
  # When SLAPOS_TEST_SHARED_PART_LIST is not set, the default value is
  # `''.split(os.pathsep)`, which is `['']`: drop such empty paths instead
  # of passing them as shared parts.
  shared_part_list = [path for path in shared_part_list if path]
  # TODO: fail if already running ?
  try:
    slap = StandaloneSlapOS(
        base_directory=base_directory,
        server_ip=ipv4_address,
        server_port=getPortFromPath(base_directory),
        shared_part_list=shared_part_list)
  except PathTooDeepError:
    raise RuntimeError(
        'base directory ( {} ) is too deep, try setting '
        'SLAPOS_TEST_WORKING_DIR to a shallow enough directory'.format(
            base_directory))
  if not snapshot_directory:
    snapshot_directory = os.path.join(base_directory, "snapshots")

  cls = type(
      'SlapOSInstanceTestCase for {}'.format(software_url),
      (SlapOSInstanceTestCase,), {
          'slap': slap,
          'getSoftwareURL': classmethod(lambda _cls: software_url),
          # the name of the directory containing the software profile,
          # eg. helloworld for .../software/helloworld/software.cfg
          'software_id': urlparse(software_url).path.split('/')[-2],
          '_debug': debug,
          '_verbose': verbose,
          '_ipv4_address': ipv4_address,
          '_ipv6_address': ipv6_address,
          '_base_directory': base_directory,
          '_test_file_snapshot_directory': snapshot_directory
      })

  class SlapOSInstanceTestCase_(cls, SlapOSInstanceTestCase):
    # useless intermediate class so that editors provide completion anyway.
    pass

  def setUpModule():
    # type: () -> None
    if debug:
      unittest.installHandler()
    logging.basicConfig(
        level=logging.DEBUG if (verbose or debug) else logging.WARNING)
    installSoftwareUrlList(cls, [software_url], debug=debug)

  return setUpModule, SlapOSInstanceTestCase_


def checkSoftware(slap, software_url):
  # type: (StandaloneSlapOS, str) -> None
  """Check software installation.

  This performs a few basic static checks for common problems
  with software installations:
   * executables from the software directory and from the shared directory
     must not be linked to libraries that `ldd` reports as `not found`.
   * the hash of this software must not appear in the signature files of
     shared parts.

  Raises RuntimeError, with a message describing all the problems found, if
  any of these checks fails.
  """
  software_hash = md5digest(software_url)

  def _asText(output):
    # subprocess output is bytes on python3, but we need text to build the
    # error message.
    if isinstance(output, str):
      return output
    return output.decode('utf-8', 'replace')

  error_list = []
  # Check that all components set rpath correctly and we don't have miss linking any libraries.
  for path in (os.path.join(slap.software_directory,
                            software_hash), slap.shared_directory):
    if not glob.glob(os.path.join(path, '*')):
      # shared might be empty (when using a slapos command that does not support shared yet).
      continue
    out = ''
    try:
      out = _asText(subprocess.check_output(
          "find . -type f -executable "

          # We ignore parts that are binary distributions.
          "| egrep -v /parts/java-re.*/ "
          "| egrep -v /parts/firefox-.*/ "
          "| egrep -v /parts/chromium-.*/ "
          "| egrep -v /parts/chromedriver-.*/ "
          "| egrep -v /parts/renderjs-repository.git/node_modules/.* "

          # nss has no valid rpath. It does not seem to be a problem in our case.
          "| egrep -v /parts/nss/ "
          "| xargs ldd "
          r"| egrep '(^\S|not found)' "
          "| grep -B1 'not found'",
          shell=True,
          stderr=subprocess.STDOUT,
          cwd=path,
      ))
    except subprocess.CalledProcessError as e:
      # The "good case" is when grep does not match anything, but in
      # that case, it exits with exit code 1, so we accept this case.
      if e.returncode != 1 or e.output:
        error_list.append(_asText(e.output))
    if out:
      # grep matched something, this is the output of ldd for executables
      # with libraries not found.
      error_list.append(out)

  # check this software is not referenced in any shared parts.
  for signature_file in glob.glob(os.path.join(slap.shared_directory, '*', '*',
                                               '.*slapos.*.signature')):
    with open(signature_file) as f:
      signature_content = f.read()
      if software_hash in signature_content:
        error_list.append(
            "Software hash present in signature {}\n{}\n".format(
                signature_file, signature_content))

  if error_list:
    raise RuntimeError('\n'.join(error_list))


def installSoftwareUrlList(cls, software_url_list, max_retry=2, debug=False):
  # type: (Type[SlapOSInstanceTestCase], Iterable[str], int, bool) -> None
  """Install softwares on the current testing slapos, for use in `setUpModule`.

  This also check softwares with `checkSoftware`.

  The log files of standalone slapos are saved as a snapshot after the
  installation, regardless of whether it succeeded or failed.

  In case of error, unless `debug` is true, the softwares are removed.
  Then the instances are cleaned up with `cls._cleanup` and the error is
  raised again.
  """
  # The urls are iterated several times (supply, check and remove), so
  # make sure we are not using a one-shot iterable.
  software_url_list = list(software_url_list)

  def _storeSoftwareLogSnapshot(name):
    # save all the log files from standalone, including software logs.
    for standalone_log in glob.glob(os.path.join(
        cls._base_directory,
        'var',
        'log',
        '*',
    )):
      cls._copySnapshot(standalone_log, name)

  try:
    for software_url in software_url_list:
      cls.logger.debug("Supplying %s", software_url)
      cls.slap.supply(software_url)
    cls.logger.debug("Waiting for slapos node software to build")
    cls.slap.waitForSoftware(max_retry=max_retry, debug=debug)
    _storeSoftwareLogSnapshot('setupModule')
    for software_url in software_url_list:
      checkSoftware(cls.slap, software_url)
  except BaseException as e:
    # Logs are most useful when installation failed, save them before the
    # softwares are removed. This must not hide the installation error.
    try:
      _storeSoftwareLogSnapshot('setupModule failed installing software')
    except Exception:
      cls.logger.exception("Error storing software log snapshot")
    if not debug:
      cls.logger.exception("Error building software, removing")
      try:
        for software_url in software_url_list:
          cls.logger.debug("Removing %s", software_url)
          cls.slap.supply(software_url, state="destroyed")
        cls.logger.debug("Waiting for slapos node software to remove")
        cls.slap.waitForSoftware(max_retry=max_retry, debug=debug)
      except BaseException:
        cls.logger.exception("Error removing software")
        _storeSoftwareLogSnapshot('setupModule removing software')
    cls._cleanup('setupModule')
    # We raise `e` explicitly and not with a bare `raise`, because on
    # python2 a bare `raise` here would re-raise the last exception handled
    # in this function, which can be one from the removal above.
    raise e


class SlapOSInstanceTestCase(unittest.TestCase):
  """Install one slapos instance.

  This test case install software(s) and request one instance
  during `setUpClass` and destroy that instance during `tearDownClass`.

  Software Release URL, Instance Software Type and Instance Parameters
  can be defined on the class.

  All tests from the test class will run with the same instance.

  The following class attributes are available:

    * `computer_partition`:  the `slapos.slap.slap.ComputerPartition`
      computer partition instance.

    * `computer_partition_root_path`: the path of the instance root
      directory.

  This class is not supposed to be imported directly, but needs to be setup by
  calling makeModuleSetUpAndTestCaseClass.
  """
  # can set this to true to enable debugging utilities
  _debug = False
  # can set this to true to enable more verbose output
  _verbose = False
  # maximum retries for `slapos node instance`
  instance_max_retry = 10
  # maximum retries for `slapos node report`
  report_max_retry = 2
  # number of partitions needed for this instance
  partition_count = 10
  # reference of the default requested partition
  default_partition_reference = 'testing partition 0'

  # a logger for messages of the testing framework
  logger = logging.getLogger(__name__)

  # Dynamic members
  slap = None  # type: StandaloneSlapOS
  _ipv4_address = ""
  _ipv6_address = ""
  # Parameters of the default instance, set from `getInstanceParameterDict`
  # during `setUpClass`. This is defined here so that `requestDefaultInstance`
  # can also be used by `_cleanup` when `setUpClass` was not executed.
  # NOTE(review): this assumes `slap.request` accepts
  # `partition_parameter_kw=None` -- confirm against StandaloneSlapOS.
  _instance_parameter_dict = None  # type: dict

  # a short name of that software URL.
  # eg. helloworld instead of
  # https://lab.nexedi.com/nexedi/slapos/raw/software/helloworld/software.cfg
  software_id = ""
  _base_directory = ""  # base directory for standalone
  _test_file_snapshot_directory = ""  # directory to save snapshot files for inspections
  # patterns of files to save for inspection, relative to instance directory
  _save_instance_file_pattern_list = (
      '*/bin/*',
      '*/etc/*',
      '*/var/log/*',
      '*/.*log',
      '*/.*cfg',
      '*/*cfg',
      'etc/',
  )

  # Methods to be defined by subclasses.
  @classmethod
  def getSoftwareURL(cls):
    """Return URL of software release to request instance.

    This method will be defined when initialising the class
    with makeModuleSetUpAndTestCaseClass.
    """
    raise NotImplementedError()

  @classmethod
  def getInstanceParameterDict(cls):
    """Return instance parameters.

    To be defined by subclasses if they need to request instance
    with specific parameters.
    """
    return {}

  @classmethod
  def getInstanceSoftwareType(cls):
    """Return software type for instance, default "".

    To be defined by subclasses if they need to request instance with specific
    software type.
    """
    return ""

  # Unittest methods
  @classmethod
  def setUpClass(cls):
    """Request an instance.

    A snapshot of standalone slapos is stored at the end, on success and on
    error. On error, the instance is also cleaned up and all the tests of
    the class are made to fail.
    """
    cls._instance_parameter_dict = cls.getInstanceParameterDict()
    snapshot_name = "{}.{}.setUpClass".format(cls.__module__, cls.__name__)

    try:
      cls.logger.debug("Starting")
      cls.slap.start()
      cls.logger.debug(
          "Formatting to remove old partitions XXX should not be needed because we delete ..."
      )
      cls.slap.format(0, cls._ipv4_address, cls._ipv6_address)
      cls.logger.debug("Formatting with %s partitions", cls.partition_count)
      cls.slap.format(
          cls.partition_count, cls._ipv4_address, cls._ipv6_address,
          getattr(cls, '__partition_reference__', '{}-'.format(cls.__name__)))

      # request
      cls.requestDefaultInstance()

      # slapos node instance
      cls.logger.debug("Waiting for instance")
      # waitForInstance does not tolerate any error but with instances,
      # promises sometimes fail on first run, because services did not
      # have time to start.
      # To make debug usable, we tolerate instance_max_retry-1 errors and
      # only debug the last.
      if cls._debug and cls.instance_max_retry:
        try:
          cls.slap.waitForInstance(max_retry=cls.instance_max_retry - 1)
        except SlapOSNodeCommandError:
          cls.slap.waitForInstance(debug=True)
      else:
        cls.slap.waitForInstance(
            max_retry=cls.instance_max_retry, debug=cls._debug)

      # expose some class attributes so that tests can use them:
      # the main ComputerPartition instance, to use getInstanceParameterDict
      cls.computer_partition = cls.requestDefaultInstance()

      # the path of the instance on the filesystem, for low level inspection
      cls.computer_partition_root_path = os.path.join(
          cls.slap._instance_root, cls.computer_partition.getId())
      cls.logger.debug("setUpClass done")
    except BaseException:
      cls.logger.exception("Error during setUpClass")
      cls._storeSystemSnapshot(snapshot_name)
      cls._cleanup(snapshot_name)
      cls.setUp = lambda self: self.fail('Setup Class failed.')
      raise
    else:
      cls._storeSystemSnapshot(snapshot_name)

  @classmethod
  def tearDownClass(cls):
    """Tear down class, stop the processes and destroy instance.

    Unless debug is enabled, the log files of slapos are also removed.
    """
    cls._cleanup("{}.{}.tearDownClass".format(cls.__module__, cls.__name__))
    if not cls._debug:
      cls.logger.debug("cleaning up slapos log files in %s", cls.slap._log_directory)
      for log_file in glob.glob(os.path.join(cls.slap._log_directory, '*')):
        os.unlink(log_file)

  @classmethod
  def _storePartitionSnapshot(cls, name):
    """Store snapshot of partitions.

    This uses the definition from class attribute `_save_instance_file_pattern_list`
    """
    def _isSaved(path):
      # patterns are matched against the absolute path.
      return any(
          fnmatch.fnmatch(path, pattern)
          for pattern in cls._save_instance_file_pattern_list)

    # copy config and log files from partitions
    for (dirpath, dirnames, filenames) in os.walk(cls.slap.instance_directory):
      # iterate on a copy, because we modify dirnames while iterating
      for dirname in list(dirnames):
        dirabspath = os.path.join(dirpath, dirname)
        if _isSaved(dirabspath):
          cls._copySnapshot(dirabspath, name)
          # don't recurse, since _copySnapshot is already recursive
          dirnames.remove(dirname)
      for filename in filenames:
        fileabspath = os.path.join(dirpath, filename)
        if _isSaved(fileabspath):
          cls._copySnapshot(fileabspath, name)

  @classmethod
  def _storeSystemSnapshot(cls, name):
    """Store a snapshot of standalone slapos

    Does not include software log, because this is stored at the end of software
    installation and software log is large.
    """
    # copy log files from standalone
    for standalone_log in glob.glob(os.path.join(
        cls._base_directory,
        'var',
        'log',
        '*',
    )):
      # glob returns full paths, so we have to compare the name of the file
      # and not the path to recognize software logs.
      if not os.path.basename(standalone_log).startswith(
          'slapos-node-software.log'):
        cls._copySnapshot(standalone_log, name)
    # store slapproxy database
    cls._copySnapshot(cls.slap._proxy_database, name)

  def tearDown(self):
    """Store a snapshot of the partitions, named after the id of this test.
    """
    self._storePartitionSnapshot(self.id())

  @classmethod
  def _copySnapshot(cls, source_file_name, name):
    """Save a file, symbolic link or directory for later inspection.

    The path are made relative to slapos root directory and
    we keep the same directory structure.

    The copy is made in the directory `name`, in the `software_id` directory
    of the snapshot directory. If a directory was already saved at this
    place, for example by a previous run, it is replaced.
    """
    # we cannot use os.path.commonpath on python2, so implement something similar
    common_path = os.path.commonprefix((source_file_name, cls._base_directory))
    if not os.path.isdir(common_path):
      common_path = os.path.dirname(common_path)

    # the relative path might be empty, so we don't access the first
    # character directly.
    relative_path = source_file_name[len(common_path):].lstrip(os.sep)
    destination = os.path.join(
        cls._test_file_snapshot_directory,
        cls.software_id,
        name,
        relative_path,
    )
    destination_dirname = os.path.dirname(destination)
    mkdir_p(destination_dirname)
    if os.path.islink(
        source_file_name) and not os.path.exists(source_file_name):
      cls.logger.debug(
          "copy broken symlink %s as %s", source_file_name, destination)
      with open(destination, 'w') as f:
        f.write('broken symink to {}\n'.format(os.readlink(source_file_name)))
    elif os.path.isfile(source_file_name):
      cls.logger.debug("copy %s as %s", source_file_name, destination)
      shutil.copy(source_file_name, destination)
    elif os.path.isdir(source_file_name):
      cls.logger.debug("copy directory %s as %s", source_file_name, destination)
      # copytree fails when destination already exists, which happens when a
      # snapshot with the same name was already taken, so we remove it first.
      if os.path.islink(destination) or os.path.isfile(destination):
        os.unlink(destination)
      elif os.path.isdir(destination):
        shutil.rmtree(destination)
      # we copy symlinks as symlinks, so that this does not fail when
      # we copy a directory containing broken symlinks.
      shutil.copytree(source_file_name, destination, symlinks=True)

  # implementation methods
  @classmethod
  def _cleanup(cls, snapshot_name):
    # type: (str) -> None
    """Destroy all instances and stop subsystem.

    Catches and log all exceptions and take snapshot named `snapshot_name` + the failing step.

    Raises AssertionError if supervisor configuration files were still
    present once everything is stopped. These files are removed.
    """
    try:
      cls.requestDefaultInstance(state='destroyed')
    except BaseException:
      cls.logger.exception("Error during request destruction")
      cls._storeSystemSnapshot(
          "{}._cleanup request destroy".format(snapshot_name))
    try:
      cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
    except BaseException:
      cls.logger.exception("Error during actual destruction")
      cls._storeSystemSnapshot(
          "{}._cleanup waitForReport".format(snapshot_name))
    leaked_partitions = [
        cp for cp in cls.slap.computer.getComputerPartitionList()
        if cp.getState() != 'destroyed'
    ]
    if leaked_partitions:
      cls.logger.critical(
          "The following partitions were not cleaned up: %s",
          [cp.getId() for cp in leaked_partitions])
      cls._storeSystemSnapshot(
          "{}._cleanup leaked_partitions".format(snapshot_name))
      # request destruction of each leaked partition, then wait for
      # the destruction once.
      for cp in leaked_partitions:
        try:
          cls.slap.request(
              software_release=cp.getSoftwareRelease().getURI(),
              # software_type=cp.getType(), # TODO
              # XXX is this really the reference ?
              partition_reference=cp.getInstanceParameterDict()['instance_title'],
              state="destroyed")
        except BaseException:
          cls.logger.exception(
              "Error during request destruction of leaked partition")
          cls._storeSystemSnapshot(
              "{}._cleanup leaked_partitions request destruction".format(
                  snapshot_name))
      try:
        cls.slap.waitForReport(max_retry=cls.report_max_retry, debug=cls._debug)
      except BaseException:
        cls.logger.exception(
            "Error during leaked partitions actual destruction")
        cls._storeSystemSnapshot(
            "{}._cleanup leaked_partitions waitForReport".format(snapshot_name))
    try:
      cls.slap.stop()
    except BaseException:
      cls.logger.exception("Error during stop")
      cls._storeSystemSnapshot("{}._cleanup stop".format(snapshot_name))
    leaked_supervisor_configs = glob.glob(
        os.path.join(cls.slap.instance_directory, 'etc', 'supervisord.conf.d', '*.conf'))
    if leaked_supervisor_configs:
      # remove the leaked files, so that they are not reported again
      # by next cleanup.
      for config in leaked_supervisor_configs:
        os.unlink(config)
      raise AssertionError("Test leaked supervisor configurations: %s" % leaked_supervisor_configs)

  @classmethod
  def requestDefaultInstance(cls, state='started'):
    """Request the default instance in state `state`.

    The instance is requested with the software URL from `getSoftwareURL`,
    the software type from `getInstanceSoftwareType` and the parameters
    returned by `getInstanceParameterDict` during `setUpClass`.

    Returns what `slap.request` returned.
    """
    software_url = cls.getSoftwareURL()
    software_type = cls.getInstanceSoftwareType()
    cls.logger.debug(
        'requesting "%s" software:%s type:%s state:%s parameters:%s',
        cls.default_partition_reference, software_url, software_type, state,
        cls._instance_parameter_dict)
    return cls.slap.request(
        software_release=software_url,
        software_type=software_type,
        partition_reference=cls.default_partition_reference,
        partition_parameter_kw=cls._instance_parameter_dict,
        state=state)