# -*- coding: utf-8 -*-
# vim: set et sts=2:
##############################################################################
#
# Copyright (c) 2010, 2011, 2012 Vifib SARL and Contributors.
# All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################
30

Łukasz Nowak's avatar
Łukasz Nowak committed
31 32
import os
import pkg_resources
33
import random
Cédric de Saint Martin's avatar
Cédric de Saint Martin committed
34
import socket
Bryton Lacquement's avatar
Bryton Lacquement committed
35
from io import BytesIO
36
import subprocess
Cédric de Saint Martin's avatar
Cédric de Saint Martin committed
37 38 39 40
import sys
import tempfile
import time
import traceback
Łukasz Nowak's avatar
Łukasz Nowak committed
41
import warnings
42
import logging
43
import json
44
import shutil
Bryton Lacquement's avatar
Bryton Lacquement committed
45
import six
46
import errno
47

Łukasz Nowak's avatar
Łukasz Nowak committed
48
if sys.version_info < (2, 6):
Marco Mariani's avatar
Marco Mariani committed
49
  warnings.warn('Used python version (%s) is old and has problems with'
Łukasz Nowak's avatar
Łukasz Nowak committed
50 51
      ' IPv6 connections' % sys.version.split('\n')[0])

Marco Mariani's avatar
Marco Mariani committed
52 53
from lxml import etree

54
from slapos import manager as slapmanager
Cédric de Saint Martin's avatar
Cédric de Saint Martin committed
55 56
from slapos.slap.slap import NotFoundError
from slapos.slap.slap import ServerError
57
from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
58
from slapos.util import mkdir_p, chownDirectory, string_to_boolean
Marco Mariani's avatar
Marco Mariani committed
59
from slapos.grid.exception import BuildoutFailedError
60
from slapos.grid.SlapObject import Software, Partition
61 62 63 64
from slapos.grid.svcbackend import (launchSupervisord,
                                    createSupervisordConfiguration,
                                    _getSupervisordConfigurationDirectory,
                                    _getSupervisordSocketPath)
65 66 67
from slapos.grid.utils import (md5digest, dropPrivileges, SlapPopen, updateFile)
from slapos.grid.promise import PromiseLauncher, PromiseError
from slapos.grid.promise.generic import PROMISE_LOG_FOLDER_NAME
68
from slapos.human import human2bytes
Marco Mariani's avatar
Marco Mariani committed
69
import slapos.slap
70
from netaddr import valid_ipv4, valid_ipv6
Łukasz Nowak's avatar
Łukasz Nowak committed
71 72


Cédric de Saint Martin's avatar
Cédric de Saint Martin committed
73
# XXX: should be moved to SLAP library
COMPUTER_PARTITION_DESTROYED_STATE = 'destroyed'
COMPUTER_PARTITION_STARTED_STATE = 'started'
COMPUTER_PARTITION_STOPPED_STATE = 'stopped'

# Global variables about return state of slapgrid
SLAPGRID_SUCCESS = 0
SLAPGRID_FAIL = 1
SLAPGRID_PROMISE_FAIL = 2
# Default value of the ``promise_timeout`` option.
# NOTE(review): presumably expressed in seconds - confirm against the
# promise launcher.
PROMISE_TIMEOUT = 3

# File names (the install error one is a ``%s`` template).
# NOTE(review): presumably all located inside a computer partition
# directory - the code using them is not visible here.
COMPUTER_PARTITION_TIMESTAMP_FILENAME = '.timestamp'
COMPUTER_PARTITION_LATEST_BANG_TIMESTAMP_FILENAME = '.slapos_latest_bang_timestamp'
COMPUTER_PARTITION_INSTALL_ERROR_FILENAME = '.slapgrid-%s-error.log'
COMPUTER_PARTITION_WAIT_LIST_FILENAME = '.slapos-report-wait-service-list'

# XXX hardcoded watchdog_path
WATCHDOG_PATH = '/opt/slapos/bin/slapos-watchdog'

92 93 94 95 96

class _formatXMLError(Exception):
  pass


97 98 99 100 101 102 103 104 105 106 107 108
class FPopen(subprocess.Popen):
  """``subprocess.Popen`` variant whose child never gets any input.

  stdin is always a pipe that is closed right after the process is started,
  and stderr is always merged into stdout.  Unless the caller says
  otherwise, stdout is a pipe, inherited file descriptors are closed and
  the command is run through the shell.
  """

  def __init__(self, *args, **kwargs):
    # Forced settings: whatever the caller passed for these is overwritten.
    kwargs.update(stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
    # Overridable defaults.
    for option, fallback in (('stdout', subprocess.PIPE),
                             ('close_fds', True),
                             ('shell', True)):
      kwargs.setdefault(option, fallback)
    super(FPopen, self).__init__(*args, **kwargs)
    # Close our end of the stdin pipe immediately and forget about it.
    child_stdin = self.stdin
    child_stdin.flush()
    child_stdin.close()
    self.stdin = None

109 110 111
def check_missing_parameters(options):
  """Check that every mandatory slapgrid option is present and non-empty.

  ``computer_id``, ``instance_root``, ``master_url`` and ``software_root``
  are always mandatory.  As soon as one of ``key_file`` / ``cert_file`` is
  given, the other one and ``certificate_repository_path`` become mandatory
  too.

  :param options: mapping of option name to value.
  :raises RuntimeError: if a mandatory option is absent, or if it is
      present but has a false value (empty string, ``None``...).
  """
  required = set([
      'computer_id',
      # XXX: instance_root is better named "partition_root"
      'instance_root',
      'master_url',
      'software_root',
  ])

  if 'key_file' in options:
    required.add('certificate_repository_path')
    required.add('cert_file')
  if 'cert_file' in options:
    required.add('certificate_repository_path')
    required.add('key_file')

  missing = required.difference(options)

  if missing:
    raise RuntimeError('Missing mandatory parameters: %s' % ', '.join(sorted(missing)))

  # parameter can NOT be empty string or None
  empty = set(option for option in required if not options.get(option))

  if empty:
    raise RuntimeError('Mandatory parameters present but empty: %s' % ', '.join(sorted(empty)))

138

139 140
def check_missing_files(options):
  """Check that the files and directories named in ``options`` exist.

  Options that are absent or have a false value are ignored; only paths
  actually configured are verified.

  :param options: mapping of option name to value.
  :raises RuntimeError: if a configured file path does not exist, or if
      ``certificate_repository_path`` is configured but is not a directory.
  """
  req_files = [
      options.get('key_file'),
      options.get('cert_file'),
      options.get('master_ca_file'),
      options.get('shacache-ca-file'),
      options.get('shacache-cert-file'),
      options.get('shacache-key-file'),
      options.get('shadir-ca-file'),
      options.get('shadir-cert-file'),
      options.get('shadir-key-file'),
      # The dashed spelling wins over the underscored one.
      options.get('signature-private-key-file',
        options.get('signature_private_key_file')),
  ]

  req_dirs = [
      options.get('certificate_repository_path')
  ]

  for f in req_files:
    if f and not os.path.exists(f):
      raise RuntimeError('File %r does not exist.' % f)

  for d in req_dirs:
    if d and not os.path.isdir(d):
      raise RuntimeError('Directory %r does not exist' % d)


167 168
def merged_options(args, configp):
  """Merge configuration file sections and command line into one dict.

  Precedence, lowest to highest: the ``slapos`` section, the optional
  ``networkcache`` section, then every attribute of ``args`` whose value is
  not ``None``.

  On top of the plain merge:

  - ``all`` implies ``develop``;
  - the deprecated ``binary-cache-url-blacklist`` option is used as
    ``download-from-binary-cache-url-blacklist`` when the latter is unset;
  - both ``*-binary-cache-url-blacklist`` options are turned into lists
    holding one stripped entry per non-blank line;
  - ``firewall`` is always set: an empty dict without a ``firewall``
    section, else the section content with ``authorized_sources`` turned
    into a list and defaults filled in for the command options.

  :param args: object whose attributes are the command line options
      (read through ``vars()``).
  :param configp: parsed configuration providing ``items()`` and
      ``has_section()``.
  :return: the merged options dict.
  """
  def _stripped_lines(text):
    # One entry per line; lines that are empty once stripped are dropped so
    # that a whitespace-only line never yields an '' entry.
    return [line.strip() for line in text.split('\n') if line.strip()]

  options = dict(configp.items('slapos'))

  if configp.has_section('networkcache'):
    options.update(dict(configp.items('networkcache')))
  for key, value in vars(args).items():
    if value is not None:
      options[key] = value

  if options.get('all'):
    options['develop'] = True

  # Parse cache / binary cache options
  # Backward compatibility about "binary-cache-url-blacklist" deprecated option
  if (options.get("binary-cache-url-blacklist") and not
        options.get("download-from-binary-cache-url-blacklist")):
    options["download-from-binary-cache-url-blacklist"] = \
        options["binary-cache-url-blacklist"]
  options["download-from-binary-cache-url-blacklist"] = _stripped_lines(
      options.get("download-from-binary-cache-url-blacklist", ""))
  options["upload-to-binary-cache-url-blacklist"] = _stripped_lines(
      options.get("upload-to-binary-cache-url-blacklist", ""))

  options['firewall'] = {}
  if configp.has_section('firewall'):
    firewall = options['firewall'] = dict(configp.items('firewall'))
    firewall["authorized_sources"] = _stripped_lines(
        firewall.get("authorized_sources", ""))
    # Fill in defaults for the options the section does not define.
    firewall.setdefault("firewall_cmd", "firewall-cmd")
    firewall.setdefault("firewall_executable", "")
    firewall.setdefault("dbus_executable", "")
    firewall.setdefault("reload_config_cmd", "slapos node restart firewall")

  return options


211
def random_delay(options, logger):
  """
  Sleep for a random time to avoid SlapOS Master being DDOSed by an army of
  SlapOS Nodes configured with cron.

  Nothing happens when the ``now`` option is true, or when ``maximal_delay``
  is missing, zero or negative.  Otherwise the sleep lasts between 1 and
  ``maximal_delay`` seconds (both included).

  :param options: mapping of option name to value.
  :param logger: logger used to announce the sleep.
  """
  # A missing 'now' is handled like a false one instead of raising KeyError.
  if options.get('now'):
    # XXX-Cedric: deprecate '--now'
    return

  maximal_delay = int(options.get('maximal_delay', '0'))
  # random.randint(1, n) raises ValueError for n < 1: treat a negative value
  # like a disabled delay.
  if maximal_delay > 0:
    duration = random.randint(1, maximal_delay)
    logger.info('Sleeping for %s seconds. To disable this feature, '
                'check --now parameter in slapgrid help.', duration)
    time.sleep(duration)


228
def create_slapgrid_object(options, logger):
  """Build a ``Slapgrid`` instance out of merged options.

  ``software_root``, ``instance_root``, ``master_url`` and ``computer_id``
  must be present in ``options``; every other option falls back to a
  default when absent.

  :param options: mapping of option name to value; it is also handed over
      untouched to ``Slapgrid`` as ``config``.
  :param logger: logger handed over to ``Slapgrid``.
  :return: the new ``Slapgrid`` instance.
  :raises KeyError: if one of the four mandatory options is missing.
  """
  signature_certificate_list = None
  if 'signature-certificate-list' in options:
    # The option holds concatenated certificates: cut on the marker and put
    # the marker back in front of each non-empty chunk.
    cert_marker = '-----BEGIN CERTIFICATE-----'
    signature_certificate_list = [
        cert_marker + '\n' + q.strip()
        for q in options['signature-certificate-list'].split(cert_marker)
        if q.strip()
    ]

  op = options
  software_min_free_space = human2bytes(op.get('software_min_free_space', '1000M'))
  instance_min_free_space = human2bytes(op.get('instance_min_free_space', '1000M'))

  return Slapgrid(software_root=op['software_root'],
                  instance_root=op['instance_root'],
                  shared_part_list=op.get('shared_part_list', ''),
                  master_url=op['master_url'],
                  computer_id=op['computer_id'],
                  buildout=op.get('buildout'),
                  buildout_debug=op.get('buildout_debug'),
                  logger=logger,
                  maximum_periodicity=op.get('maximum_periodicity', 86400),
                  key_file=op.get('key_file'),
                  cert_file=op.get('cert_file'),
                  signature_private_key_file=op.get(
                    'signature-private-key-file', op.get('signature_private_key_file')),
                  signature_certificate_list=signature_certificate_list,
                  download_binary_cache_url=op.get('download-binary-cache-url'),
                  upload_binary_cache_url=op.get('upload-binary-cache-url'),
                  download_from_binary_cache_url_blacklist=
                      op.get('download-from-binary-cache-url-blacklist', []),
                  upload_to_binary_cache_url_blacklist=
                      op.get('upload-to-binary-cache-url-blacklist', []),
                  upload_cache_url=op.get('upload-cache-url'),
                  download_binary_dir_url=op.get('download-binary-dir-url'),
                  upload_binary_dir_url=op.get('upload-binary-dir-url'),
                  upload_dir_url=op.get('upload-dir-url'),
                  master_ca_file=op.get('master_ca_file'),
                  certificate_repository_path=op.get('certificate_repository_path'),
                  promise_timeout=op.get('promise_timeout', PROMISE_TIMEOUT),
                  shacache_ca_file=op.get('shacache-ca-file'),
                  shacache_cert_file=op.get('shacache-cert-file'),
                  shacache_key_file=op.get('shacache-key-file'),
                  shadir_ca_file=op.get('shadir-ca-file'),
                  shadir_cert_file=op.get('shadir-cert-file'),
                  shadir_key_file=op.get('shadir-key-file'),
                  forbid_supervisord_automatic_launch=string_to_boolean(op.get('forbid_supervisord_automatic_launch', 'false')),
                  develop=op.get('develop', False),
                  # Try to fetch from deprecated argument
                  software_release_filter_list=op.get('only-sr', op.get('only_sr')),
                  # Try to fetch from deprecated argument
                  computer_partition_filter_list=op.get('only-cp', op.get('only_cp')),
                  software_min_free_space=software_min_free_space,
                  instance_min_free_space=instance_min_free_space,
                  instance_storage_home=op.get('instance_storage_home'),
                  ipv4_global_network=op.get('ipv4_global_network'),
                  firewall_conf=op.get('firewall'),
                  config=options)
Łukasz Nowak's avatar
Łukasz Nowak committed
287 288


289
def check_required_only_partitions(existing, required):
  """
  Verify the existence of partitions specified by the --only parameter

  :param existing: iterable of known partition identifiers.
  :param required: iterable of requested partition identifiers.
  :raises ValueError: if some requested identifier is not a known one; the
      message lists the unknown identifiers in sorted order.
  """
  missing = set(required) - set(existing)
  if missing:
    plural = '' if len(missing) == 1 else 's'
    raise ValueError('Unknown partition%s: %s' % (plural, ', '.join(sorted(missing))))
297 298


Łukasz Nowak's avatar
Łukasz Nowak committed
299 300 301 302
class Slapgrid(object):
  """ Main class for SlapGrid. Fetches and processes informations from master
  server and pushes usage information to master server.
  """
Antoine Catton's avatar
Antoine Catton committed
303

Łukasz Nowak's avatar
Łukasz Nowak committed
304 305 306 307 308
  def __init__(self,
               software_root,
               instance_root,
               master_url,
               computer_id,
               buildout,
               logger,
               maximum_periodicity=86400,
               key_file=None,
               cert_file=None,
               signature_private_key_file=None,
               signature_certificate_list=None,
               download_binary_cache_url=None,
               upload_binary_cache_url=None,
               download_from_binary_cache_url_blacklist=None,
               upload_to_binary_cache_url_blacklist=None,
               upload_cache_url=None,
               download_binary_dir_url=None,
               upload_binary_dir_url=None,
               upload_dir_url=None,
               master_ca_file=None,
               certificate_repository_path=None,
               promise_timeout=3,
               shacache_ca_file=None,
               shacache_cert_file=None,
               shacache_key_file=None,
               shadir_ca_file=None,
               shadir_cert_file=None,
               shadir_key_file=None,
               forbid_supervisord_automatic_launch=False,
               develop=False,
               software_release_filter_list=None,
               computer_partition_filter_list=None,
               software_min_free_space=None,
               instance_min_free_space=None,
               instance_storage_home=None,
               ipv4_global_network=None,
               firewall_conf=None,
               config=None,
               buildout_debug=False,
               shared_part_list=''
               ):
    """Makes easy initialisation of class parameters

    Most arguments are stored unchanged on the instance under the same
    name.  The exceptions are:

    - ``software_root``, ``instance_root`` and a non-empty
      ``instance_storage_home`` are turned into absolute paths;
    - ``software_release_filter_list`` and
      ``computer_partition_filter_list`` are comma separated strings, stored
      as lists (empty list when ``None``);
    - false values of ``instance_storage_home`` and ``ipv4_global_network``
      are stored as ``""``;
    - ``firewall_conf`` left to ``None`` is stored as a new empty dict.

    The connection to ``master_url`` is initialised and the computer
    ``computer_id`` is registered on it; managers are built from ``config``.
    """
    # Parses arguments
    self.software_root = os.path.abspath(software_root)
    self.instance_root = os.path.abspath(instance_root)
    self.master_url = master_url
    self.computer_id = computer_id
    self.supervisord_socket = _getSupervisordSocketPath(instance_root)
    self.key_file = key_file
    self.cert_file = cert_file
    self.master_ca_file = master_ca_file
    self.certificate_repository_path = certificate_repository_path
    self.signature_private_key_file = signature_private_key_file
    self.signature_certificate_list = signature_certificate_list
    self.download_binary_cache_url = download_binary_cache_url
    self.upload_binary_cache_url = upload_binary_cache_url
    self.download_from_binary_cache_url_blacklist = \
        download_from_binary_cache_url_blacklist
    self.upload_to_binary_cache_url_blacklist = \
        upload_to_binary_cache_url_blacklist
    self.upload_cache_url = upload_cache_url
    self.download_binary_dir_url = download_binary_dir_url
    self.upload_binary_dir_url = upload_binary_dir_url
    self.upload_dir_url = upload_dir_url
    self.shacache_ca_file = shacache_ca_file
    self.shacache_cert_file = shacache_cert_file
    self.shacache_key_file = shacache_key_file
    self.shadir_ca_file = shadir_ca_file
    self.shadir_cert_file = shadir_cert_file
    self.shadir_key_file = shadir_key_file
    self.forbid_supervisord_automatic_launch = forbid_supervisord_automatic_launch
    self.logger = logger
    # Creates objects from slap module
    self.slap = slapos.slap.slap()
    self.slap.initializeConnection(self.master_url, key_file=self.key_file,
        cert_file=self.cert_file, master_ca_file=self.master_ca_file)
    self.computer = self.slap.registerComputer(self.computer_id)
    # Defines all needed paths
    self.buildout = buildout
    self.buildout_debug = buildout_debug
    self.promise_timeout = promise_timeout
    self.develop = develop
    if software_release_filter_list is not None:
      self.software_release_filter_list = \
          software_release_filter_list.split(",")
    else:
      self.software_release_filter_list = []
    self.computer_partition_filter_list = []
    if computer_partition_filter_list is not None:
      self.computer_partition_filter_list = \
          computer_partition_filter_list.split(",")
    self.maximum_periodicity = maximum_periodicity
    self.software_min_free_space = software_min_free_space
    self.instance_min_free_space = instance_min_free_space
    if instance_storage_home:
      self.instance_storage_home = os.path.abspath(instance_storage_home)
    else:
      self.instance_storage_home = ""
    if ipv4_global_network:
      self.ipv4_global_network = ipv4_global_network
    else:
      self.ipv4_global_network = ""
    # A dict literal used as default value would be one single object shared
    # by every instance relying on the default: build a new one each time.
    self.firewall_conf = {} if firewall_conf is None else firewall_conf
    self.config = config
    self._manager_list = slapmanager.from_config(config)
    self.shared_part_list = shared_part_list
Cédric Le Ninivin's avatar
Cédric Le Ninivin committed
411

412
  def _getWatchdogLine(self):
    """Return the command line used to run the watchdog.

    The line is ``WATCHDOG_PATH`` followed by the master URL, the
    certificate repository path (only when one is set), the computer id and
    the instance root, each value being wrapped in single quotes.
    """
    invocation_list = [WATCHDOG_PATH]
    invocation_list.append("--master-url '%s' " % self.master_url)
    if self.certificate_repository_path:
      invocation_list.append("--certificate-repository-path '%s'" %
                                self.certificate_repository_path)
    invocation_list.append("--computer-id '%s'" % self.computer_id)
    invocation_list.append("--instance-root '%s'" % self.instance_root)
    return ' '.join(invocation_list)
Łukasz Nowak's avatar
Łukasz Nowak committed
421

422 423 424 425 426 427 428 429
  def _generateFirewallSupervisorConf(self):
    """If firewall section is defined in slapos configuration, generate
      supervisor configuration entry for firewall process.
    """
    supervisord_conf_folder_path = os.path.join(self.instance_root,
                                               'etc', 'supervisord.conf.d')
    supervisord_firewall_conf = os.path.join(supervisord_conf_folder_path,
                                              'firewall.conf')
430 431
    if not self.firewall_conf or not self.firewall_conf.get('firewall_executable') \
      or self.firewall_conf.get('testing', False):
432 433 434 435 436 437 438 439
      if os.path.exists(supervisord_firewall_conf):
        os.unlink(supervisord_firewall_conf)
      return
    supervisord_firewall_program_conf = """\
[program:firewall]
directory=/opt/slapos
command=%(firewall_executable)s
process_name=firewall
440
priority=5
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
autostart=true
autorestart=true
startsecs=0
startretries=0
exitcodes=0
stopsignal=TERM
stopwaitsecs=60
user=0
group=0
serverurl=AUTO
redirect_stderr=true
stdout_logfile=%(log_file)s
stdout_logfile_maxbytes=100KB
stdout_logfile_backups=1
stderr_logfile=%(log_file)s
stderr_logfile_maxbytes=100KB
stderr_logfile_backups=1
""" %  {'firewall_executable': self.firewall_conf['firewall_executable'],
        'log_file': self.firewall_conf.get('log_file', '/var/log/firewall.log')}

    if not os.path.exists(supervisord_conf_folder_path):
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
      os.makedirs(supervisord_conf_folder_path)
    updateFile(supervisord_firewall_conf, supervisord_firewall_program_conf)


  def _generateDbusSupervisorConf(self):
    """If dbus command is defined in slapos configuration, generate
      supervisor configuration entry for dbus daemon.
    """
    supervisord_conf_folder_path = os.path.join(self.instance_root,
                                               'etc', 'supervisord.conf.d')
    supervisord_dbus_conf = os.path.join(supervisord_conf_folder_path,
                                              'dbus.conf')
    if not self.firewall_conf or not self.firewall_conf.get('dbus_executable') \
      or self.firewall_conf.get('testing', False):
      if os.path.exists(supervisord_dbus_conf):
        os.unlink(supervisord_dbus_conf)
      return
    supervisord_dbus_program_conf = """\
[program:dbus]
directory=/opt/slapos
command=%(dbus_executable)s
process_name=dbus
priority=1
autostart=true
autorestart=true
startsecs=0
startretries=0
exitcodes=0
stopsignal=TERM
stopwaitsecs=60
user=0
group=0
serverurl=AUTO
redirect_stderr=true
stdout_logfile=%(dbus_log_file)s
stdout_logfile_maxbytes=100KB
stdout_logfile_backups=1
stderr_logfile=%(dbus_log_file)s
stderr_logfile_maxbytes=100KB
stderr_logfile_backups=1
""" %  {'dbus_executable': self.firewall_conf['dbus_executable'],
        'dbus_log_file': self.firewall_conf.get('dbus_log_file', '/var/log/dbus.log')}

    if not os.path.exists(supervisord_conf_folder_path):
      os.makedirs(supervisord_conf_folder_path)
    updateFile(supervisord_dbus_conf, supervisord_dbus_program_conf)
508

Łukasz Nowak's avatar
Łukasz Nowak committed
509 510 511 512 513 514
  def checkEnvironmentAndCreateStructure(self):
    """Checks for software_root and instance_root existence, then creates
       needed files and directories.
    """
    # Checks for software_root and instance_root existence
    if not os.path.isdir(self.software_root):
515
      raise OSError('%s does not exist.' % self.software_root)
516 517

    createSupervisordConfiguration(self.instance_root, self._getWatchdogLine())
518
    self._generateFirewallSupervisorConf()
519
    self._generateDbusSupervisorConf()
520 521

  def _launchSupervisord(self):
522 523
    if not self.forbid_supervisord_automatic_launch:
      launchSupervisord(instance_root=self.instance_root, logger=self.logger)
Łukasz Nowak's avatar
Łukasz Nowak committed
524 525 526

  def getComputerPartitionList(self):
    try:
527
      return self.computer.getComputerPartitionList()
Marco Mariani's avatar
Marco Mariani committed
528 529
    except socket.error as exc:
      self.logger.fatal(exc)
530
      raise
Łukasz Nowak's avatar
Łukasz Nowak committed
531 532 533 534 535

  def processSoftwareReleaseList(self):
    """Will process each Software Release.

    For every software release of the computer:

    - in ``available`` state, it is installed when needed and reported as
      available;
    - in ``destroyed`` state, it is destroyed if its directory exists and
      reported as destroyed.

    Every manager is called before (``software``) and after
    (``softwareTearDown``) the processing of each software release.

    A failure on one software release is reported through its ``error``
    method and does not prevent the next ones from being processed, except
    for ``SystemExit`` and ``KeyboardInterrupt`` which are reported then
    raised again.

    :return: ``SLAPGRID_SUCCESS`` if no software release failed, else
        ``SLAPGRID_FAIL``.
    """
    self.checkEnvironmentAndCreateStructure()
    self.logger.info('Processing software releases...')
    # Boolean to know if every software release has correctly been processed
    clean_run = True
    for software_release in self.computer.getSoftwareReleaseList():
      state = software_release.getState()
      try:
        software_release_uri = software_release.getURI()
        url_hash = md5digest(software_release_uri)
        software_path = os.path.join(self.software_root, url_hash)
        software = Software(url=software_release_uri,
            software_root=self.software_root,
            buildout=self.buildout,
            buildout_debug=self.buildout_debug,
            logger=self.logger,
            signature_private_key_file=self.signature_private_key_file,
            signature_certificate_list=self.signature_certificate_list,
            download_binary_cache_url=self.download_binary_cache_url,
            upload_binary_cache_url=self.upload_binary_cache_url,
            download_from_binary_cache_url_blacklist=
                self.download_from_binary_cache_url_blacklist,
            upload_to_binary_cache_url_blacklist=
                self.upload_to_binary_cache_url_blacklist,
            upload_cache_url=self.upload_cache_url,
            download_binary_dir_url=self.download_binary_dir_url,
            upload_binary_dir_url=self.upload_binary_dir_url,
            upload_dir_url=self.upload_dir_url,
            shacache_ca_file=self.shacache_ca_file,
            shacache_cert_file=self.shacache_cert_file,
            shacache_key_file=self.shacache_key_file,
            shadir_ca_file=self.shadir_ca_file,
            shadir_cert_file=self.shadir_cert_file,
            shadir_key_file=self.shadir_key_file,
            software_min_free_space=self.software_min_free_space,
            shared_part_list=self.shared_part_list)

        # call manager for every software release
        for manager in self._manager_list:
          manager.software(software)

        if state == 'available':
          completed_tag = os.path.join(software_path, '.completed')
          # Install when in develop mode, when not yet completed (and no
          # filter is given), or when explicitly selected by the filter,
          # either by hash or by URI.
          if (self.develop or (not os.path.exists(completed_tag) and
                 len(self.software_release_filter_list) == 0) or
                 url_hash in self.software_release_filter_list or
                 url_hash in (md5digest(uri) for uri in self.software_release_filter_list)):
            try:
              software_release.building()
            except NotFoundError:
              pass
            software.install()
            with open(completed_tag, 'w') as fout:
              fout.write(time.asctime())
        elif state == 'destroyed':
          if os.path.exists(software_path):
            self.logger.info('Destroying %r...' % software_release_uri)
            software.destroy()
            self.logger.info('Destroyed %r.' % software_release_uri)

        # call manager tear down for every software release
        for manager in self._manager_list:
          manager.softwareTearDown(software)
      # Send log before exiting
      except (SystemExit, KeyboardInterrupt):
        software_release.error(traceback.format_exc(), logger=self.logger)
        raise

      # Buildout failed: send log but don't print it to output (already done)
      except BuildoutFailedError as exc:
        clean_run = False
        try:
          software_release.error(exc, logger=self.logger)
        except (SystemExit, KeyboardInterrupt):
          raise
        except Exception:
          self.logger.exception('Problem while reporting error, continuing:')

      # For everything else: log it, send it, continue.
      except Exception:
        self.logger.exception('')
        software_release.error(traceback.format_exc(), logger=self.logger)
        clean_run = False
      else:
        # No error: tell the master about the reached state.
        if state == 'available':
          try:
            software_release.available()
          except (NotFoundError, ServerError):
            pass
        elif state == 'destroyed':
          try:
            software_release.destroyed()
          except (NotFoundError, ServerError):
            self.logger.exception('')
    self.logger.info('Finished software releases.')

    # Return success value
    if not clean_run:
      return SLAPGRID_FAIL
    return SLAPGRID_SUCCESS
Łukasz Nowak's avatar
Łukasz Nowak committed
634

635 636 637
  def _checkPromiseList(self, partition, force=True, check_anomaly=False):
    """Run the promises of ``partition`` through a ``PromiseLauncher``.

    :param partition: object providing the ``partition_id``, ``server_url``,
        ``cert_file`` and ``key_file`` attributes.
    :param force: passed to the launcher as its ``force`` setting.
    :param check_anomaly: passed to the launcher as its ``check-anomaly``
        setting.
    :return: whatever ``PromiseLauncher.run()`` returns.
    """
    instance_path = os.path.join(self.instance_root, partition.partition_id)
    promise_log_path = os.path.join(instance_path, PROMISE_LOG_FOLDER_NAME)

    self.logger.info("Checking %s promises..." % partition.partition_id)

    # The owner and group of the partition directory are handed over to the
    # launcher as 'uid' and 'gid'.
    stat_info = os.stat(instance_path)
    uid = stat_info.st_uid
    gid = stat_info.st_gid
    promise_dir = os.path.join(instance_path, 'etc', 'plugin')
    legacy_promise_dir = os.path.join(instance_path, 'etc', 'promise')
    promise_config = {
      'promise-folder': promise_dir,
      'legacy-promise-folder': legacy_promise_dir,
      'promise-timeout': self.promise_timeout,
      'uid': uid,
      'gid': gid,
      'partition-folder': instance_path,
      'log-folder': promise_log_path,
      'force': force,
      'check-anomaly': check_anomaly,
      'master-url': partition.server_url,
      'partition-cert': partition.cert_file,
      'partition-key': partition.key_file,
      'partition-id': partition.partition_id,
      'computer-id': self.computer_id,
    }

    promise_checker = PromiseLauncher(config=promise_config, logger=self.logger)
    return promise_checker.run()
Antoine Catton's avatar
Antoine Catton committed
667

668 669 670 671 672 673 674 675 676
  def _endInstallationTransaction(self, computer_partition):
    """
    Send the list of instances requested by a partition to the master.

    The references are read, one per line, from the request-list transaction
    file of the partition; empty lines are ignored. Nothing is done when
    that file does not exist. A NotFoundError raised while sending the list
    is logged as a warning and otherwise ignored.
    """
    partition_id = computer_partition.getId()
    transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % partition_id
    transaction_file_path = os.path.join(self.instance_root,
                                         partition_id,
                                         transaction_file_name)

    if not os.path.exists(transaction_file_path):
      return

    # Read everything first so the file is closed before talking to master.
    with open(transaction_file_path, 'r') as tf:
      reference_list = [
        reference for reference in tf.read().split('\n') if reference
      ]

    try:
      computer_partition.setComputerPartitionRelatedInstanceList(reference_list)
    except NotFoundError as e:
      # Master doesn't implement this feature ?
      self.logger.warning("NotFoundError: %s. \nCannot send requested instance "
                          "list to master. Please check if this feature is "
                          "implemented on SlapOS Master." % str(e))
686

687 688 689 690
  def _addFirewallRule(self, rule_command):
    """
    Add a firewall rule unless the firewall reports it as already present.

    rule_command -- complete command line containing '--add-rule'. The same
                    command with '--query-rule' instead is run first, and
                    the rule is only added when that query prints 'no'.

    Returns True when the rule was added, False when nothing had to be done.
    Raises Exception when the query fails, when the add command exits with
    a non-zero status, or when it does not print 'success'.
    """
    query_cmd = rule_command.replace('--add-rule', '--query-rule')
    process = FPopen(query_cmd, universal_newlines=True)
    result, stderr = process.communicate()
    rule_missing = result.strip() == 'no'

    if rule_missing:
      # rule doesn't exist add to firewall
      self.logger.debug(rule_command)
      process = FPopen(rule_command, universal_newlines=True)
      rule_result, stderr = process.communicate()
      if process.returncode != 0:
        raise Exception("Failed to add firewalld rule %s\n%s.\n%s" % (
                        rule_command, rule_result, stderr))
      if rule_result.strip() != 'success':
        raise Exception(rule_result)
    elif process.returncode != 0:
      # The query printed something else than 'no' and failed.
      raise Exception("Failed to run firewalld rule %s\n%s.\n%s" % (
                      query_cmd, result, stderr))

    return rule_missing

  def _removeFirewallRule(self, rule_command):
    """
    Remove a firewall rule if the firewall reports it as present.

    rule_command -- complete command line containing '--add-rule'. It is
                    turned into a '--query-rule' command to check that the
                    rule exists and into a '--remove-rule' command to drop it.

    Returns True when the rule was removed, False when nothing had to be
    done. Raises Exception when the query fails (a failing query that
    prints 'no' is not an error), when the remove command exits with a
    non-zero status, or when it does not print 'success'.
    """
    query_cmd = rule_command.replace('--add-rule', '--query-rule')
    process = FPopen(query_cmd, universal_newlines=True)
    result, stderr = process.communicate()
    rule_exists = result.strip() == 'yes'

    if rule_exists:
      # The rule really exist, remove it
      remove_command = rule_command.replace('--add-rule', '--remove-rule')
      self.logger.debug(remove_command)
      process = FPopen(remove_command, universal_newlines=True)
      rule_result, stderr = process.communicate()
      if process.returncode != 0:
        # Report the command which actually failed (the removal one).
        raise Exception("Failed to remove firewalld rule %s\n%s.\n%s" % (
                        remove_command, rule_result, stderr))
      if rule_result.strip() != 'success':
        raise Exception(rule_result)
    elif result.strip() != 'no' and process.returncode != 0:
      raise Exception("Failed to run firewalld rule %s\n%s.\n%s" % (
                      query_cmd, result, stderr))

    return rule_exists

734
  def _checkAddFirewallRules(self, partition_id, command_list, add=True):
    """
    Process Firewall rules from and save rules to firewall_rules_path

    partition_id -- id of the partition, used to locate its rules file.
    command_list -- rule commands (without the firewall base command) which
                    must be present when `add` is true.
    add -- when true, rules of command_list are added and previously saved
           rules which are not in command_list are removed; when false,
           every previously saved rule is removed.

    If at least one rule was added or removed, the firewall configuration is
    reloaded and the new list of rules is saved as JSON in the rules file.
    Raises Exception if the reload command exits with a non-zero status.
    """
    instance_path = os.path.join(self.instance_root, partition_id)
    firewall_rules_path = os.path.join(instance_path,
                                Partition.partition_firewall_rules_name)
    reload_rules = False
    fw_base_cmd = self.firewall_conf['firewall_cmd']
    json_list = []

    if os.path.exists(firewall_rules_path):
      with open(firewall_rules_path, 'r') as frules:
        rules_list = json.loads(frules.read())

      for command in rules_list:
        # A saved rule is kept only if it is requested again.
        if add and command in command_list:
          continue
        state = self._removeFirewallRule('%s %s' % (fw_base_cmd, command))
        reload_rules = reload_rules or state

    if add:
      json_list = command_list
      for command in command_list:
        state = self._addFirewallRule('%s %s' % (fw_base_cmd, command))
        reload_rules = reload_rules or state

    if reload_rules:
      # Apply changes: reload configuration
      # XXX - need to check firewalld reload instead of restart
      self.logger.info("Reloading firewall configuration...")
      reload_cmd = self.firewall_conf['reload_config_cmd']
      reload_process = FPopen(reload_cmd, universal_newlines=True)
      _, stderr = reload_process.communicate()
      if reload_process.returncode != 0:
        raise Exception("Failed to load firewalld rules with command %s.\n%s" % (
                        reload_cmd, stderr))

      with open(firewall_rules_path, 'w') as frules:
        frules.write(json.dumps(json_list))
781

782
  def _getFirewallAcceptRules(self, ip, hosting_ip_list, source_ip_list, ip_type='ipv4'):
783
    """
784
    Generate rules for firewall based on list of IP that should have access to `ip`
785
    """
786 787 788
    if ip_type not in ['ipv4', 'ipv6', 'eb']:
      raise NotImplementedError("firewall-cmd has not rules with tables %s." % ip_type)

789
    command = '--permanent --direct --add-rule %s filter' % ip_type
790 791

    cmd_list = []
792
    ip_list = hosting_ip_list + source_ip_list
793 794

    for other_ip in ip_list:
795
      # Configure INPUT rules
796 797
      cmd_list.append('%s INPUT 0 -s %s -d %s -j ACCEPT' % (command,
                                                            other_ip, ip))
798
      # Configure FORWARD rules
799 800 801
      cmd_list.append('%s FORWARD 0 -s %s -d %s -j ACCEPT' % (command,
                                                              other_ip, ip))

802 803 804 805
    # Reject all other requests
    cmd_list.append('%s INPUT 1000 -d %s -j REJECT' % (command, ip))
    cmd_list.append('%s FORWARD 1000 -d %s -j REJECT' % (command, ip))
    cmd_list.append('%s INPUT 900 -d %s -m state --state ESTABLISHED,RELATED -j REJECT' % (
806
                    command, ip))
807
    cmd_list.append('%s FORWARD 900 -d %s -m state --state ESTABLISHED,RELATED -j REJECT' % (
808 809
                    command, ip))

810
    return cmd_list
811

812 813 814 815 816 817 818
  def _getFirewallRejectRules(self, ip, hosting_ip_list, source_ip_list, ip_type='ipv4'):
    """
    Generate rules for firewall based on list of IP that should not have access to `ip`
    """
    if ip_type not in ['ipv4', 'ipv6', 'eb']:
      raise NotImplementedError("firewall-cmd has not rules with tables %s." % ip_type)

819
    command = '--permanent --direct --add-rule %s filter' % ip_type
820 821 822 823

    cmd_list = []

    # Accept all other requests
824 825
    #cmd_list.append('%s INPUT 1000 -d %s -j ACCEPT' % (command, ip))
    #cmd_list.append('%s FORWARD 1000 -d %s -j ACCEPT' % (command, ip))
826 827 828 829 830 831 832 833 834 835 836 837 838

    # Reject all other requests from the list
    for other_ip in source_ip_list:
      cmd_list.append('%s INPUT 800 -s %s -d %s -m state --state ESTABLISHED,RELATED -j REJECT' % (
                    command, other_ip, ip))
      cmd_list.append('%s FORWARD 800 -s %s -d %s -m state --state ESTABLISHED,RELATED -j REJECT' % (
                    command, other_ip, ip))
      cmd_list.append('%s INPUT 900 -s %s -d %s -j REJECT' % (command,
                                                            other_ip, ip))
      cmd_list.append('%s FORWARD 900 -s %s -d %s -j REJECT' % (command,
                                                              other_ip, ip))
    # Accept on this hosting subscription
    for other_ip in hosting_ip_list:
839
      cmd_list.append('%s INPUT 0 -s %s -d %s -j ACCEPT' % (command,
840
                                                            other_ip, ip))
841
      cmd_list.append('%s FORWARD 0 -s %s -d %s -j ACCEPT' % (command,
842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
                                                              other_ip, ip))

    return cmd_list

  def _getValidIpv4FromList(self, ipv4_list, warn=False):
    """
    Return the list containing only valid ipv4 or network address.
    """
    valid_list = []
    for ip in ipv4_list:
      if not ip:
        continue
      the_ip = ip.split('/')[0]
      if valid_ipv4(the_ip):
        valid_list.append(ip)
      elif warn:
        self.logger.warn("IP/Network address %s is not valid. ignored.." % ip)
    return valid_list
860

861
  def _setupComputerPartitionFirewall(self, computer_partition, ip_list, drop_entries=False):
    """
    Configure, or remove, the firewall rules which limit access to the IPv4
    addresses of this partition.

    computer_partition -- partition object; its full hosting IP address
        list and its optional `_filter_dict` attribute are read.
    ip_list -- sequence of (interface, address, ...) entries; only the
        entries whose interface name starts with 'route_' are protected.
    drop_entries -- when true the rules are removed instead of being added.

    By default the generated rules accept the other addresses of the
    hosting subscription plus the authorized sources, and reject the rest.
    When `_filter_dict` sets 'fw_restricted_access' to 'off', rules
    rejecting only 'fw_rejected_sources' are generated instead.
    """
    ipv4_list = []
    ipv6_list = []
    hosting_ipv4_list = []
    hosting_ipv6_list = []
    getFirewallRules = self._getFirewallAcceptRules

    add_rules = not drop_entries
    if add_rules:
      self.logger.info("Configuring firewall...")
    else:
      self.logger.info("Removing firewall configuration...")

    for net_ip in ip_list:
      iface, ip = (net_ip[0], net_ip[1])
      if not iface.startswith('route_'):
        continue
      if valid_ipv4(ip):
        ipv4_list.append(ip)
      elif valid_ipv6(ip):
        ipv6_list.append(ip)

    # Addresses of the hosting subscription, minus the partition's own ones.
    hosting_ip_list = computer_partition.getFullHostingIpAddressList()
    for iface, ip in hosting_ip_list:
      if valid_ipv4(ip):
        if ip not in ipv4_list:
          hosting_ipv4_list.append(ip)
      elif valid_ipv6(ip):
        if ip not in ipv6_list:
          hosting_ipv6_list.append(ip)

    filter_dict = getattr(computer_partition, '_filter_dict', None)
    extra_list = []
    accept_ip_list = []
    if filter_dict is not None:
      if filter_dict.get('fw_restricted_access', 'on') == 'off':
        extra_list = filter_dict.get('fw_rejected_sources', '').split(' ')
        getFirewallRules = self._getFirewallRejectRules
        accept_ip_list.extend(self.firewall_conf.get('authorized_sources', []))
        accept_ip_list.extend(filter_dict.get('fw_authorized_sources', '').split(' '))
      else:
        extra_list = filter_dict.get('fw_authorized_sources', '').split(' ')
        extra_list.extend(self.firewall_conf.get('authorized_sources', []))

    # Splitting an empty string yields [''], such empty entries are dropped
    # by _getValidIpv4FromList.
    source_ipv4_list = self._getValidIpv4FromList(extra_list, True)
    hosting_ipv4_list.extend(self._getValidIpv4FromList(accept_ip_list, True))

    # XXX - ipv6_list and hosting_ipv6_list ignored for the moment
    for ip in ipv4_list:
      cmd_list = getFirewallRules(ip, hosting_ipv4_list,
                                  source_ipv4_list, ip_type='ipv4')
      self._checkAddFirewallRules(computer_partition.getId(),
                                  cmd_list, add=add_rules)
Antoine Catton's avatar
Antoine Catton committed
921

922 923 924 925 926 927 928 929 930
  def _checkPromiseAnomaly(self, local_partition, computer_partition):
    partition_access_status = computer_partition.getAccessStatus()
    status_error = False
    if partition_access_status and partition_access_status.startswith("#error"):
      status_error = True
    try:
      self._checkPromiseList(local_partition,
                             check_anomaly=True,
                             force=False)
Bryton Lacquement's avatar
Bryton Lacquement committed
931
    except PromiseError as e:
932 933
      self.logger.error(e)
      if partition_access_status is None or not status_error:
934 935
        computer_partition.error(e, logger=self.logger)
    else:
936
      if partition_access_status is None or status_error:
937 938
        computer_partition.started()

939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
  def processPromise(self, computer_partition):
    """
    Check the promises of one Computer Partition.

    Nothing is done for a partition excluded by
    computer_partition_filter_list, and the promises are only run when the
    state of the partition is the started one.

    Raises ValueError when the partition id is empty.
    """
    partition_id = computer_partition.getId()

    # A falsy id (None, empty string, ...) cannot designate a partition.
    if not partition_id:
      raise ValueError('Computer Partition id is empty.')

    # Honour the explicit list of partitions to process, when there is one.
    if len(self.computer_partition_filter_list) > 0:
      if partition_id not in self.computer_partition_filter_list:
        return

    instance_path = os.path.join(self.instance_root, partition_id)
    os.environ['SLAPGRID_INSTANCE_ROOT'] = self.instance_root

    # The Software Release URI may be unset on the instance. Go on anyway
    # with no URL and no path: it may need to be deleted.
    try:
      software_url = computer_partition.getSoftwareRelease().getURI()
    except NotFoundError:
      software_url = None

    try:
      software_path = os.path.join(self.software_root, md5digest(software_url))
    except TypeError:
      software_path = None

    requested_state = computer_partition.getState()

    supervisord_conf_path = os.path.join(
      _getSupervisordConfigurationDirectory(self.instance_root),
      partition_id + '.conf')
    filter_dict = getattr(computer_partition, '_filter_dict', {})

    partition_options = dict(
      software_path=software_path,
      instance_path=instance_path,
      supervisord_partition_configuration_path=supervisord_conf_path,
      supervisord_socket=self.supervisord_socket,
      computer_partition=computer_partition,
      computer_id=self.computer_id,
      partition_id=partition_id,
      server_url=self.master_url,
      software_release_url=software_url,
      certificate_repository_path=self.certificate_repository_path,
      buildout=self.buildout,
      buildout_debug=self.buildout_debug,
      logger=self.logger,
      retention_delay=filter_dict.get('retention_delay', '0'),
      instance_min_free_space=self.instance_min_free_space,
      instance_storage_home=self.instance_storage_home,
      ipv4_global_network=self.ipv4_global_network,
    )
    local_partition = Partition(**partition_options)

    log = self.logger.info
    log('Processing Promises for Computer Partition %s.', partition_id)
    log('  Software URL: %s', software_url)
    log('  Software path: %s', software_path)
    log('  Instance path: %s', instance_path)

    if requested_state != COMPUTER_PARTITION_STARTED_STATE:
      return

    # NOTE: only the plain promise check runs here; the anomaly check
    # (_checkPromiseAnomaly) is not called from this method.
    self._checkPromiseList(local_partition)

1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021
  def processComputerPartition(self, computer_partition):
    """
    Process a Computer Partition, depending on its state

    Main steps:
    - skip the partition if it is excluded by computer_partition_filter_list;
    - remove the request-list transaction file left by a previous run;
    - build the local Partition object and let the managers update it;
    - if the timestamp sent by the master is not newer than the saved one
      and the periodicity has not elapsed, only check promise anomalies
      (started partitions) and return;
    - otherwise install/start, install/stop or stop the partition according
      to its state, while also logging to the partition's own log file;
    - on success, run the managers tear down and save the new timestamp.

    Raises ValueError when the partition id is empty, NotImplementedError
    for an unsupported state, and re-raises any exception raised while the
    partition is processed.
    """
    computer_partition_id = computer_partition.getId()

    # Sanity checks before processing
    # Those values should not be None or empty string or any falsy value
    if not computer_partition_id:
      raise ValueError('Computer Partition id is empty.')

    # Check if we defined explicit list of partitions to process.
    # If so, if current partition not in this list, skip.
    if len(self.computer_partition_filter_list) > 0 and \
         (computer_partition_id not in self.computer_partition_filter_list):
      return

    self.logger.debug('Check if %s requires processing...' % computer_partition_id)

    instance_path = os.path.join(self.instance_root, computer_partition_id)
    os.environ['SLAPGRID_INSTANCE_ROOT'] = self.instance_root

    # Check if the transaction file of this partition exists; if it was
    # created, remove it so that it is generated again by this transaction.
    transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % computer_partition_id
    transaction_file_path = os.path.join(instance_path, transaction_file_name)
    if os.path.exists(transaction_file_path):
      os.unlink(transaction_file_path)

    # Try to get partition timestamp (last modification date)
    timestamp_path = os.path.join(
        instance_path,
        COMPUTER_PARTITION_TIMESTAMP_FILENAME
    )
    parameter_dict = computer_partition.getInstanceParameterDict()
    timestamp = parameter_dict.get('timestamp')

    error_output_file = os.path.join(
        instance_path,
        COMPUTER_PARTITION_INSTALL_ERROR_FILENAME % computer_partition_id
    )

    try:
      software_url = computer_partition.getSoftwareRelease().getURI()
    except NotFoundError:
      # Problem with instance: SR URI not set.
      # Try to process it anyway, it may need to be deleted.
      software_url = None
    try:
      software_path = os.path.join(self.software_root, md5digest(software_url))
    except TypeError:
      # Problem with instance: SR URI not set.
      # Try to process it anyway, it may need to be deleted.
      software_path = None

    computer_partition_state = computer_partition.getState()

    # A 'periodicity' file in the software release overrides the default
    # periodicity; a file which is not an integer is logged and removed.
    periodicity = self.maximum_periodicity
    if software_path:
      periodicity_path = os.path.join(software_path, 'periodicity')
      if os.path.exists(periodicity_path):
        try:
          with open(periodicity_path) as f:
            periodicity = int(f.read())
        except ValueError:
          os.remove(periodicity_path)
          self.logger.exception('')

    local_partition = Partition(
      software_path=software_path,
      instance_path=instance_path,
      supervisord_partition_configuration_path=os.path.join(
        _getSupervisordConfigurationDirectory(self.instance_root), '%s.conf' %
        computer_partition_id),
      supervisord_socket=self.supervisord_socket,
      computer_partition=computer_partition,
      computer_id=self.computer_id,
      partition_id=computer_partition_id,
      server_url=self.master_url,
      software_release_url=software_url,
      certificate_repository_path=self.certificate_repository_path,
      buildout=self.buildout,
      buildout_debug=self.buildout_debug,
      logger=self.logger,
      retention_delay=getattr(computer_partition, '_filter_dict', {}).get('retention_delay', '0'),
      instance_min_free_space=self.instance_min_free_space,
      instance_storage_home=self.instance_storage_home,
      ipv4_global_network=self.ipv4_global_network,
    )

    # let managers modify current partition
    for manager in self._manager_list:
      manager.instance(local_partition)

    # Check if timestamp from server is more recent than local one.
    # If not: it's not worth processing this partition (nothing has
    # changed).
    if (computer_partition_id not in self.computer_partition_filter_list and
          not self.develop and timestamp and periodicity):
      try:
        last_runtime = os.path.getmtime(timestamp_path)
      except OSError as e:
        # A missing timestamp file just means the partition has to be
        # processed; any other error is unexpected.
        if e.errno != errno.ENOENT:
          raise
      else:
        with open(timestamp_path) as f:
          try:
            old_timestamp = float(f.read())
          except ValueError:
            self.logger.exception('')
            old_timestamp = 0
        if float(timestamp) <= old_timestamp:
          # Check periodicity, i.e if periodicity is one day, partition
          # should be processed at least every day.
          if time.time() <= last_runtime + periodicity or periodicity < 0:
            # check promises anomaly
            if computer_partition_state == COMPUTER_PARTITION_STARTED_STATE:
              self.logger.debug('Partition already up-to-date.')
              self._checkPromiseAnomaly(local_partition, computer_partition)
            else:
              self.logger.debug('Partition already up-to-date. skipping.')

            # Run manager tear down
            for manager in self._manager_list:
              manager.instanceTearDown(local_partition)

            return
        # The partition is going to be processed: drop the old timestamp,
        # it is written again at the end of a successful run.
        os.remove(timestamp_path)

    # Include Partition Logging
    log_folder_path = "%s/.slapgrid/log" % instance_path
    mkdir_p(log_folder_path)

    stat_info = os.stat(instance_path)
    chownDirectory("%s/.slapgrid" % instance_path,
                   uid=stat_info.st_uid,
                   gid=stat_info.st_gid)

    formatter = logging.Formatter(
       '[%(asctime)s] %(levelname)-8s %(name)s %(message)s')

    # this partition_file_handler will be cleaned up after this try: block
    partition_file_handler = logging.FileHandler(
                filename="%s/instance.log" % (log_folder_path)
            )
    partition_file_handler.setFormatter(formatter)
    self.logger.addHandler(partition_file_handler)
    try:
      self.logger.info('Processing Computer Partition %s.' % computer_partition_id)
      self.logger.info('  Software URL: %s' % software_url)
      self.logger.info('  Software path: %s' % software_path)
      self.logger.info('  Instance path: %s' % instance_path)

      # XXX this line breaks 37 tests
      # self.logger.info('  Instance type: %s' % computer_partition.getType())
      self.logger.info('  Instance status: %s' % computer_partition_state)

      if os.path.exists(error_output_file):
        os.unlink(error_output_file)

      partition_ip_list = []
      if self.firewall_conf:
        partition_ip_list = parameter_dict['ip_list'] + parameter_dict.get(
                                                            'full_ip_list', [])

      if computer_partition_state == COMPUTER_PARTITION_STARTED_STATE:
        local_partition.install()
        local_partition.start()
        if self.firewall_conf:
          self._setupComputerPartitionFirewall(computer_partition,
                                              partition_ip_list)
        self._checkPromiseList(local_partition)
        computer_partition.started()
        self._endInstallationTransaction(computer_partition)
      elif computer_partition_state == COMPUTER_PARTITION_STOPPED_STATE:
        try:
          # We want to process the partition, even if stopped, because it should
          # propagate the state to children if any.
          local_partition.install()
          if self.firewall_conf:
            self._setupComputerPartitionFirewall(computer_partition,
                                                partition_ip_list)
        finally:
          # Instance has to be stopped even if buildout/reporting is wrong.
          local_partition.stop()
        try:
          computer_partition.stopped()
        except (SystemExit, KeyboardInterrupt):
          computer_partition.error(traceback.format_exc(), logger=self.logger)
          raise
        except Exception:
          # Reporting the stopped state is best effort.
          pass
        self._endInstallationTransaction(computer_partition)
      elif computer_partition_state == COMPUTER_PARTITION_DESTROYED_STATE:
        local_partition.stop()
        if self.firewall_conf:
          self._setupComputerPartitionFirewall(computer_partition,
                                              partition_ip_list,
                                              drop_entries=True)
        try:
          computer_partition.stopped()
        except (SystemExit, KeyboardInterrupt):
          computer_partition.error(traceback.format_exc(), logger=self.logger)
          raise
        except Exception:
          # Reporting the stopped state is best effort.
          pass
      else:
        error_string = "Computer Partition %r has unsupported state: %s" % \
          (computer_partition_id, computer_partition_state)
        computer_partition.error(error_string, logger=self.logger)
        raise NotImplementedError(error_string)
    except Exception as e:
      if not isinstance(e, PromiseError):
        with open(error_output_file, 'w') as error_file:
          # Write error message in a log file accessible to the computer
          # partition user
          error_file.write(str(e))
        if computer_partition_state == COMPUTER_PARTITION_STARTED_STATE:
          try:
            self._checkPromiseList(local_partition)
          except PromiseError:
            # updating promises state, no need to raise here
            pass
      # Keep `raise e` rather than a bare `raise`: on Python 2 a bare raise
      # placed after the nested try/except above would re-raise the
      # PromiseError handled there instead of `e`.
      raise e
    finally:
      self.logger.removeHandler(partition_file_handler)
      partition_file_handler.close()

    # Run manager tear down
    for manager in self._manager_list:
      manager.instanceTearDown(local_partition)

    # If partition has been successfully processed, write timestamp
    if timestamp:
      with open(timestamp_path, 'w') as f:
        f.write(str(timestamp))
1239

1240
  def FilterComputerPartitionList(self, computer_partition_list):
    """
    Try to filter valid partitions to be processed from free partitions.

    A partition is considered free, and left out of the result, when its
    state is the destroyed one and it has no Software Release URL. The
    '.slapgrid' and '.timestamp' entries of such a partition are deleted,
    and a warning is logged if its directory still contains something.

    An error raised while examining a partition (for example a missing
    partition directory) is logged and reported with
    computer_partition.error(); the partition is left out and the next one
    is examined. SystemExit and KeyboardInterrupt are reported and re-raised.

    Returns the list of partitions to process, in the original order.
    """
    filtered_computer_partition_list = []
    for computer_partition in computer_partition_list:
      try:
        computer_partition_path = os.path.join(self.instance_root,
            computer_partition.getId())
        if not os.path.exists(computer_partition_path):
          raise NotFoundError('Partition directory %s does not exist.' %
              computer_partition_path)
        # Check state of partition. If it is in "destroyed" state, check
        # if the partition is actually installed in the Computer or if it
        # is a "free" partition, and check if it has some Software
        # information.
        # XXX-Cedric: Temporary AND ugly solution to check if an instance
        # is in the partition. Dangerous because not 100% sure it is empty
        computer_partition_state = computer_partition.getState()
        try:
          software_url = computer_partition.getSoftwareRelease().getURI()
        except (NotFoundError, TypeError, NameError):
          software_url = None
        if computer_partition_state == COMPUTER_PARTITION_DESTROYED_STATE and \
           not software_url:
          # Exclude files which may come from concurrent processing,
          # i.e. the slapos node report and slapos node instance commands
          # can create a .timestamp file.
          file_list = os.listdir(computer_partition_path)
          for garbage_file in [".slapgrid", ".timestamp"]:
            if garbage_file in file_list:
              garbage_path = os.path.join(computer_partition_path, garbage_file)
              if os.path.isfile(garbage_path):
                os.unlink(garbage_path)
              else:
                shutil.rmtree(garbage_path)

          if os.listdir(computer_partition_path):
            self.logger.warning("Free partition %s contains file(s) in %s." % (
                computer_partition.getId(), computer_partition_path))
          continue

        # Everything seems fine
        filtered_computer_partition_list.append(computer_partition)

      # XXX-Cedric: factor all this error handling

      # Send log before exiting
      except (SystemExit, KeyboardInterrupt):
        computer_partition.error(traceback.format_exc(), logger=self.logger)
        raise

      except Exception as exc:
        # if Buildout failed: send log but don't print it to output (already done)
        if not isinstance(exc, BuildoutFailedError):
          # For everything else: log it, send it, continue.
          self.logger.exception('')
        try:
          computer_partition.error(exc, logger=self.logger)
        except (SystemExit, KeyboardInterrupt):
          raise
        except Exception:
          self.logger.exception('Problem while reporting error, continuing:')

    return filtered_computer_partition_list

  def processComputerPartitionList(self):
    """
    Launch supervisord, then process every non-empty Computer Partition.

    A failure while processing one partition is logged, reported to that
    partition and remembered for the final summary, but it never prevents
    the following partitions from being processed. Only SystemExit and
    KeyboardInterrupt are propagated, after the traceback has been sent to
    the partition.

    Returns SLAPGRID_FAIL if at least one partition raised something other
    than a PromiseError, SLAPGRID_PROMISE_FAIL if only promise errors
    occurred, SLAPGRID_SUCCESS otherwise.
    """
    def describeType(partition):
      """Type of the partition, or a placeholder while it is not ready."""
      try:
        return partition.getType()
      except slapos.slap.ResourceNotReady:
        return '(not ready)'

    self.logger.info('Processing computer partitions...')
    # The working directories and supervisord have to be there first
    self.checkEnvironmentAndCreateStructure()
    self._launchSupervisord()

    # Turns False as soon as one instance could not be deployed
    deployment_ok = True
    # Turns False as soon as one promise did not pass
    promises_ok = True

    known_partition_id_list = [
        cp.getId() for cp in self.getComputerPartitionList()]
    check_required_only_partitions(known_partition_id_list,
                                   self.computer_partition_filter_list)

    # Dummy / empty partitions are dropped before processing
    partition_list = self.FilterComputerPartitionList(
        self.getComputerPartitionList())

    deployment_failure_list = []
    promise_failure_list = []

    for computer_partition in partition_list:
      # Whatever goes wrong below must stay confined to this iteration, so
      # that a broken instance cannot block the processing of the others.
      try:
        self.processComputerPartition(computer_partition)

      except (SystemExit, KeyboardInterrupt):
        # Send the log before exiting
        computer_partition.error(traceback.format_exc(), logger=self.logger)
        raise

      except PromiseError as exc:
        promises_ok = False
        try:
          self.logger.error(exc)
          computer_partition.error(exc, logger=self.logger)
          promise_failure_list.append((computer_partition, exc))
        except (SystemExit, KeyboardInterrupt):
          raise
        except Exception:
          self.logger.exception('Problem while reporting error, continuing:')

      except Exception as exc:
        deployment_ok = False
        # A Buildout failure has already been printed: only send it.
        # Anything else is logged here before being sent.
        if not isinstance(exc, BuildoutFailedError):
          self.logger.exception('')
        try:
          computer_partition.error(exc, logger=self.logger)
          deployment_failure_list.append((computer_partition, exc))
        except (SystemExit, KeyboardInterrupt):
          raise
        except Exception:
          self.logger.exception('Problem while reporting error, continuing:')

    self.logger.info('Finished computer partitions.')
    self.logger.info('=' * 80)
    summary = (
      ('Error while processing the following partitions:',
       deployment_failure_list),
      ('Error with promises for the following partitions:',
       promise_failure_list),
    )
    for headline, failure_list in summary:
      if failure_list:
        self.logger.info(headline)
        for partition, exc in failure_list:
          self.logger.info('  %s[%s]: %s', partition.getId(), describeType(partition), exc)

    # A deployment failure takes precedence over a promise failure
    if not deployment_ok:
      return SLAPGRID_FAIL
    return SLAPGRID_SUCCESS if promises_ok else SLAPGRID_PROMISE_FAIL
Łukasz Nowak's avatar
Łukasz Nowak committed
1392

1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440
  def processPromiseList(self):
    """
    Run processPromise() on every non-empty Computer Partition.

    Every exception derived from Exception is logged and recorded together
    with its partition; the remaining partitions are still processed.

    Returns SLAPGRID_PROMISE_FAIL if at least one partition raised,
    SLAPGRID_SUCCESS otherwise.
    """
    self.logger.info('Processing promises...')
    check_required_only_partitions(
        [cp.getId() for cp in self.getComputerPartitionList()],
        self.computer_partition_filter_list)

    # Dummy / empty partitions are dropped before processing
    partition_list = self.FilterComputerPartitionList(
        self.getComputerPartitionList())

    # (partition, exception) pairs; an empty list means a clean run
    failure_list = []
    for computer_partition in partition_list:
      try:
        self.processPromise(computer_partition)
      except PromiseError as exc:
        self.logger.error(exc)
        failure_list.append((computer_partition, exc))
      except Exception as exc:
        self.logger.exception('Problem while reporting error, continuing:')
        failure_list.append((computer_partition, exc))

    if not failure_list:
      return SLAPGRID_SUCCESS

    self.logger.info('Finished computer partitions.')
    for partition, exc in failure_list:
      partition_id = partition.getId()
      # The type may not be known yet at that point
      try:
        partition_type = partition.getType()
      except slapos.slap.ResourceNotReady:
        partition_type = '(not ready)'
      self.logger.info('  %s[%s]: %s', partition_id, partition_type, exc)

    return SLAPGRID_PROMISE_FAIL



1441 1442 1443 1444 1445 1446
  def _checkWaitProcessList(self, partition, state_list):
    """
    Check the processes named in the wait file of a partition.

    partition -- object providing instance_path, the directory in which
                 COMPUTER_PARTITION_WAIT_LIST_FILENAME is looked up, and
                 checkProcessesFromStateList(), which performs the check.
    state_list -- states forwarded to checkProcessesFromStateList().

    The wait file holds one process name per line; surrounding whitespace
    and blank lines are ignored.

    Returns False when the partition has no wait file, otherwise the result
    of partition.checkProcessesFromStateList().
    """
    wait_file = os.path.join(partition.instance_path,
                             COMPUTER_PARTITION_WAIT_LIST_FILENAME)

    # isfile() is already False for a path that does not exist
    if not os.path.isfile(wait_file):
      return False

    with open(wait_file) as wait_f:
      # A line read from a file keeps its newline, so it is always truthy:
      # the emptiness test has to be done on the stripped value, otherwise
      # blank lines end up as '' process names.
      stripped_line_list = [line.strip() for line in wait_f]
    processes_list = [name for name in stripped_line_list if name]

    return partition.checkProcessesFromStateList(processes_list, state_list)

1453 1454 1455
  def validateXML(self, to_be_validated, xsd_model):
    """Check an XML document against an XSD model.

    to_be_validated -- the XML document, handed to etree.fromstring()
    xsd_model -- the content of the XSD file, wrapped in a BytesIO to be
                 parsed

    Returns True if the document validates against the schema. Returns
    False if it does not, or if it cannot be parsed (the parse error is
    logged in that case).
    """
    # Build the schema object out of the raw XSD content
    schema = etree.XMLSchema(etree.parse(BytesIO(xsd_model)))

    try:
      parsed_document = etree.fromstring(to_be_validated)
    except (etree.XMLSyntaxError, etree.DocumentInvalid) as exc:
      self.logger.info('Failed to parse this XML report :  %s\n%s' %
                          (to_be_validated, _formatXMLError(exc)))
      self.logger.error(_formatXMLError(exc))
      return False

    return True if schema.validate(parsed_document) else False

1473 1474 1475
  def asXML(self, computer_partition_usage_list):
    """Generates a XML report from computer partition usage list

    computer_partition_usage_list -- objects providing a `usage` attribute
      (an XML document) and a getId() method.

    Every <movement> found at the top level of each usage document is
    copied into a single <journal> document. The content of a <reference>
    child is replaced by the id of the object the usage comes from. Text
    and ids are XML-escaped, and a child without text is written as an
    empty element.

    Returns the report as a string.

    Raises UnicodeError if a usage document cannot be read, and Exception
    if it cannot be parsed or if anything else fails while parsing it.
    """
    # Function-scope import: only this method needs it
    from xml.sax.saxutils import escape

    xml = ['<?xml version="1.0"?>',
           '<journal>',
           '<transaction type="Sale Packing List">',
           '<title>Resource consumptions</title>',
           '<start_date></start_date>',
           '<stop_date>%s</stop_date>' % time.strftime("%Y-%m-%d at %H:%M:%S"),
           '<reference>%s</reference>' % escape('%s' % self.computer_id),
           '<currency></currency>',
           '<payment_mode></payment_mode>',
           '<category></category>',
           '<arrow type="Administration">',
           '<source></source>',
           '<destination></destination>',
           '</arrow>']

    for computer_partition_usage in computer_partition_usage_list:
      try:
        root = etree.fromstring(computer_partition_usage.usage)
      except UnicodeError as exc:
        self.logger.info("Failed to read %s." % computer_partition_usage.usage)
        # Log the caught error itself, not the exception class
        self.logger.error(exc)
        raise UnicodeError("Failed to read %s: %s" % (computer_partition_usage.usage, exc))
      except (etree.XMLSyntaxError, etree.DocumentInvalid) as exc:
        self.logger.info("Failed to parse %s." % (computer_partition_usage.usage))
        self.logger.error(exc)
        # _formatXMLError() output is used as message text elsewhere in this
        # class, so it is wrapped in an exception: raising it directly would
        # fail with a TypeError hiding the real problem.
        raise Exception(_formatXMLError(exc))
      except Exception as exc:
        raise Exception("Failed to generate XML report: %s" % exc)

      for movement in root.findall('movement'):
        xml.append('<movement>')
        for child in movement:
          if child.tag == "reference":
            content = escape('%s' % computer_partition_usage.getId())
          elif child.text is None:
            # Empty element: do not write the string 'None'
            content = ''
          else:
            # Parsing decoded the entities, they have to be encoded again
            content = escape(child.text)
          xml.append('<%s>%s</%s>' % (child.tag, content, child.tag))
        xml.append('</movement>')

    xml.append('</transaction></journal>')

    return ''.join(xml)
1517

Łukasz Nowak's avatar
Łukasz Nowak committed
1518 1519 1520
  def agregateAndSendUsage(self):
    """Aggregates the usage of each Computer Partition and sends it.

    Steps, in order:
      1. run every script found in <partition>/etc/report with the uid/gid
         owning the partition directory,
      2. collect the XML reports found for each partition and keep the ones
         which validate against the partition consumption model,
      3. send the valid reports found in
         <instance_root>/var/xml_report/<computer_id>,
      4. merge the partition reports with asXML() and send the result,
      5. stop and destroy the partitions whose state is
         COMPUTER_PARTITION_DESTROYED_STATE.

    Returns SLAPGRID_FAIL if a report script failed or if an error happened
    while destroying a partition, SLAPGRID_SUCCESS otherwise.
    """
    def loadConsumptionModel(filename):
      """Content of an XSD model of slapos.slap, with a fallback on a path
      relative to this module."""
      try:
        return pkg_resources.resource_string(
          'slapos.slap',
          'doc/' + filename)
      except IOError:
        return pkg_resources.resource_string(
          __name__,
          '../../../../slapos/slap/doc/' + filename)

    # Prepares environment
    self.checkEnvironmentAndCreateStructure()
    self._launchSupervisord()

    slap_computer_usage = self.slap.registerComputer(self.computer_id)
    computer_partition_usage_list = []
    self.logger.info('Aggregating and sending usage reports...')

    # We retrieve XSD models
    computer_consumption_model = loadConsumptionModel(
      'computer_consumption.xsd')
    partition_consumption_model = loadConsumptionModel(
      'partition_consumption.xsd')

    clean_run = True
    # Loop over the different computer partitions
    computer_partition_list = self.FilterComputerPartitionList(
       slap_computer_usage.getComputerPartitionList())

    for computer_partition in computer_partition_list:
      try:
        computer_partition_id = computer_partition.getId()

        # We want to execute all the script in the report folder
        instance_path = os.path.join(self.instance_root,
            computer_partition_id)
        report_path = os.path.join(instance_path, 'etc', 'report')
        if os.path.isdir(report_path):
          script_list_to_run = os.listdir(report_path)
        else:
          script_list_to_run = []

        # The temporary file is only used to get a pseudorandom name for
        # the xml file: close it as soon as the name is known.
        with tempfile.NamedTemporaryFile() as f:
          name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name))
        path_to_slapreport = os.path.join(instance_path, 'var', 'xml_report',
            name_xml)

        failed_script_list = []
        for script in script_list_to_run:
          # The script receives the path of the xml file as only argument
          invocation_list = [os.path.join(report_path, script),
                             path_to_slapreport]
          # Dropping privileges: run as the owner of the partition directory
          stat_info = os.stat(instance_path)
          uid = stat_info.st_uid
          gid = stat_info.st_gid
          process_handler = SlapPopen(invocation_list,
                                      preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger),
                                      cwd=report_path,
                                      env=None,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      logger=self.logger)
          if process_handler.returncode is None:
            process_handler.kill()
          if process_handler.returncode != 0:
            clean_run = False
            failed_script_list.append("Script %r failed." % script)
            self.logger.warning('Failed to run %r' % invocation_list)
          # Once a script has failed, this is sent again after each later
          # script, with the failures accumulated so far.
          if failed_script_list:
            computer_partition.error('\n'.join(failed_script_list), logger=self.logger)
      # Whatever happens, don't stop processing other instances
      except Exception:
        self.logger.exception('Cannot run usage script(s) for %r:' %
                                  computer_partition.getId())

    # Now we loop through the different computer partitions to report
    report_usage_issue_cp_list = []
    for computer_partition in computer_partition_list:
      try:
        computer_partition_id = computer_partition.getId()
        instance_path = os.path.join(self.instance_root, computer_partition_id)
        dir_report_list = [os.path.join(instance_path, 'var', 'xml_report'),
            os.path.join(self.instance_root, 'var', 'xml_report',
                         computer_partition_id)]

        for dir_reports in dir_report_list:
          # One deletion list per directory: a name collected in a previous
          # directory must not be removed from this one, it would raise and
          # skip the cleanup of the reports collected here.
          filename_delete_list = []

          # The directory xml_report contain a number of files equal
          # to the number of software instance running inside the same partition
          if os.path.isdir(dir_reports):
            filename_list = os.listdir(dir_reports)
          else:
            filename_list = []

          for filename in filename_list:

            file_path = os.path.join(dir_reports, filename)
            if os.path.exists(file_path):
              with open(file_path, 'r') as f:
                usage = f.read()

              # We check the validity of xml content of each reports
              if not self.validateXML(usage, partition_consumption_model):
                self.logger.info('WARNING: The XML file %s generated by slapreport is '
                                 'not valid - This report is left as is at %s where you can '
                                 'inspect what went wrong ' % (filename, dir_reports))
                # Warn the SlapOS Master that a partition generates corrupted xml
                # report
              else:
                computer_partition_usage = self.slap.registerComputerPartition(
                    self.computer_id, computer_partition_id)
                computer_partition_usage.setUsage(usage)
                computer_partition_usage_list.append(computer_partition_usage)
                filename_delete_list.append(filename)
            else:
              self.logger.debug('Usage report %r not found, ignored' % file_path)

          # The valid xml reports have been collected: remove their files
          for filename in filename_delete_list:
            os.remove(os.path.join(dir_reports, filename))

      # Whatever happens, don't stop processing other instances
      except Exception:
        self.logger.exception('Cannot run usage script(s) for %r:' %
                                computer_partition.getId())

    for computer_partition_usage in computer_partition_usage_list:
      self.logger.info('computer_partition_usage_list: %s - %s' %
                       (computer_partition_usage.usage, computer_partition_usage.getId()))

    filename_delete_list = []
    computer_report_dir = os.path.join(self.instance_root,
                                 'var', 'xml_report', self.computer_id)

    # Reports found in the directory named after the computer itself
    if os.path.isdir(computer_report_dir):
      filename_list = os.listdir(computer_report_dir)
    else:
      filename_list = []

    for filename in filename_list:

      file_path = os.path.join(computer_report_dir, filename)
      if not os.path.exists(file_path):
        # Without this guard the content of the previous file (or an
        # undefined name) would be validated and sent in place of this one.
        self.logger.debug('Usage report %r not found, ignored' % file_path)
        continue
      with open(file_path, 'r') as f:
        usage = f.read()

      if self.validateXML(usage, computer_consumption_model):
        self.logger.info('XML file generated by asXML is valid')
        slap_computer_usage.reportUsage(usage)
        filename_delete_list.append(filename)
      else:
        self.logger.info('XML file is invalid %s' % filename)

    # After sending the reports we remove all the valid xml files
    for filename in filename_delete_list:
      os.remove(os.path.join(computer_report_dir, filename))

    # If there is, at least, one report
    if computer_partition_usage_list:
      try:
        # We generate the final XML report with asXML method
        computer_consumption = self.asXML(computer_partition_usage_list)

        self.logger.info('Final xml report: %s' % computer_consumption)

        # We test the XML report before sending it
        if self.validateXML(computer_consumption, computer_consumption_model):
          self.logger.info('XML file generated by asXML is valid')
          slap_computer_usage.reportUsage(computer_consumption)
        else:
          self.logger.info('XML file generated by asXML is not valid !')
          raise ValueError('XML file generated by asXML is not valid !')
      except Exception:
        # NOTE(review): computer_partition and computer_partition_id are the
        # values left behind by the collection loop above, so only that one
        # partition is notified and kept from being destroyed below, although
        # the report covers every collected partition - confirm the intent.
        issue = "Cannot report usage for %r: %s" % (
            computer_partition.getId(),
            traceback.format_exc())
        self.logger.info(issue)
        computer_partition.error(issue, logger=self.logger)
        report_usage_issue_cp_list.append(computer_partition_id)

    for computer_partition in computer_partition_list:
      if computer_partition.getState() != COMPUTER_PARTITION_DESTROYED_STATE:
        continue

      destroyed = False
      try:
        computer_partition_id = computer_partition.getId()
        try:
          software_url = computer_partition.getSoftwareRelease().getURI()
          software_path = os.path.join(self.software_root, md5digest(software_url))
        except (NotFoundError, TypeError):
          software_url = None
          software_path = None

        local_partition = Partition(
          software_path=software_path,
          instance_path=os.path.join(self.instance_root,
              computer_partition.getId()),
          supervisord_partition_configuration_path=os.path.join(
            _getSupervisordConfigurationDirectory(self.instance_root), '%s.conf' %
            computer_partition_id),
          supervisord_socket=self.supervisord_socket,
          computer_partition=computer_partition,
          computer_id=self.computer_id,
          partition_id=computer_partition_id,
          server_url=self.master_url,
          software_release_url=software_url,
          certificate_repository_path=self.certificate_repository_path,
          buildout=self.buildout,
          buildout_debug=self.buildout_debug,
          logger=self.logger,
          instance_storage_home=self.instance_storage_home,
          ipv4_global_network=self.ipv4_global_network,
        )
        local_partition.stop()
        try:
          computer_partition.stopped()
        except (SystemExit, KeyboardInterrupt):
          computer_partition.error(traceback.format_exc(), logger=self.logger)
          raise
        except Exception:
          # A failure to notify does not prevent the destruction
          pass
        # let managers update current partition
        for manager in self._manager_list:
          manager.report(local_partition)

        if computer_partition.getId() in report_usage_issue_cp_list:
          self.logger.info('Ignoring destruction of %r, as no report usage was sent' %
                              computer_partition.getId())
          continue
        if self._checkWaitProcessList(local_partition,
            state_list=['RUNNING', 'STARTING']):
          self.logger.info('There are running processes into the partition,' \
            ' wait until they finish...')
          continue
        destroyed = local_partition.destroy()
      except (SystemExit, KeyboardInterrupt):
        computer_partition.error(traceback.format_exc(), logger=self.logger)
        raise
      except Exception:
        clean_run = False
        self.logger.exception('')
        exc = traceback.format_exc()
        computer_partition.error(exc, logger=self.logger)
      try:
        if destroyed:
          computer_partition.destroyed()
      except NotFoundError:
        self.logger.debug('Ignored slap error while trying to inform about '
                          'destroying not fully configured Computer Partition %r' %
                              computer_partition.getId())
      except ServerError as server_error:
        self.logger.debug('Ignored server error while trying to inform about '
                          'destroying Computer Partition %r. Error is:\n%r' %
                              (computer_partition.getId(), server_error.args[0]))

    self.logger.info('Finished usage reports.')

    # Return success value
    if not clean_run:
      return SLAPGRID_FAIL
    return SLAPGRID_SUCCESS