Commit af23c67d authored by Cédric Le Ninivin

wip: slapgrid-cp move out of slap

parent 41cdc016
......@@ -581,7 +581,7 @@ class Partition(object):
installs the software partition with the help of buildout
"""
self.logger.info("Installing Computer Partition %s..."
% self.computer_partition.getId())
% self.computer_partition.get("compute_partition_id"))
self.check_free_space()
......@@ -731,7 +731,7 @@ class Partition(object):
if os.path.exists(self.supervisord_partition_configuration_path):
os.unlink(self.supervisord_partition_configuration_path)
else:
partition_id = self.computer_partition.getId()
partition_id = self.computer_partition.get("compute_partition_id")
group_partition_template = bytes2str(pkg_resources.resource_string(__name__,
'templates/group_partition_supervisord.conf.in'))
self.supervisor_configuration_group = group_partition_template % {
......@@ -766,22 +766,22 @@ class Partition(object):
"""Asks supervisord to start the instance. If this instance is not
installed, we install it.
"""
partition_id = self.computer_partition.getId()
partition_id = self.computer_partition.get("compute_partition_id")
try:
with self.getSupervisorRPC() as supervisor:
supervisor.startProcessGroup(partition_id, False)
except xmlrpclib.Fault as exc:
if exc.faultString.startswith('BAD_NAME:'):
self.logger.info("Nothing to start on %s..." %
self.computer_partition.getId())
self.computer_partition.get("compute_partition_id"))
else:
raise
else:
self.logger.info("Requested start of %s..." % self.computer_partition.getId())
self.logger.info("Requested start of %s..." % self.computer_partition.get("compute_partition_id"))
def stop(self):
"""Asks supervisord to stop the instance."""
partition_id = self.computer_partition.getId()
partition_id = self.computer_partition.get("compute_partition_id")
try:
with self.getSupervisorRPC() as supervisor:
supervisor.stopProcessGroup(partition_id, False)
......@@ -791,13 +791,13 @@ class Partition(object):
else:
raise
else:
self.logger.info("Requested stop of %s..." % self.computer_partition.getId())
self.logger.info("Requested stop of %s..." % self.computer_partition.get("compute_partition_id"))
def destroy(self):
"""Destroys the partition and makes it available for subsequent use."
"""
self.logger.info("Destroying Computer Partition %s..."
% self.computer_partition.getId())
% self.computer_partition.get("compute_partition_id"))
self.createRetentionLockDate()
if not self.checkRetentionIsAuthorized():
......
......@@ -416,6 +416,7 @@ class Slapgrid(object):
if computer_partition_filter_list is not None:
self.computer_partition_filter_list = \
computer_partition_filter_list.split(",")
self.computer_partition_list = None
self.maximum_periodicity = maximum_periodicity
self.software_min_free_space = software_min_free_space
self.instance_min_free_space = instance_min_free_space
......@@ -550,11 +551,50 @@ stderr_logfile_backups=1
launchSupervisord(instance_root=self.instance_root, logger=self.logger)
def getComputerPartitionList(self):
  """Return the partitions of this compute node as a list of dict-like items.

  The result is computed on the first call and cached on
  ``self.computer_partition_list``; later calls return the cached list.

  When ``self.api_backward_compatibility`` is false, the list is whatever
  ``self.slap.jio_api_connector.allDocs`` returns for "Software Instance"
  documents of ``self.computer_id``.

  Otherwise the legacy slap partitions from
  ``self.computer.getComputerPartitionList()`` are converted to dicts
  exposing the same keys the rest of this class reads with ``.get()``
  (``compute_partition_id``, ``state``, ``software_release_uri``, ...). The
  original slap object is kept under ``"slap_partition"`` so that state
  reporting can still go through it.

  Raises:
    socket.error: re-raised, after being logged as fatal, when the legacy
      partition list cannot be fetched.
  """
  if self.computer_partition_list is not None:
    return self.computer_partition_list

  if not self.api_backward_compatibility:
    self.computer_partition_list = self.slap.jio_api_connector.allDocs({
      "portal_type": "Software Instance",
      "compute_node_id": self.computer_id,
    })
    return self.computer_partition_list

  try:
    slap_partition_list = self.computer.getComputerPartitionList()
  except socket.error as exc:
    self.logger.fatal(exc)
    raise

  # Build into a local list and only publish it once complete, so that an
  # exception raised while converting one partition does not leave a
  # truncated list in the cache.
  partition_list = []
  for partition in slap_partition_list:
    try:
      software_release_uri = partition.getSoftwareRelease().getURI()
    except (NotFoundError, TypeError, NameError):
      # Software Release URI not set: keep the partition anyway, it may
      # need to be destroyed.
      software_release_uri = None
    # The parameters belong to the partition, not to this Slapgrid object.
    parameter_dict = partition.getInstanceParameterDict()
    timestamp = parameter_dict.get("timestamp")
    partition_list.append({
      "reference": partition._instance_guid,
      "portal_type": "Software Instance",
      "compute_partition_id": partition.getId(),
      "state": partition.getState(),
      "software_type": parameter_dict.get('slap_software_type', None),
      "parameters": parameter_dict,
      "instance_processing_timestamp": timestamp,
      # Partition processing reads "processing_timestamp"; expose the value
      # under that key too so the timestamp check works in legacy mode.
      "processing_timestamp": timestamp,
      "slap_partition": partition,
      "access_status_message": partition.getAccessStatus(),
      "software_release_uri": software_release_uri,
    })
  self.computer_partition_list = partition_list
  return self.computer_partition_list
def sendPartitionError(self, partition, error_message, logger=None):
  """Report ``error_message`` to the master for the given partition.

  ``partition`` is a dict-like partition description. In backward
  compatibility mode the error is forwarded to the slap object stored under
  its ``"slap_partition"`` key, together with ``logger``. Otherwise a
  "Software Instance" document with ``reported_state`` "error" is sent
  through ``self.slap.jio_api_connector.put``; ``logger`` is not used on
  that path.
  """
  if self.api_backward_compatibility:
    partition["slap_partition"].error(error_message, logger=logger)
    return
  put = self.slap.jio_api_connector.put
  error_document = {
    "portal_type": "Software Instance",
    "reported_state": "error",
    "status_message": str(error_message),
    "reference": partition.get("reference")
  }
  put(error_document)
def processSoftwareReleaseList(self):
"""Will process each Software Release.
......@@ -764,7 +804,7 @@ stderr_logfile_backups=1
return promise_checker.run()
def _endInstallationTransaction(self, computer_partition):
partition_id = computer_partition.getId()
partition_id = computer_partition.get("compute_partition_id")
transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % partition_id
transaction_file_path = os.path.join(self.instance_root,
partition_id,
......@@ -773,6 +813,7 @@ stderr_logfile_backups=1
if os.path.exists(transaction_file_path):
with open(transaction_file_path, 'r') as tf:
try:
# XXX CLN
computer_partition.setComputerPartitionRelatedInstanceList(
[reference for reference in tf.read().split('\n') if reference]
)
......@@ -986,6 +1027,7 @@ stderr_logfile_backups=1
elif valid_ipv6(ip):
ipv6_list.append(ip)
# XXX CLN
hosting_ip_list = computer_partition.getFullHostingIpAddressList()
for iface, ip in hosting_ip_list:
if valid_ipv4(ip):
......@@ -1015,11 +1057,11 @@ stderr_logfile_backups=1
for ip in ipv4_list:
cmd_list = getFirewallRules(ip, hosting_ipv4_list,
source_ipv4_list, ip_type='ipv4')
self._checkAddFirewallRules(computer_partition.getId(),
self._checkAddFirewallRules(computer_partition.get("compute_partition_id"),
cmd_list, add=add_rules)
def _checkPromiseAnomaly(self, local_partition, computer_partition):
partition_access_status = computer_partition.getAccessStatus()
partition_access_status = computer_partition.get("access_status_message", "")
status_error = False
if partition_access_status and partition_access_status.startswith("#error"):
status_error = True
......@@ -1031,17 +1073,24 @@ stderr_logfile_backups=1
self.logger.error(e)
if partition_access_status is None or not status_error:
local_partition._updateCertificate()
computer_partition.error(e, logger=self.logger)
self.sendPartitionError(computer_partition, e, logger=self.logger)
else:
if partition_access_status is None or status_error:
local_partition._updateCertificate()
computer_partition.started()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "started"
})
else:
computer_partition["slap_partition"].started()
def processPromise(self, computer_partition):
"""
Process the promises from a given Computer Partition, depending on its state
"""
computer_partition_id = computer_partition.getId()
computer_partition_id = computer_partition.get("compute_partition_id")
# Sanity checks before processing
# Those values should not be None or empty string or any falsy value
......@@ -1056,12 +1105,7 @@ stderr_logfile_backups=1
instance_path = os.path.join(self.instance_root, computer_partition_id)
os.environ['SLAPGRID_INSTANCE_ROOT'] = self.instance_root
try:
software_url = computer_partition.getSoftwareRelease().getURI()
except NotFoundError:
# Problem with instance: SR URI not set.
# Try to process it anyway, it may need to be deleted.
software_url = None
software_url = computer_partition.get("software_release_uri")
try:
software_path = os.path.join(self.software_root, md5digest(software_url))
......@@ -1070,7 +1114,7 @@ stderr_logfile_backups=1
# Try to process it anyway, it may need to be deleted.
software_path = None
computer_partition_state = computer_partition.getState()
computer_partition_state = computer_partition.get("state")
local_partition = Partition(
software_path=software_path,
......@@ -1107,7 +1151,7 @@ stderr_logfile_backups=1
"""
Process a Computer Partition, depending on its state
"""
computer_partition_id = computer_partition.getId()
computer_partition_id = computer_partition.get("compute_partition_id")
# Sanity checks before processing
# Those values should not be None or empty string or any falsy value
......@@ -1137,20 +1181,15 @@ stderr_logfile_backups=1
instance_path,
COMPUTER_PARTITION_TIMESTAMP_FILENAME
)
parameter_dict = computer_partition.getInstanceParameterDict()
timestamp = parameter_dict.get('timestamp')
parameter_dict = computer_partition.get("parameters", {})
timestamp = computer_partition.get("processing_timestamp")
error_output_file = os.path.join(
instance_path,
COMPUTER_PARTITION_INSTALL_ERROR_FILENAME % computer_partition_id
)
try:
software_url = computer_partition.getSoftwareRelease().getURI()
except NotFoundError:
# Problem with instance: SR URI not set.
# Try to process it anyway, it may need to be deleted.
software_url = None
software_url = computer_partition.get("software_release_uri")
try:
software_path = os.path.join(self.software_root, md5digest(software_url))
except TypeError:
......@@ -1158,7 +1197,7 @@ stderr_logfile_backups=1
# Try to process it anyway, it may need to be deleted.
software_path = None
computer_partition_state = computer_partition.getState()
computer_partition_state = computer_partition.get("state")
periodicity = self.maximum_periodicity
if software_path:
periodicity_path = os.path.join(software_path, 'periodicity')
......@@ -1266,7 +1305,7 @@ stderr_logfile_backups=1
local_partition._updateCertificate()
# XXX this line breaks 37 tests
# self.logger.info(' Instance type: %s' % computer_partition.getType())
# self.logger.info(' Instance type: %s' % computer_partition.get("software_type"))
self.logger.info(' Instance status: %s' % computer_partition_state)
if os.path.exists(error_output_file):
......@@ -1288,7 +1327,14 @@ stderr_logfile_backups=1
partition_ip_list)
if not self.force_stop:
self._checkPromiseList(local_partition)
computer_partition.started()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "started"
})
else:
computer_partition["slap_partition"].started()
self._endInstallationTransaction(computer_partition)
elif computer_partition_state == COMPUTER_PARTITION_STOPPED_STATE:
try:
......@@ -1302,9 +1348,16 @@ stderr_logfile_backups=1
# Instance has to be stopped even if buildout/reporting is wrong.
local_partition.stop()
try:
computer_partition.stopped()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "stopped"
})
else:
computer_partition["slap_partition"].stopped()
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except Exception:
pass
......@@ -1316,16 +1369,23 @@ stderr_logfile_backups=1
partition_ip_list,
drop_entries=True)
try:
computer_partition.stopped()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "stopped"
})
else:
computer_partition["slap_partition"].stopped()
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except Exception:
pass
else:
error_string = "Computer Partition %r has unsupported state: %s" % \
(computer_partition_id, computer_partition_state)
computer_partition.error(error_string, logger=self.logger)
self.sendPartitionError(computer_partition, error_string, logger=self.logger)
raise NotImplementedError(error_string)
except Exception as e:
if not isinstance(e, PromiseError):
......@@ -1361,7 +1421,7 @@ stderr_logfile_backups=1
for computer_partition in computer_partition_list:
try:
computer_partition_path = os.path.join(self.instance_root,
computer_partition.getId())
computer_partition.get("compute_partition_id"))
if not os.path.exists(computer_partition_path):
raise NotFoundError('Partition directory %s does not exist.' %
computer_partition_path)
......@@ -1370,11 +1430,8 @@ stderr_logfile_backups=1
# partition, and check if it has some Software information.
# XXX-Cedric: Temporary AND ugly solution to check if an instance
# is in the partition. Dangerous because not 100% sure it is empty
computer_partition_state = computer_partition.getState()
try:
software_url = computer_partition.getSoftwareRelease().getURI()
except (NotFoundError, TypeError, NameError):
software_url = None
computer_partition_state = computer_partition.get("state")
software_url = computer_partition.get("software_release_uri")
if computer_partition_state == COMPUTER_PARTITION_DESTROYED_STATE and \
not software_url:
# Exclude files which may come from concurrent processing
......@@ -1392,7 +1449,7 @@ stderr_logfile_backups=1
# Ignore .slapos-resource file dumped by slapformat.
if os.listdir(computer_partition_path) not in empty_partition_listdir:
self.logger.warning("Free partition %s contains file(s) in %s." % (
computer_partition.getId(), computer_partition_path))
computer_partition.get("compute_partition_id"), computer_partition_path))
continue
# Everything seems fine
......@@ -1402,7 +1459,7 @@ stderr_logfile_backups=1
# Send log before exiting
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except Exception as exc:
......@@ -1411,7 +1468,7 @@ stderr_logfile_backups=1
# For everything else: log it, send it, continue.
self.logger.exception('')
try:
computer_partition.error(exc, logger=self.logger)
self.sendPartitionError(computer_partition, exc, logger=self.logger)
except (SystemExit, KeyboardInterrupt):
raise
except Exception:
......@@ -1433,12 +1490,14 @@ stderr_logfile_backups=1
# Boolean to know if every promises correctly passed
clean_run_promise = True
check_required_only_partitions([cp.getId() for cp in self.getComputerPartitionList()],
computer_partition_list = self.getComputerPartitionList()
check_required_only_partitions([cp.get("computer_partition_id", "") for cp in computer_partition_list],
self.computer_partition_filter_list)
# Filter all dummy / empty partitions
computer_partition_list = self.FilterComputerPartitionList(
self.getComputerPartitionList())
computer_partition_list)
process_error_partition_list = []
promise_error_partition_list = []
......@@ -1447,20 +1506,25 @@ stderr_logfile_backups=1
# Nothing should raise outside of the current loop iteration, so that
# even if something is terribly wrong while processing an instance, it
# won't prevent processing other ones.
if not self.api_backward_compatibility:
computer_partition = self.slap.jio_connector.get({
"portal_type": "Software Instance",
"reference": computer_partition["reference"]
})
try:
# Process the partition itself
self.processComputerPartition(computer_partition)
# Send log before exiting
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except PromiseError as exc:
clean_run_promise = False
try:
self.logger.error(exc)
computer_partition.error(exc, logger=self.logger)
self.sendPartitionError(computer_partition, exc, logger=self.logger)
promise_error_partition_list.append((computer_partition, exc))
except (SystemExit, KeyboardInterrupt):
raise
......@@ -1474,7 +1538,7 @@ stderr_logfile_backups=1
# For everything else: log it, send it, continue.
self.logger.exception('')
try:
computer_partition.error(exc, logger=self.logger)
self.sendPartitionError(computer_partition, exc, logger=self.logger)
process_error_partition_list.append((computer_partition, exc))
except (SystemExit, KeyboardInterrupt):
raise
......@@ -1484,9 +1548,8 @@ stderr_logfile_backups=1
def getPartitionType(part):
  """Return the software type of ``part``, if known at that point.

  ``part`` is a dict-like partition description. When it has no
  ``"software_type"`` entry, or the entry is None, the placeholder
  ``'(not ready)'`` is returned instead.
  """
  # Read from the argument itself rather than from a same-named variable of
  # the enclosing scope, and return the type once it is known.
  software_type = part.get("software_type", None)
  if software_type is None:
    return '(not ready)'
  return software_type
self.logger.info('Finished computer partitions.')
......@@ -1494,11 +1557,11 @@ stderr_logfile_backups=1
if process_error_partition_list:
self.logger.info('Error while processing the following partitions:')
for partition, exc in process_error_partition_list:
self.logger.info(' %s[%s]: %s', partition.getId(), getPartitionType(partition), exc)
self.logger.info(' %s[%s]: %s', partition.get("compute_partition_id"), getPartitionType(partition), exc)
if promise_error_partition_list:
self.logger.info('Error with promises for the following partitions:')
for partition, exc in promise_error_partition_list:
self.logger.info(' %s[%s]: %s', partition.getId(), getPartitionType(partition), exc)
self.logger.info(' %s[%s]: %s', partition.get("compute_partition_id"), getPartitionType(partition), exc)
# Return success value
if not clean_run:
......@@ -1514,15 +1577,23 @@ stderr_logfile_backups=1
self.logger.info('Processing promises...')
# Return success value
clean_run_promise = True
check_required_only_partitions([cp.getId() for cp in self.getComputerPartitionList()],
computer_partition_list = self.getComputerPartitionList()
check_required_only_partitions([cp.get("computer_partition_id", "") for cp in computer_partition_list],
self.computer_partition_filter_list)
# Filter all dummy / empty partitions
computer_partition_list = self.FilterComputerPartitionList(
self.getComputerPartitionList())
computer_partition_list)
promise_error_partition_list = []
for computer_partition in computer_partition_list:
if not self.api_backward_compatibility:
computer_partition = self.slap.jio_connector.get({
"portal_type": "Software Instance",
"reference": computer_partition["reference"]
})
try:
# Process the partition itself
self.processPromise(computer_partition)
......@@ -1538,15 +1609,14 @@ stderr_logfile_backups=1
def getPartitionType(part):
  """Return the software type of ``part``, if known at that point.

  ``part`` is a dict-like partition description. When it has no
  ``"software_type"`` entry, or the entry is None, the placeholder
  ``'(not ready)'`` is returned instead.
  """
  # Read from the argument itself rather than from a same-named variable of
  # the enclosing scope, and return the type once it is known.
  software_type = part.get("software_type", None)
  if software_type is None:
    return '(not ready)'
  return software_type
if promise_error_partition_list:
self.logger.info('Finished computer partitions.')
for partition, exc in promise_error_partition_list:
self.logger.info(' %s[%s]: %s', partition.getId(), getPartitionType(partition), exc)
self.logger.info(' %s[%s]: %s', partition.get("compute_partition_id"), getPartitionType(partition), exc)
# Return success value
if not clean_run_promise:
......@@ -1673,11 +1743,11 @@ stderr_logfile_backups=1
for computer_partition in computer_partition_list:
try:
computer_partition_id = computer_partition.getId()
computer_partition_id = computer_partition.get("compute_partition_id")
# We want to execute all the script in the report folder
instance_path = os.path.join(self.instance_root,
computer_partition.getId())
computer_partition.get("compute_partition_id"))
report_path = os.path.join(instance_path, 'etc', 'report')
if os.path.isdir(report_path):
script_list_to_run = os.listdir(report_path)
......@@ -1722,18 +1792,18 @@ stderr_logfile_backups=1
failed_script_list.append("Script %r failed." % script)
self.logger.warning('Failed to run %r' % invocation_list)
if len(failed_script_list):
computer_partition.error('\n'.join(failed_script_list), logger=self.logger)
self.sendPartitionError(computer_partition, '\n'.join(failed_script_list), logger=self.logger)
# Whatever happens, don't stop processing other instances
except Exception:
self.logger.exception('Cannot run usage script(s) for %r:' %
computer_partition.getId())
computer_partition.get("compute_partition_id"))
# Now we loop through the different computer partitions to report
report_usage_issue_cp_list = []
for computer_partition in computer_partition_list:
try:
filename_delete_list = []
computer_partition_id = computer_partition.getId()
computer_partition_id = computer_partition.get("compute_partition_id")
instance_path = os.path.join(self.instance_root, computer_partition_id)
dir_report_list = [os.path.join(instance_path, 'var', 'xml_report'),
os.path.join(self.instance_root, 'var', 'xml_report',
......@@ -1778,7 +1848,7 @@ stderr_logfile_backups=1
# Whatever happens, don't stop processing other instances
except Exception:
self.logger.exception('Cannot run usage script(s) for %r:' %
computer_partition.getId())
computer_partition.get("compute_partition_id"))
for computer_partition_usage in computer_partition_usage_list:
self.logger.info('computer_partition_usage_list: %s - %s' %
......@@ -1830,28 +1900,29 @@ stderr_logfile_backups=1
raise ValueError('XML file generated by asXML is not valid !')
except Exception:
issue = "Cannot report usage for %r: %s" % (
computer_partition.getId(),
computer_partition.get("compute_partition_id"),
traceback.format_exc())
self.logger.info(issue)
computer_partition.error(issue, logger=self.logger)
self.sendPartitionError(computer_partition, issue, logger=self.logger)
report_usage_issue_cp_list.append(computer_partition_id)
for computer_partition in computer_partition_list:
if computer_partition.getState() == COMPUTER_PARTITION_DESTROYED_STATE:
if computer_partition.get("state") == COMPUTER_PARTITION_DESTROYED_STATE:
destroyed = False
try:
computer_partition_id = computer_partition.getId()
computer_partition_id = computer_partition.get("compute_partition_id")
software_url = computer_partition.get("software_release_uri")
try:
software_url = computer_partition.getSoftwareRelease().getURI()
software_path = os.path.join(self.software_root, md5digest(software_url))
except (NotFoundError, TypeError):
software_url = None
except TypeError:
# Problem with instance: SR URI not set.
# Try to process it anyway, it may need to be deleted.
software_path = None
local_partition = Partition(
software_path=software_path,
instance_path=os.path.join(self.instance_root,
computer_partition.getId()),
computer_partition.get("compute_partition_id")),
supervisord_partition_configuration_path=os.path.join(
_getSupervisordConfigurationDirectory(self.instance_root), '%s.conf' %
computer_partition_id),
......@@ -1871,9 +1942,16 @@ stderr_logfile_backups=1
local_partition.stop()
local_partition._updateCertificate()
try:
computer_partition.stopped()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "started"
})
else:
computer_partition["slap_partition"].stopped()
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except Exception:
pass
......@@ -1881,9 +1959,9 @@ stderr_logfile_backups=1
for manager in self._manager_list:
manager.report(local_partition)
if computer_partition.getId() in report_usage_issue_cp_list:
if computer_partition.get("compute_partition_id") in report_usage_issue_cp_list:
self.logger.info('Ignoring destruction of %r, as no report usage was sent' %
computer_partition.getId())
computer_partition.get("compute_partition_id"))
continue
if self._checkWaitProcessList(local_partition,
state_list=['RUNNING', 'STARTING']):
......@@ -1892,24 +1970,31 @@ stderr_logfile_backups=1
continue
destroyed = local_partition.destroy()
except (SystemExit, KeyboardInterrupt):
computer_partition.error(traceback.format_exc(), logger=self.logger)
self.sendPartitionError(computer_partition, traceback.format_exc(), logger=self.logger)
raise
except Exception:
clean_run = False
self.logger.exception('')
exc = traceback.format_exc()
computer_partition.error(exc, logger=self.logger)
self.sendPartitionError(computer_partition, exc, logger=self.logger)
try:
if destroyed:
computer_partition.destroyed()
if not self.api_backward_compatibility:
self.slap.jio_api_connector({
"portal_type": "Software Instance",
"reference": computer_partition.get("reference"),
"reported_state": "destroyed"
})
else:
computer_partition["slap_partition"].destroyed()
except NotFoundError:
self.logger.debug('Ignored slap error while trying to inform about '
'destroying not fully configured Computer Partition %r' %
computer_partition.getId())
computer_partition.get("compute_partition_id"))
except ServerError as server_error:
self.logger.debug('Ignored server error while trying to inform about '
'destroying Computer Partition %r. Error is:\n%r' %
(computer_partition.getId(), server_error.args[0]))
(computer_partition.get("compute_partition_id"), server_error.args[0]))
self.logger.info('Finished usage reports.')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment